__author__ = 'cern'

'''
BlackPyramid Market Crawler (Selenium)
'''

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver import ActionChains
import selenium.webdriver.support.ui as uiClasses
from selenium.webdriver.common.keys import Keys
from PIL import Image

import urllib.parse as urlparse
import os, re, time
import subprocess
import configparser
from bs4 import BeautifulSoup
from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.BlackPyramid.parser import BlackPyramid_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML

import traceback

counter = 1
baseURL = 'http://blackpyoc3gbnrlvxqvvytd3kxqj7pd226i2gvfyhysj24ne2snkmnyd.onion/'


# Opens Tor Browser, crawls the website
def startCrawling():
    marketName = getMKTName()
    driver = getAccess()

    if driver != 'down':
        try:
            login(driver)
            crawlForum(driver)
        except Exception as e:
            print(driver.current_url, e)
        closetor(driver)

    new_parse(marketName, baseURL, True)


# Login
def login(driver):
    # wait for login page
    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
        (By.XPATH, "//input[@name='username_login']")))

    # entering username and password into input boxes
    usernameBox = driver.find_element(by=By.XPATH, value="//input[@name='username_login']")
    # Username here
    usernameBox.send_keys('ChipotleSteakBurrito')
    passwordBox = driver.find_element(by=By.XPATH, value="//input[@name='password_login']")
    # Password here
    passwordBox.send_keys('BlackBeans')

    input("Press ENTER when CAPTCHA is completed and you closed the newsletter\n")

    # wait for the listing page to show up (this XPath may need to change for a different seed URL)
    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
        (By.XPATH, '//*[@id="form93b"]')))


# Returns the name of the website
def getMKTName():
    name = 'BlackPyramid'
    return name


# Return the link of the website
def getFixedURL():
    url = 'http://blackpyoc3gbnrlvxqvvytd3kxqj7pd226i2gvfyhysj24ne2snkmnyd.onion/login/?login=1'

    return url


# Closes Tor Browser
def closetor(driver):
    # global pid
    # os.system("taskkill /pid " + str(pro.pid))
    # os.system("taskkill /t /f /im tor.exe")
    print('Closing Tor...')
    driver.close()
    time.sleep(3)
    return


# Creates a Firefox 'driver' and configures its 'Profile'
# to use the Tor proxy and socket
def createFFDriver():
    from MarketPlaces.Initialization.markets_mining import config

    ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))

    ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
    ff_prof.set_preference("places.history.enabled", False)
    ff_prof.set_preference("privacy.clearOnShutdown.offlineApps", True)
    ff_prof.set_preference("privacy.clearOnShutdown.passwords", True)
    ff_prof.set_preference("privacy.clearOnShutdown.siteSettings", True)
    ff_prof.set_preference("privacy.sanitize.sanitizeOnShutdown", True)
    ff_prof.set_preference("signon.rememberSignons", False)
    ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
    # ff_prof.set_preference("network.dns.disablePrefetch", True)
    # ff_prof.set_preference("network.http.sendRefererHeader", 0)
    ff_prof.set_preference("permissions.default.image", 3)
    ff_prof.set_preference("browser.download.folderList", 2)
    ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
    ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
    ff_prof.set_preference('network.proxy.type', 1)
    ff_prof.set_preference("network.proxy.socks_version", 5)
    ff_prof.set_preference('network.proxy.socks', '127.0.0.1')
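    # 9150 is the SOCKS port exposed by the Tor Browser bundle; a standalone tor daemon
    # usually listens on 9050 instead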
    ff_prof.set_preference('network.proxy.socks_port', 9150)
    ff_prof.set_preference('network.proxy.socks_remote_dns', True)
    ff_prof.set_preference("javascript.enabled", False)
    ff_prof.update_preferences()

    service = Service(config.get('TOR', 'geckodriver_path'))
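    # NOTE (assumption): the webdriver.Firefox(...) call below targets older Selenium 4
    # releases; the firefox_binary/firefox_profile keyword arguments were removed in
    # Selenium 4.10+, where the binary and profile are set on an Options object instead.
    # A minimal sketch for newer Selenium, assuming the same config keys:
    #   options = webdriver.FirefoxOptions()
    #   options.binary_location = config.get('TOR', 'firefox_binary_path')
    #   options.profile = ff_prof
    #   driver = webdriver.Firefox(service=service, options=options)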

    driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)

    driver.maximize_window()

    return driver


# Opens the seed URL; returns the driver on success or the string 'down' on failure
def getAccess():
    url = getFixedURL()
    driver = createFFDriver()
    try:
        driver.get(url)
        return driver
    except Exception:
        driver.close()
        return 'down'


# Saves the crawled html page
def savePage(driver, page, url):
    cleanPage = cleanHTML(driver, page)
    filePath = getFullPathName(url)
    os.makedirs(os.path.dirname(filePath), exist_ok=True)
    with open(filePath, 'wb') as file:
        file.write(cleanPage.encode('utf-8'))
    return


# Gets the full path of the page to be saved along with its appropriate file name
def getFullPathName(url):
    from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE

    mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
    fileName = getNameFromURL(url)
    if isDescriptionLink(url):
        fullPath = os.path.join(mainDir, CURRENT_DATE, 'Description', fileName + '.html')
    else:
        fullPath = os.path.join(mainDir, CURRENT_DATE, 'Listing', fileName + '.html')
    return fullPath


# Creates the file name from the passed URL
def getNameFromURL(url):
    global counter
    name = ''.join(e for e in url if e.isalnum())
    if name == '':
        name = str(counter)
        counter = counter + 1
    return name


# Predicate for WebDriverWait: True once the DOM reports document.readyState == "complete"
def page_is_fully_loaded(driver):
    return driver.execute_script("return document.readyState") == "complete"


# Navigates to a listing category: hovers over the site's category menu, then clicks the
# button whose name attribute matches 'page' (e.g. 'h11' for Hacking Tools)
def goToPage(driver, page):
    # hover over digital -> hacking tools
    a = ActionChains(driver)

    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
        (By.XPATH, "//li[@class='dig940']/div/a")))

    # hover
    digitalB = driver.find_element(By.XPATH, "//li[@class='dig940']/div/a")
    time.sleep(1)
    a.move_to_element(digitalB).perform()
    # print(digitalB)

    # delay for website to register hover
    time.sleep(5)

    # click
    xpath = "//input[@name='" + page + "']"
    link = driver.find_element(By.XPATH, xpath)
    time.sleep(1)
    a.move_to_element(link).click().perform()
    # print(link)

    # wait for website to load
    time.sleep(10)
    WebDriverWait(driver, 100).until(page_is_fully_loaded)


def getInterestedLinks():
    links = []

    # h11 -> Hacking Tools
    links.append('h11')
    # g3 -> Guides, Hacking
    links.append('g3')
    # se11 -> Services
    links.append('se11')
    # f11 -> Fraud
    links.append('f11')

    return links


# Crawls each category returned by getInterestedLinks(): saves every listing page,
# visits and saves each product page on it, then pages forward until no next page is left
def crawlForum(driver):

    print("Crawling the BlackPyramid market")

    pages = getInterestedLinks()

    i = 0
    for listing in pages:
        print('Crawling :', listing)
        try:
            driver.get(baseURL)
            goToPage(driver, listing)

            has_next_page = True
            count = 0
            currentPage = 1

            while has_next_page:

                html = driver.page_source
                savePage(driver, html, listing + "page" + str(currentPage))

                # get the list of product urls on this listing page
                productList = productPages(html)

                for item in productList:
                    itemURL = urlparse.urljoin(baseURL, str(item))
                    try:
                        driver.get(itemURL)
                    except:
                        # driver.refresh()
                        continue
                    savePage(driver, driver.page_source, item)
                    # can't use the back button in dark pyramid
                    # driver.back()

                #     # comment out
                #     break
                #
                # # comment out
                # if count == 1:
                #     break

                # go to next page of market
                try:
                    # Scroll to top of page to see navigation bar
                    driver.find_element(by=By.XPATH, value="//body").send_keys(Keys.CONTROL + Keys.HOME)

                    goToPage(driver, listing)  # re-open the category listing; the back button cannot be used on this market
                    nav = driver.find_element(by=By.XPATH, value="//input[@name='next_page']")

                    if nav.is_enabled():
                        # select next page
                        pgnum = uiClasses.Select(driver.find_element(by=By.XPATH, value="//select[@name='pageination']"))
                        # print("pg options:", pgnum.options)
                        numberOfPages = len(pgnum.options)

                        if currentPage >= numberOfPages:
                            raise NoSuchElementException

                        # option indices are 0-based, so index == currentPage selects the following page
                        pgnum.select_by_index(currentPage)
                        currentPage += 1

                        # click button
                        pgbutton = driver.find_element(by=By.XPATH, value="//input[@value='go to page']")
                        pgbutton.click()

                        # wait for website to load
                        time.sleep(10)
                        WebDriverWait(driver, 100).until(page_is_fully_loaded)
                    else:
                        raise NoSuchElementException
                    count += 1

                except NoSuchElementException:
                    has_next_page = False

        except Exception as e:
            print(listing, e)
        i += 1

    print("Crawling the BlackPyramid market done.")


# Returns True if the link is a product description link
def isDescriptionLink(url):
    if 'product' in url:
        return True
    return False


# Returns True if the link is a listing page link
def isListingLink(url):
    if 'category=' in url:
        return True
    return False


# Calls the parser to extract product links from a listing page
def productPages(html):
    soup = BeautifulSoup(html, "html.parser")
    return BlackPyramid_links_parser(soup)


def crawler():
    startCrawling()
    # print("Crawling and Parsing BestCardingWorld .... DONE!")