__author__ = 'cern'

'''
BlackPyramid Market Crawler (Selenium)
'''

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver import ActionChains
import selenium.webdriver.support.ui as uiClasses
from PIL import Image

import urllib.parse as urlparse
import os, re, time
import subprocess
import configparser
from bs4 import BeautifulSoup
from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.BlackPyramid.parser import BlackPyramid_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML

import traceback

# Project settings are read from setup.ini; the 'TOR' section supplies the
# Firefox binary, Firefox profile and geckodriver paths used below.
# NOTE: the path is relative, so it depends on the current working directory.
config = configparser.ConfigParser()
config.read('../../setup.ini')
# Module-wide number used as a file-name suffix for duplicate pages and as a
# fallback file name when a URL contains no alphanumeric characters.
counter = 1
# Login URL of the market; also the base against which product links are
# resolved and the URL handed to the parser after crawling.
baseURL = 'http://blackpyoc3gbnrlvxqvvytd3kxqj7pd226i2gvfyhysj24ne2snkmnyd.onion/login/?login=1'


# Opens Tor Browser, crawls the website
def startCrawling():
    """Run one full crawl of the market, then hand the saved pages to the parser.

    Opens the browser via getAccess(); if the site could be reached, waits for
    the operator, logs in and crawls. The browser is always shut down once it
    was opened, even if crawling or the error report itself fails.
    new_parse() is called afterwards whether or not the site was reachable.
    """
    # Opening tor beforehand gives "Tor exited during startup error"
    # opentor()

    marketName = getMarketName()

    driver = getAccess()

    # getAccess() returns the string 'down' instead of a driver on failure,
    # in which case there is no page to wait for and nothing to close.
    if driver != 'down':
        try:
            # Wait for website to load
            input("Press ENTER when website has loaded")
            login(driver)
            crawlForum(driver)
        except Exception as e:
            print(driver.current_url, e)
        finally:
            # Runs even if the handler above raises (e.g. current_url on a
            # dead session), so the browser is never left running.
            closetor(driver)

    new_parse(marketName, baseURL, False)


# Opens Tor Browser
def opentor():
    """Launch the configured Firefox/Tor binary and wait for the operator.

    Stores the process id of the launched program in the module-level ``pid``.
    """
    global pid

    print("Connecting Tor...")
    binary_path = config.get('TOR', 'firefox_binary_path')
    tor_process = subprocess.Popen(binary_path)
    pid = tor_process.pid

    # Give the browser a moment to start before prompting the operator.
    time.sleep(7.5)
    input('Tor Connected. Press ENTER to continue\n')


# Login
def login(driver):
    """Type the account credentials into the login form and wait for the CAPTCHA.

    The CAPTCHA is solved manually: the function blocks on a console prompt
    until the operator presses ENTER. No automated wait for the post-login
    page is performed.
    """
    # NOTE(review): credentials are hard-coded here; consider moving them to
    # the project configuration.
    # (input XPath, text to type), filled in this order: username, password
    form_fields = (
        ("//input[@name='username_login']", 'ChipotleSteakBurrito'),
        ("//input[@name='password_login']", 'BlackBeans'),
    )
    for field_xpath, text in form_fields:
        box = driver.find_element(by=By.XPATH, value=field_xpath)
        box.send_keys(text)

    input("Press ENTER when CAPTCHA is completed\n")


# Returns the name of the website
def getMarketName():
    """Return the name of this market, 'BlackPyramid'."""
    return 'BlackPyramid'


# Return the link of the website
def getFixedURL():
    """Return the fixed login URL of the market."""
    return 'http://blackpyoc3gbnrlvxqvvytd3kxqj7pd226i2gvfyhysj24ne2snkmnyd.onion/login/?login=1'


# Closes Tor Browser
def closetor(driver):
    """Quit the given WebDriver session, then pause for three seconds."""
    print('Closing Tor...')
    driver.quit()
    # Short pause after quitting before the caller continues.
    time.sleep(3)


# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
    """Build and return a Firefox WebDriver configured from setup.ini.

    The binary, profile and geckodriver paths come from the 'TOR' section of
    the configuration. The profile is given the preferences listed below,
    including a SOCKS v5 proxy at 127.0.0.1:9150 with remote DNS.
    """
    ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
    ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))

    # Applied in order; names and values are passed to Firefox unchanged.
    profile_preferences = (
        # history / stored data
        ("places.history.enabled", False),
        ("privacy.clearOnShutdown.offlineApps", True),
        ("privacy.clearOnShutdown.passwords", True),
        ("privacy.clearOnShutdown.siteSettings", True),
        ("privacy.sanitize.sanitizeOnShutdown", True),
        ("signon.rememberSignons", False),
        # network behaviour
        ("network.cookie.lifetimePolicy", 2),
        ("network.dns.disablePrefetch", True),
        ("network.http.sendRefererHeader", 0),
        ("permissions.default.image", 3),
        # downloads
        ("browser.download.folderList", 2),
        ("browser.download.manager.showWhenStarting", False),
        ("browser.helperApps.neverAsk.saveToDisk", "text/plain"),
        # SOCKS proxy
        ('network.proxy.type', 1),
        ("network.proxy.socks_version", 5),
        ('network.proxy.socks', '127.0.0.1'),
        ('network.proxy.socks_port', 9150),
        ('network.proxy.socks_remote_dns', True),
        # scripting
        ("javascript.enabled", False),
    )
    for pref_name, pref_value in profile_preferences:
        ff_prof.set_preference(pref_name, pref_value)
    ff_prof.update_preferences()

    service = Service(config.get('TOR', 'geckodriver_path'))

    return webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)


def getAccess():
    """Open the browser and load the market's login URL.

    Returns:
        The WebDriver instance when the page load did not raise, otherwise
        the string 'down' (callers compare against this sentinel).
    """
    url = getFixedURL()
    driver = createFFDriver()
    input('Tor Connected. Press ENTER to continue\n')
    try:
        driver.get(url)
    except Exception:
        # quit() ends the whole session (browser and driver process);
        # close() would only close the current window and leave them running.
        # Catching Exception rather than everything lets Ctrl+C propagate.
        driver.quit()
        return 'down'
    return driver


# Saves the crawled html page
def savePage(page, url):
    """Clean the given HTML and write it to the file derived from ``url``.

    Args:
        page: HTML source of the page to save.
        url: URL (or listing key) used to derive the target file name.

    The target directory is created if it does not exist; the content is
    written UTF-8 encoded.
    """
    cleanPage = cleanHTML(page)
    filePath = getFullPathName(url)
    os.makedirs(os.path.dirname(filePath), exist_ok=True)
    # Context manager guarantees the handle is flushed and closed, even if
    # the write fails.
    with open(filePath, 'wb') as pageFile:
        pageFile.write(cleanPage.encode('utf-8'))


# Gets the full path of the page to be saved along with its appropriate file name
def getFullPathName(url):
    """Return the path under which the page for ``url`` should be saved.

    Pages go to ``HTML_Pages\\<CURRENT_DATE>\\Description`` when the URL is a
    description link and to ``...\\Listing`` otherwise. If ``<name>.html``
    already exists, a ``(<counter>)`` suffix is appended, and the module-level
    ``counter`` is advanced until the suffixed path is unused, so repeated
    saves under the same name (e.g. successive pages of one listing) do not
    overwrite each other.
    """
    global counter
    # Imported here rather than at module level, as in the original code.
    from MarketPlaces.Initialization.markets_mining import CURRENT_DATE

    fileName = getNameFromURL(url)
    subFolder = 'Description' if isDescriptionLink(url) else 'Listing'
    basePath = r'..\BlackPyramid\HTML_Pages\\' + CURRENT_DATE + r'\\' + subFolder + r'\\' + fileName

    fullPath = basePath + '.html'
    if not os.path.exists(fullPath):
        return fullPath

    # Name already taken: use the counter suffix, skipping suffixes that are
    # themselves already present on disk.
    fullPath = basePath + "(" + str(counter) + ")" + '.html'
    while os.path.exists(fullPath):
        counter += 1
        fullPath = basePath + "(" + str(counter) + ")" + '.html'
    return fullPath


# Creates the file name from passed URL
def getNameFromURL(url):
    """Derive a file name from ``url`` by keeping only its alphanumeric characters.

    If nothing remains, the current value of the module-level ``counter`` is
    used as the name and the counter is incremented.
    """
    global counter

    kept_chars = [ch for ch in url if ch.isalnum()]
    if kept_chars:
        return ''.join(kept_chars)

    fallback_name = str(counter)
    counter += 1
    return fallback_name

def goToPage(driver, page):
    """Navigate to a category by hovering over a menu entry and clicking an input.

    Args:
        driver: the active WebDriver.
        page: value of the ``name`` attribute of the input to click
            (e.g. one of the keys from getInterestedLinks()).

    Blocks until the element at the hard-coded article-heading XPath is
    visible, waiting at most 100 seconds.
    """
    actions = ActionChains(driver)

    # Hover over the menu entry (digital -> hacking tools in the original note).
    menu_entry = driver.find_element(By.XPATH, "//li[@class='dig940']/div/a")
    time.sleep(1)
    actions.move_to_element(menu_entry).perform()
    print(menu_entry)

    # Delay for the website to register the hover.
    time.sleep(10)

    # Click the input whose name matches the requested page.
    input_xpath = "//input[@name='" + page + "']"
    category_input = driver.find_element(By.XPATH, input_xpath)
    time.sleep(1)
    actions.move_to_element(category_input).click().perform()
    print(category_input)

    # Wait for the website to load.
    loaded_marker = (By.XPATH, '/html/body/center/div[4]/div[1]/div[3]/article/div[1]/h1/a')
    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(loaded_marker))


def getInterestedLinks():
    """Return the category keys to crawl, as a new list on every call."""
    return [
        'h11',  # Hacking Tools
        'g3',   # Guides, Hacking
        'se3',  # Services, Hacking
        'f6',   # Fraud software
    ]


def crawlForum(driver):
    """Crawl every category from getInterestedLinks() and save all pages.

    For each category: open it, save the listing page, visit and save every
    product link found on it, then move to the next listing page through the
    page selector until no further page can be selected. An error in one
    category is printed with its traceback and crawling continues with the
    next category. Prompts the operator when finished.

    Args:
        driver: the logged-in WebDriver.
    """
    print("Crawling the BlackPyramid market")

    for listing in getInterestedLinks():
        print('Crawling :', listing)

        try:
            try:
                goToPage(driver, listing)
            except Exception:
                print("Try block 1")
                driver.refresh()
            time.sleep(5)
            html = driver.page_source
            savePage(html, listing)

            has_next_page = True
            currentPage = 1
            numberOfPages = 1
            while has_next_page:
                # visit and save each product linked from the current listing page
                for item in productPages(html):
                    itemURL = urlparse.urljoin(baseURL, str(item))
                    try:
                        driver.get(itemURL)
                    except Exception:
                        print("Try block 2")
                        driver.refresh()
                    savePage(driver.page_source, item)
                    # can't use the back button in dark pyramid, so the
                    # category is re-opened through the menu below instead

                # go to next page of market; NoSuchElementException is used
                # as the "no more pages" signal
                try:
                    goToPage(driver, listing)
                    nav = driver.find_element(by=By.XPATH, value="//input[@name='next_page']")

                    if not nav.is_enabled():
                        raise NoSuchElementException
                    try:
                        # select next page; option index 0 is the first entry,
                        # so currentPage indexes the entry after the current one
                        pgnum = uiClasses.Select(driver.find_element(by=By.XPATH, value="//select[@name='pageination']"))
                        print("pg options:", pgnum.options)
                        pgnum.select_by_index(currentPage)
                        numberOfPages = len(pgnum.options)

                        # click button
                        pgbutton = driver.find_element(by=By.XPATH, value="//input[@value='go to page']")
                        pgbutton.click()
                    except Exception as e:
                        # any failure while paging ends this category's pagination
                        print(e)
                        raise NoSuchElementException from e
                    time.sleep(10)
                    html = driver.page_source
                    savePage(html, listing)
                    currentPage += 1
                    if currentPage > numberOfPages:
                        raise NoSuchElementException

                except NoSuchElementException:
                    has_next_page = False

        except Exception as e:
            traceback.print_exc()
            print(listing, e)

    input("Crawling Dark Pyramid done successfully. Press ENTER to continue\n")


# Returns True if the link is a product description link
def isDescriptionLink(url):
    """Return True when ``url`` contains the substring 'product', else False."""
    return 'product' in url


# Returns True if the link is a listingPage link
def isListingLink(url):
    """Return True when ``url`` contains the substring 'category=', else False."""
    return 'category=' in url


# calling the parser to define the links
def productPages(html):
    """Parse ``html`` with BeautifulSoup and return the links parser's result."""
    return BlackPyramid_links_parser(BeautifulSoup(html, "html.parser"))


def crawler():
    """Entry point for callers: run the full BlackPyramid crawl."""
    startCrawling()

# Run the crawl when this file is executed directly as a script.
if __name__ == '__main__':
    startCrawling()