from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.firefox.service import Service
import os
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from PIL import Image

import codecs
import time
from datetime import date
import urllib.parse as urlparse
import os
from bs4 import BeautifulSoup
from MarketPlaces.DarkFox.parser import darkfox_links_parser

# path.txt is read relative to the current working directory and supplies, one
# per line: the Firefox binary, the Firefox profile directory and geckodriver.
# The `with` block closes the handle as soon as the lines are loaded (the
# original left it open for the lifetime of the process).
with open('../../path.txt', 'r') as file:
    lines = file.readlines()

# torexe = os.popen(lines[0].strip())  # path for tor.exe
binary = FirefoxBinary(lines[0].strip())  # full path for firefox.exe
# options = Options()
profile = FirefoxProfile(lines[1].strip())  # full path for profile.default
# Send traffic through the SOCKS proxy at 127.0.0.1:9150 and resolve DNS
# through the proxy as well.
# NOTE(review): 9150 is presumably the local Tor Browser SOCKS port - confirm.
profile.set_preference('network.proxy.type', 1)
profile.set_preference('network.proxy.socks', '127.0.0.1')
profile.set_preference('network.proxy.socks_port', 9150)
profile.set_preference("network.proxy.socks_remote_dns", True)
profile.update_preferences()
service = Service(lines[2].strip())  # full path for geckodriver.exe
# NOTE(review): the firefox_binary / firefox_profile keyword arguments are
# deprecated in Selenium 4 - confirm they are accepted by the installed version.
driver = webdriver.Firefox(firefox_binary=binary, firefox_profile=profile,
                           service=service)

# Manual captcha solver
def captcha(driver):
    """Let a human operator solve the captcha shown by the marketplace.

    Waits for the captcha form, saves a screenshot of the captcha element,
    opens that screenshot in an image viewer, reads the solution from the
    terminal, submits it and finally waits for the listing page heading.

    :param driver: Selenium WebDriver that has already loaded the captcha page.
    :raises selenium.common.exceptions.TimeoutException: if either wait
        (100 seconds each) expires.
    :raises selenium.common.exceptions.NoSuchElementException: if the captcha
        image, input box or submit button cannot be located.
    """
    # The screenshot is written to and read back from this one path. The
    # original re-opened it from a hard-coded absolute directory, which only
    # worked when the script ran from that exact location.
    captchaImagePath = "captcha.png"

    # wait for the captcha page to show up (submit button becomes visible)
    WebDriverWait(driver, 100).until(EC.visibility_of_element_located((By.XPATH, "/html/body/div/div/form/button[1]")))

    # save the captcha element as a local image
    driver.find_element(by=By.XPATH, value="/html/body/div/div/form/div[1]/div[1]").screenshot(captchaImagePath)

    # show the saved image in an image viewer; the context manager closes the
    # underlying file once the viewer has been launched
    with Image.open(captchaImagePath) as im:
        im.show()

    # locate the input box for the captcha solution
    inputBox = driver.find_element(by=By.XPATH, value="/html/body/div/div/form/div[1]/div[2]/input")

    # ask the user to type the captcha solution in the terminal
    userIn = input("Enter solution: ")

    # send the user's solution into the input box
    inputBox.send_keys(userIn)

    # click the verify (submit) button
    driver.find_element(by=By.XPATH, value="/html/body/div/div/form/button[1]").click()

    # wait for the listing page to show up (this XPath may need to change based on a different seed url)
    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
        (By.XPATH, "/html/body/main/div/div/div[2]/div[1]/div[1]/form/div[1]/h1")))


# Saves the crawled html page
def savePage(page, url):
    """Write the crawled page to the file derived from its URL.

    The target directory is created when it does not exist yet, and an
    existing file at the same path is overwritten.

    :param page: page content as bytes (the file is opened in binary mode).
    :param url: URL the page was fetched from; determines the file path.
    :return: None
    """
    filePath = getFullPathName(url)
    os.makedirs(os.path.dirname(filePath), exist_ok=True)
    # Use a context manager so the handle is flushed and closed right away
    # instead of relying on garbage collection.
    with open(filePath, 'wb') as pageFile:
        pageFile.write(page)
    return


# Gets the full path of the page to be saved along with its appropriate file name
def getFullPathName(url):
    """Build the absolute path under which the page for *url* is stored.

    The path has the form ``<HTML_Pages>\\<MMDDYYYY>\\<kind>\\<name>.html``
    where ``<kind>`` is ``Description`` for description (product) links and
    ``Listing`` for everything else, and ``<name>`` comes from
    ``getNameFromURL``.

    :param url: URL of the crawled page.
    :return: full path of the target file as a string.
    """
    fileName = getNameFromURL(url)

    # Take one snapshot of today's date. The original called date.today()
    # three times, so a call straddling midnight could mix two dates.
    today = date.today()
    dateFolder = "%02d%02d%04d" % (today.month, today.day, today.year)

    # Only the sub-folder differs between the two kinds of page.
    if isDescriptionLink(url):
        kindFolder = r'Description\\'
    else:
        kindFolder = r'Listing\\'

    fullPath = r'C:\Users\CALSysLab\Documents\threatIntelligence-main\DarkWebMining_Working\MarketPlaces\DarkFox\HTML_Pages\\' + dateFolder + r'\\' + kindFolder + fileName + '.html'
    return fullPath


# Creates the name of the file based on URL
def getNameFromURL(url):
    """Derive a file name from *url*.

    The name is the URL with every non-alphanumeric character removed. If
    nothing is left, the module-level ``counter`` is used as the name and then
    incremented, so successive calls yield distinct numeric names.

    :param url: URL of the crawled page.
    :return: the derived file name (without extension) as a string.
    """
    global counter
    name = ''.join(e for e in url if e.isalnum())
    if name == '':
        # No initial value for ``counter`` is visible in this file, so the
        # original raised NameError here. Start at 1 on first use and keep
        # honouring a value that was set elsewhere.
        try:
            current = counter
        except NameError:
            current = 1
        name = str(current)
        counter = current + 1
    return name


def getInterestedLinks():
    """Return the category listing URLs used as crawl seeds.

    A new list is built on every call, so callers may modify the result freely.
    """
    return [
        # Guides and Tutorials
        'http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion/category/30739153-1fcd-45cd-b919-072b439c6e06',
        # Digital Products
        'http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion/category/0e384d5f-26ef-4561-b5a3-ff76a88ab781',
        # Software and Malware
        'http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion/category/6b71210f-f1f9-4aa3-8f89-bd9ee28f7afc',
        # Services
        'http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion/category/b9dc5846-5024-421e-92e6-09ba96a03280',
        # Miscellaneous
        'http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion/category/fd1c989b-1a74-4dc0-92b0-67d8c1c487cb',
        # Hosting and Security
        'http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion/category/5233fd6a-72e6-466d-b108-5cc61091cd14',
    ]


def isDescriptionLink(url):
    """Return True when *url* contains the substring 'product' (case-sensitive)."""
    return 'product' in url


def isListingLink(url):
    """Return True when *url* contains the substring 'category' (case-sensitive)."""
    return 'category' in url


def productPages(html):
    """Parse *html* with BeautifulSoup's "html.parser" and hand the resulting
    soup to darkfox_links_parser, returning whatever that parser returns."""
    return darkfox_links_parser(BeautifulSoup(html, "html.parser"))


def isSignOut(url):
    """Return True when *url* contains 'signout' or 'logout', ignoring case."""
    lowered = url.lower()
    return 'signout' in lowered or 'logout' in lowered


# dark fox seed url
baseurl = 'http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion'
driver.get(baseurl)
captcha(driver)

# visited = set()
# visited.add(br.geturl())
linksToCrawl = getInterestedLinks()
initialTime = time.time()

# Visit every seed link and store its HTML. A failure on one link is reported
# and the crawl carries on with the next one.
for link in linksToCrawl:
    print('Crawling :', link)
    try:
        driver.get(link)
        # The second argument of str.encode is the error-handler name, not the
        # URL, so encode with the default handler.
        html = driver.page_source.encode('utf-8')
        savePage(html, link)
        # Disabled: crawl of the product pages of each listing page and of the
        # following listing pages (kept for reference).
        # has_next_page = True
        # while has_next_page:
        #     j = 0
        #     items = productPages(html)
        #     for item in items:
        #         if j == 1:
        #             break
        #         itemURL = str(item)
        #         driver.get(itemURL)
        #         savePage(driver.page_source.encode('utf-8'), item)
        #         driver.back()
        #         j += 1
        #
        #     try:
        #         link = driver.find_element(by=By.XPATH, value=
        #             '/html/body/main/div/div[2]/div/div[2]/div/div/div/nav/a[2]').get_attribute('href')
        #         driver.get(link)
        #         html = driver.page_source.encode('utf-8')
        #         savePage(html, link)
        #     except NoSuchElementException:
        #         has_next_page = False
    except Exception as e:
        # Python 3 exceptions have no ``message`` attribute; printing the
        # exception itself avoids an AttributeError that would abort the crawl.
        print(link, e)

# finalTime = time.time()
# print(finalTime - initialTime)

input("Crawling DarkFox marketplace done sucessfully. Press ENTER to continue\n")