__author__ = 'DarkWeb'

'''
Nulled Forum Crawler (Selenium)
'''

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from PIL import Image
import base64
from io import BytesIO


import urllib.parse as urlparse
import os, re, time
from datetime import date
import subprocess
from bs4 import BeautifulSoup
from Forums.Initialization.prepare_parser import new_parse
from Forums.Nulled.parser import nulled_links_parser
from Forums.Utilities.utilities import cleanHTML

counter = 1
baseURL = 'https://www.nulled.to'


# Opens Tor Browser and crawls the website; the crawl steps below are
# currently commented out, so only the parsing step runs
def startCrawling():
    # opentor()
    forumName = getForumName()
    # driver = getAccess()
    #
    # if driver != 'down':
    #     login(driver)
    #     crawlForum(driver)
    #     closetor(driver)

    new_parse(forumName, False)


# Opens Tor Browser
def opentor():
    global pid
    print("Connecting Tor...")
    with open('../../path.txt') as file:
        path = file.readline().strip()
    pro = subprocess.Popen(path)
    pid = pro.pid
    time.sleep(7.5)
    input('Tor Connected. Press ENTER to continue\n')
    return


# Login using premade account credentials and solve the login captcha
# manually; currently a stub that only waits briefly
def login(driver):
    time.sleep(3)
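

# A minimal sketch of a manual-captcha login flow for this crawler, assuming
# hypothetical form field names ('auth', 'password') and a hypothetical
# post-login element XPath; none of these are confirmed against Nulled's
# actual markup.
def loginSketch(driver):
    driver.find_element(by=By.NAME, value='auth').send_keys('username')
    driver.find_element(by=By.NAME, value='password').send_keys('password')
    input("Solve the login captcha manually, then press ENTER to continue\n")
    # block until an element that should only exist after login is visible
    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
        (By.XPATH, '//*[@id="elUserLink"]')))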


# Returns the name of the website
def getForumName():
    name = 'Nulled'
    return name


# Returns the link of the website
def getFixedURL():
    url = 'https://www.nulled.to'
    return url


# Closes Tor Browser
def closetor(driver):
    global pid
    # os.system("taskkill /pid " + str(pro.pid))
    os.system("taskkill /t /f /im tor.exe")
    print('Closing Tor...')
    driver.close()
    time.sleep(3)
    return
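
# Note: taskkill above is Windows-only. A cross-platform sketch, assuming the
# subprocess.Popen handle from opentor() were kept (say, as a module-level
# 'pro') instead of only its pid:
#
#     pro.terminate()       # ask the Tor process to exit
#     pro.wait(timeout=10)  # reap it; escalate to pro.kill() if it hangs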


# Creates FireFox 'driver' and configures its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
    with open('../../path.txt', 'r') as file:
        lines = file.readlines()

    ff_binary = FirefoxBinary(lines[0].strip())

    ff_prof = FirefoxProfile(lines[1].strip())
    # ff_prof.set_preference("places.history.enabled", False)
    # ff_prof.set_preference("privacy.clearOnShutdown.offlineApps", True)
    # ff_prof.set_preference("privacy.clearOnShutdown.passwords", True)
    # ff_prof.set_preference("privacy.clearOnShutdown.siteSettings", True)
    # ff_prof.set_preference("privacy.sanitize.sanitizeOnShutdown", True)
    # ff_prof.set_preference("signon.rememberSignons", False)
    # ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
    # ff_prof.set_preference("network.dns.disablePrefetch", True)
    # ff_prof.set_preference("network.http.sendRefererHeader", 0)
    # ff_prof.set_preference("permissions.default.image", 3)
    # ff_prof.set_preference("browser.download.folderList", 2)
    # ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
    # ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
    ff_prof.set_preference('network.proxy.type', 1)
    ff_prof.set_preference("network.proxy.socks_version", 5)
    ff_prof.set_preference('network.proxy.socks', '127.0.0.1')
    ff_prof.set_preference('network.proxy.socks_port', 9150)
    ff_prof.set_preference('network.proxy.socks_remote_dns', True)
    ff_prof.set_preference("javascript.enabled", True)
    ff_prof.update_preferences()

    service = Service(lines[2].strip())

    driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)

    return driver
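

# Newer Selenium 4 releases deprecate the firefox_binary/firefox_profile
# keyword arguments; a sketch of the Options-based equivalent, reusing the
# same paths and preferences read from path.txt:
#
#     from selenium.webdriver.firefox.options import Options
#
#     opts = Options()
#     opts.binary_location = lines[0].strip()
#     opts.set_preference('network.proxy.type', 1)
#     opts.set_preference('network.proxy.socks', '127.0.0.1')
#     opts.set_preference('network.proxy.socks_port', 9150)
#     opts.set_preference('network.proxy.socks_remote_dns', True)
#     driver = webdriver.Firefox(service=Service(lines[2].strip()), options=opts)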


def getAccess():
    url = getFixedURL()
    driver = createFFDriver()

    try:
        driver.get(url)
        # time.sleep(3)
        return driver

    except Exception:
        return 'down'


# Saves the crawled html page
def savePage(page, url):
    cleanPage = cleanHTML(page)
    filePath = getFullPathName(url)
    os.makedirs(os.path.dirname(filePath), exist_ok=True)
    with open(filePath, 'wb') as file:
        file.write(cleanPage.encode('utf-8'))
    return


# Gets the full path of the page to be saved along with its appropriate file name
def getFullPathName(url):
    fileName = getNameFromURL(url)
    dateStr = date.today().strftime('%m%d%Y')
    subdir = 'Description' if isDescriptionLink(url) else 'Listing'
    fullPath = os.path.join('..', 'Nulled', 'HTML_Pages', dateStr, subdir, fileName + '.html')
    return fullPath
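
# Example: on June 15th, 2023 (an arbitrary illustrative date), the topic URL
# 'https://www.nulled.to/topic/123-example/' would be saved to
#   ../Nulled/HTML_Pages/06152023/Description/httpswwwnulledtotopic123example.html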


# Creates the file name from passed URL
def getNameFromURL(url):
    global counter
    name = ''.join(e for e in url if e.isalnum())
    if (name == ''):
        name = str(counter)
        counter = counter + 1
    return name


def getInterestedLinks():
    links = []

    # Cracking Tools
    links.append('https://www.nulled.to/forum/90-cracking-tools/')
    # # Cracking Tutorials
    # links.append('https://www.nulled.to/forum/98-cracking-tutorials/')
    # # Releases
    # links.append('https://www.nulled.to/forum/209-releases/')
    # # .NET Framework
    # links.append('https://www.nulled.to/forum/51-net-framework/')
    # # HTML, CSS, JS, PHP
    # links.append('https://www.nulled.to/forum/54-html-css-js-php/')
    # # C / C++
    # links.append('https://www.nulled.to/forum/52-cc/')
    # # Other Languages
    # links.append('https://www.nulled.to/forum/135-other-languages/')

    return links


def crawlForum(driver):
    print("Crawling the Nulled forum")

    linksToCrawl = getInterestedLinks()
    initialTime = time.time()

    i = 0
    count = 0
    while i < len(linksToCrawl):
        link = linksToCrawl[i]
        print('Crawling :', link)
        try:
            try:
                driver.get(link)
            except Exception:
                driver.refresh()
            html = driver.page_source
            savePage(html, link)

            has_next_page = True
            while has_next_page:
                # pull the topic links from the current listing page
                topics = topicPages(html)
                for topic in topics:
                    itemURL = urlparse.urljoin(baseURL, str(topic))
                    try:
                        driver.get(itemURL)
                    except Exception:
                        driver.refresh()
                    savePage(driver.page_source, topic)
                    driver.back()
                    # test limiter: crawl only the first topic of each listing page
                    break

                # test limiter: stop after following a single 'next page' link
                if count == 1:
                    count = 0
                    break

                try:
                    # locate the 'next' link inside the listing's pagination bar
                    temp = driver.find_element(by=By.XPATH, value='/html/body/div[4]/div[3]/div/div[3]/div[4]')
                    temp = temp.find_element(by=By.CLASS_NAME, value='pagination')
                    link = temp.find_element(by=By.CLASS_NAME, value='next')
                    link = link.find_element(by=By.TAG_NAME, value='a').get_attribute('href')

                    if link == "":
                        raise NoSuchElementException
                    try:
                        driver.get(link)
                    except Exception:
                        driver.refresh()
                    html = driver.page_source
                    savePage(html, link)
                    count += 1

                except NoSuchElementException:
                    has_next_page = False

        except Exception as e:
            print(link, e)
        i += 1

    # finalTime = time.time()
    # print(finalTime - initialTime)

    input("Crawling Nulled forum done sucessfully. Press ENTER to continue\n")


# Returns 'True' if the link is a Topic link
def isDescriptionLink(url):
    return 'topic/' in url


# Returns 'True' if the link is a listing page link
def isListingLink(url):
    return 'forum/' in url


# Calls the parser to extract the topic links from a listing page
def topicPages(html):
    soup = BeautifulSoup(html, "html.parser")
    #print(soup.find('div', id="container").find('div', id="content").find('table', {"class": "tborder clear"}).find('tbody').find('tr',{"class": "inline_row"}).find('strong').text)
    return nulled_links_parser(soup)
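

# For reference, a minimal sketch of a topic-link extractor for a listing
# page; nulled_links_parser is the project's real implementation, and the
# only markup assumption here is the one this file already makes: topic
# URLs contain 'topic/'.
def topicLinksSketch(soup):
    links = []
    for tag in soup.find_all('a', href=True):
        if 'topic/' in tag['href']:
            links.append(tag['href'])
    return links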


def crawler():
    startCrawling()
    # print("Crawling and Parsing BestCardingWorld .... DONE!")