|
|
@ -31,7 +31,6 @@ baseURL = 'http://zuauw53dukqdmll5p3fld26ns2gepcyfmbofobjczdni6ecmkoitnfid.onion |
|
|
|
# Opens Tor Browser, crawls the website, closes Tor, then parses |
|
|
|
# Acts as the main method for the crawler; a function at the end of this file calls it. |
|
|
|
def startCrawling(): |
|
|
|
# opentor() |
|
|
|
mktName = getMKTName() |
|
|
|
driver = getAccess() |
|
|
|
|
|
|
@ -41,25 +40,11 @@ def startCrawling(): |
|
|
|
crawlForum(driver) |
|
|
|
except Exception as e: |
|
|
|
print(driver.current_url, e) |
|
|
|
closetor(driver) |
|
|
|
closeDriver(driver) |
|
|
|
|
|
|
|
new_parse(mktName, baseURL, True) |
|
|
|
|
|
|
|
|
|
|
|
# Opens Tor Browser |
|
|
|
#prompts for ENTER input to continue |
|
|
|
def opentor():
    """Launch the Tor Browser binary and wait for the operator to confirm.

    Reads the executable path from the ``firefox_binary_path`` option of the
    ``TOR`` section of the project config, starts it as a child process,
    stores the child's process id in the module-level ``pid``, sleeps for
    7.5 seconds, and then blocks until ENTER is pressed on stdin.
    """
    # Imported at call time, exactly as the original does.
    from MarketPlaces.Initialization.markets_mining import config

    global pid

    print("Connecting Tor...")
    browser_path = config.get('TOR', 'firefox_binary_path')
    tor_process = subprocess.Popen(browser_path)
    pid = tor_process.pid

    # Fixed pause before prompting (presumably to let the browser start up).
    time.sleep(7.5)
    input('Tor Connected. Press ENTER to continue\n')
|
|
|
|
|
|
|
|
|
|
|
# Returns the name of the website |
|
|
|
#return: name of the site as a string |
|
|
|
def getMKTName(): |
|
|
@ -76,7 +61,7 @@ def getFixedURL(): |
|
|
|
|
|
|
|
# Closes Tor Browser |
|
|
|
#@param: current selenium driver |
|
|
|
def closetor(driver): |
|
|
|
def closeDriver(driver): |
|
|
|
# global pid |
|
|
|
# os.system("taskkill /pid " + str(pro.pid)) |
|
|
|
# os.system("taskkill /t /f /im tor.exe") |
|
|
@ -103,7 +88,7 @@ def createFFDriver(): |
|
|
|
ff_prof.set_preference("network.cookie.lifetimePolicy", 2) |
|
|
|
ff_prof.set_preference("network.dns.disablePrefetch", True) |
|
|
|
ff_prof.set_preference("network.http.sendRefererHeader", 0) |
|
|
|
ff_prof.set_preference("permissions.default.image", 2) |
|
|
|
ff_prof.set_preference("permissions.default.image", 3) |
|
|
|
ff_prof.set_preference("browser.download.folderList", 2) |
|
|
|
ff_prof.set_preference("browser.download.manager.showWhenStarting", False) |
|
|
|
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") |
|
|
@ -145,6 +130,7 @@ def login(driver): |
|
|
|
WebDriverWait(driver, 100).until(EC.visibility_of_element_located( |
|
|
|
(By.XPATH, "/html/body/div[1]/div/div/div[2]/main/div/div/section[5]/div/div[1]/div"))) |
|
|
|
|
|
|
|
|
|
|
|
# Saves the crawled HTML page, creating the directory path for HTML pages if it does not exist |
|
|
|
def savePage(driver, page, url): |
|
|
|
cleanPage = cleanHTML(driver, page) |
|
|
|