diff --git a/.idea/DW_Pipeline_Test.iml b/.idea/DW_Pipeline_Test.iml index 08a5719..ba22e8a 100644 --- a/.idea/DW_Pipeline_Test.iml +++ b/.idea/DW_Pipeline_Test.iml @@ -2,7 +2,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index dc9ea49..11f1ea0 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,4 +1,4 @@ - + \ No newline at end of file diff --git a/Forums/AbyssForum/crawler_selenium.py b/Forums/AbyssForum/crawler_selenium.py index 4823db2..f33b521 100644 --- a/Forums/AbyssForum/crawler_selenium.py +++ b/Forums/AbyssForum/crawler_selenium.py @@ -30,7 +30,6 @@ baseURL = 'http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion # Opens Tor Browser, crawls the website def startCrawling(): - # opentor() forumName = getForumName() driver = getAccess() @@ -40,24 +39,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(forumName, baseURL, True) -# Opens Tor Browser -def opentor(): - from Forums.Initialization.forums_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Login using premade account credentials and do login captcha manually def login(driver): # wait for listing page show up (This Xpath may need to change based on different seed url) @@ -78,7 +64,7 @@ def getFixedURL(): # Closes Tor Browser -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") diff --git a/Forums/Altenens/crawler_selenium.py b/Forums/Altenens/crawler_selenium.py index e996150..8f1994f 100644 --- a/Forums/Altenens/crawler_selenium.py +++ b/Forums/Altenens/crawler_selenium.py @@ -30,7 +30,6 @@ baseURL = 'https://altenens.is/' # Opens Tor Browser, crawls the website def startCrawling(): - # opentor() forumName = getForumName() driver = getAccess() @@ -40,24 +39,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(forumName, baseURL, True) -# Opens Tor Browser -def opentor(): - from Forums.Initialization.forums_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Login using premade account credentials and do login captcha manually def login(driver): #click login button @@ -93,7 +79,7 @@ def getFixedURL(): # Closes Tor Browser -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") diff --git a/Forums/BestCardingWorld/crawler_selenium.py b/Forums/BestCardingWorld/crawler_selenium.py index 22f6077..487863b 100644 --- a/Forums/BestCardingWorld/crawler_selenium.py +++ b/Forums/BestCardingWorld/crawler_selenium.py @@ -27,7 +27,6 @@ baseURL = 'http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion # Opens Tor Browser, crawls the website, then parses, then closes tor #acts like the main method for the crawler, another function at the end of this code calls this function later def startCrawling(): - # opentor() forumName = getForumName() driver = getAccess() @@ -36,25 +35,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(forumName, baseURL, True) -# Opens Tor Browser -#prompts for ENTER input to continue -def opentor(): - from Forums.Initialization.forums_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Returns the name of the website #return: name of site in string type def getForumName(): @@ -71,7 +56,7 @@ def getFixedURL(): # Closes Tor Browser #@param: current selenium driver -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") @@ -98,7 +83,7 @@ def createFFDriver(): ff_prof.set_preference("network.cookie.lifetimePolicy", 2) ff_prof.set_preference("network.dns.disablePrefetch", True)#might need to turn off ff_prof.set_preference("network.http.sendRefererHeader", 0) - ff_prof.set_preference("permissions.default.image", 2) + ff_prof.set_preference("permissions.default.image", 3) ff_prof.set_preference("browser.download.folderList", 2) ff_prof.set_preference("browser.download.manager.showWhenStarting", False) ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") diff --git a/Forums/Cardingleaks/crawler_selenium.py b/Forums/Cardingleaks/crawler_selenium.py index 70200ff..caf4a9a 100644 --- a/Forums/Cardingleaks/crawler_selenium.py +++ b/Forums/Cardingleaks/crawler_selenium.py @@ -32,7 +32,6 @@ baseURL = 'https://leaks.ws/' # Opens Tor Browser, crawls the website def startCrawling(): - # opentor() forumName = getForumName() driver = getAccess() @@ -42,24 +41,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(forumName, baseURL, True) -# Opens Tor Browser -def opentor(): - from Forums.Initialization.forums_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Login using premade account credentials and do login captcha manually def login(driver): #click login button @@ -101,7 +87,7 @@ def getFixedURL(): # Closes Tor Browser -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") diff --git a/Forums/CryptBB/crawler_selenium.py b/Forums/CryptBB/crawler_selenium.py index a462a65..637078c 100644 --- a/Forums/CryptBB/crawler_selenium.py +++ b/Forums/CryptBB/crawler_selenium.py @@ -28,7 +28,6 @@ baseURL = 'http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion # Opens Tor Browser, crawls the website def startCrawling(): - # opentor() forumName = getForumName() driver = getAccess() @@ -38,24 +37,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(forumName, baseURL, True) -# Opens Tor Browser -def opentor(): - from Forums.Initialization.forums_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Login using premade account credentials and do login captcha manually def login(driver): #click login button @@ -119,7 +105,7 @@ def getFixedURL(): # Closes Tor Browser -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") diff --git a/Forums/HiddenAnswers/crawler_selenium.py b/Forums/HiddenAnswers/crawler_selenium.py index 392c90f..a7f37ea 100644 --- a/Forums/HiddenAnswers/crawler_selenium.py +++ b/Forums/HiddenAnswers/crawler_selenium.py @@ -30,7 +30,6 @@ baseURL = 'http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion # Opens Tor Browser, crawls the website def startCrawling(): - # opentor() forumName = getForumName() driver: webdriver.Firefox = getAccess() @@ -40,24 +39,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(forumName, baseURL, True) -# Opens Tor Browser -def opentor(): - from Forums.Initialization.forums_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Login using premade account credentials and do login captcha manually def login(driver): # wait for listing page show up (This Xpath may need to change based on different seed url) @@ -78,7 +64,7 @@ def getFixedURL(): # Closes Tor Browser -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") diff --git a/Forums/Initialization/forumsList.txt b/Forums/Initialization/forumsList.txt index efa9686..9cfeb56 100644 --- a/Forums/Initialization/forumsList.txt +++ b/Forums/Initialization/forumsList.txt @@ -1 +1,8 @@ -BestCardingWorld \ No newline at end of file +Altenens +BestCardingWorld +Cardingleaks +CryptBB +HiddenAnswers +Libre +OnniForums +Procrax \ No newline at end of file diff --git a/Forums/Libre/crawler_selenium.py b/Forums/Libre/crawler_selenium.py index 1033474..98b5517 100644 --- a/Forums/Libre/crawler_selenium.py +++ b/Forums/Libre/crawler_selenium.py @@ -28,7 +28,6 @@ baseURL = 'http://libreeunomyly6ot7kspglmbd5cvlkogib6rozy43r2glatc6rmwauqd.onion # Opens Tor Browser, crawls the website def startCrawling(): - # opentor() forumName = getForumName() driver = getAccess() @@ -38,24 +37,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(forumName, baseURL, True) -# Opens Tor Browser -def opentor(): - from Forums.Initialization.forums_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Login using premade account credentials and do login captcha manually def login(driver): @@ -101,7 +87,7 @@ def getFixedURL(): # Closes Tor Browser -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") diff --git a/Forums/OnniForums/crawler_selenium.py b/Forums/OnniForums/crawler_selenium.py index 0888d14..806f869 100644 --- a/Forums/OnniForums/crawler_selenium.py +++ b/Forums/OnniForums/crawler_selenium.py @@ -31,7 +31,6 @@ baseURL = 'http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion # Opens Tor Browser, crawls the website def startCrawling(): - # opentor() forumName = getForumName() driver = getAccess() @@ -41,24 +40,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(forum=forumName, url=baseURL, createLog=True) -# Opens Tor Browser -def opentor(): - from Forums.Initialization.forums_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Login using premade account credentials and do login captcha manually def login(driver): #click login button @@ -96,7 +82,7 @@ def getFixedURL(): # Closes Tor Browser -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") @@ -141,7 +127,6 @@ def createFFDriver(): driver.maximize_window() - return driver diff --git a/Forums/Procrax/crawler_selenium.py b/Forums/Procrax/crawler_selenium.py index 2a8be96..7115d6c 100644 --- a/Forums/Procrax/crawler_selenium.py +++ b/Forums/Procrax/crawler_selenium.py @@ -32,7 +32,6 @@ FORUM_NAME = 'Procrax' # Opens Tor Browser, crawls the website def startCrawling(): - # opentor() driver = getAccess() if driver != 'down': @@ -41,7 +40,7 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse( forum=FORUM_NAME, @@ -50,19 +49,6 @@ def startCrawling(): ) -# Opens Tor Browser -def opentor(): - from Forums.Initialization.forums_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Login using premade account credentials and do login captcha manually def login(driver): WebDriverWait(driver, 50).until(EC.visibility_of_element_located( @@ -97,7 +83,7 @@ def getFixedURL(): # Closes Tor Browser -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") diff --git a/MarketPlaces/AnonymousMarketplace/crawler_selenium.py b/MarketPlaces/AnonymousMarketplace/crawler_selenium.py index f7f9f17..3a26b5e 100644 --- a/MarketPlaces/AnonymousMarketplace/crawler_selenium.py +++ b/MarketPlaces/AnonymousMarketplace/crawler_selenium.py @@ -32,7 +32,6 @@ baseURL = 'http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion # Opens Tor Browser, crawls the website, then parses, then closes tor #acts like the main method for the crawler, another function at the end of this code calls this function later def startCrawling(): - # opentor() mktName = getMKTName() driver = getAccess() @@ -42,25 +41,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(mktName, baseURL, True) -# Opens Tor Browser -#prompts for ENTER input to continue -def opentor(): - from MarketPlaces.Initialization.markets_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Returns the name of the website #return: name of site in string type def getMKTName(): @@ -77,7 +62,7 @@ def getFixedURL(): # Closes Tor Browser #@param: current selenium driver -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") @@ -104,7 +89,7 @@ def createFFDriver(): ff_prof.set_preference("network.cookie.lifetimePolicy", 2) ff_prof.set_preference("network.dns.disablePrefetch", True) ff_prof.set_preference("network.http.sendRefererHeader", 0) - ff_prof.set_preference("permissions.default.image", 1) + ff_prof.set_preference("permissions.default.image", 3) ff_prof.set_preference("browser.download.folderList", 2) ff_prof.set_preference("browser.download.manager.showWhenStarting", False) ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") @@ -146,6 +131,7 @@ def login(driver): WebDriverWait(driver, 100).until(EC.visibility_of_element_located( (By.ID, "woocommerce_product_categories-2"))) + # Saves the crawled html page, makes the directory path for html pages if not made def savePage(driver, page, url): cleanPage = cleanHTML(driver, page) diff --git a/MarketPlaces/Apocalypse/crawler_selenium.py b/MarketPlaces/Apocalypse/crawler_selenium.py index 4885c19..7a684df 100644 --- a/MarketPlaces/Apocalypse/crawler_selenium.py +++ b/MarketPlaces/Apocalypse/crawler_selenium.py @@ -32,7 +32,6 @@ baseURL = 'http://apocam5hnoqskkmhr325nivjuh5phbmmggadxgcjabzzirap5iklkxad.onion # Opens Tor Browser, crawls the website, then parses, then closes tor #acts like the main method for the crawler, another function at the end of this code calls this function later def startCrawling(): - # opentor() mktName = getMKTName() driver = getAccess() @@ -47,20 +46,6 @@ def startCrawling(): new_parse(mktName, baseURL, True) -# Opens Tor Browser -#prompts for ENTER input to continue -def opentor(): - from MarketPlaces.Initialization.markets_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Returns the name of the website #return: name of site in string type def getMKTName(): @@ -104,7 +89,7 @@ def createFFDriver(): ff_prof.set_preference("network.cookie.lifetimePolicy", 2) ff_prof.set_preference("network.dns.disablePrefetch", True) ff_prof.set_preference("network.http.sendRefererHeader", 0) - ff_prof.set_preference("permissions.default.image", 1)## + ff_prof.set_preference("permissions.default.image", 3) ff_prof.set_preference("browser.download.folderList", 2) ff_prof.set_preference("browser.download.manager.showWhenStarting", False) ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") @@ -162,6 +147,7 @@ def login(driver): WebDriverWait(driver, 100).until(EC.visibility_of_element_located( (By.XPATH, "/html/body/div[1]/div[2]/div[1]/div[1]/a[13]"))) + # Saves the crawled html page, makes the directory path for html pages if not made def savePage(driver, page, url): cleanPage = cleanHTML(driver, page) diff --git a/MarketPlaces/BlackPyramid/crawler_selenium.py b/MarketPlaces/BlackPyramid/crawler_selenium.py index ef7d7e0..b257c40 100644 --- a/MarketPlaces/BlackPyramid/crawler_selenium.py +++ b/MarketPlaces/BlackPyramid/crawler_selenium.py @@ -33,7 +33,6 @@ baseURL = 'http://blackpyoc3gbnrlvxqvvytd3kxqj7pd226i2gvfyhysj24ne2snkmnyd.onion # Opens Tor Browser, crawls the website, then parses, then closes tor #acts like the main method for the crawler, another function at the end of this code calls this function later def startCrawling(): - # opentor() mktName = getMKTName() driver = getAccess() @@ -43,25 +42,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(mktName, baseURL, True) -# Opens Tor Browser -#prompts for ENTER input to continue -def opentor(): - from MarketPlaces.Initialization.markets_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Returns the name of the website #return: name of site in string type def getMKTName(): @@ -78,7 +63,7 @@ def getFixedURL(): # Closes Tor Browser #@param: current selenium driver -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") @@ -105,7 +90,7 @@ def createFFDriver(): ff_prof.set_preference("network.cookie.lifetimePolicy", 2) ff_prof.set_preference("network.dns.disablePrefetch", True) ff_prof.set_preference("network.http.sendRefererHeader", 0) - ff_prof.set_preference("permissions.default.image", 2) + ff_prof.set_preference("permissions.default.image", 3) ff_prof.set_preference("browser.download.folderList", 2) ff_prof.set_preference("browser.download.manager.showWhenStarting", False) ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") diff --git a/MarketPlaces/CityMarket/crawler_selenium.py b/MarketPlaces/CityMarket/crawler_selenium.py index 4819f2a..8dc0e4f 100644 --- a/MarketPlaces/CityMarket/crawler_selenium.py +++ b/MarketPlaces/CityMarket/crawler_selenium.py @@ -33,7 +33,6 @@ baseURL = 'http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion # Opens Tor Browser, crawls the website, then parses, then closes tor #acts like the main method for the crawler, another function at the end of this code calls this function later def startCrawling(): - # opentor() mktName = getMKTName() driver = getAccess() @@ -43,25 +42,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(mktName, baseURL, True) -# Opens Tor Browser -#prompts for ENTER input to continue -def opentor(): - from MarketPlaces.Initialization.markets_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Returns the name of the website #return: name of site in string type def getMKTName(): @@ -78,7 +63,7 @@ def getFixedURL(): # Closes Tor Browser #@param: current selenium driver -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") @@ -105,7 +90,7 @@ def createFFDriver(): ff_prof.set_preference("network.cookie.lifetimePolicy", 2) ff_prof.set_preference("network.dns.disablePrefetch", True) ff_prof.set_preference("network.http.sendRefererHeader", 0) - ff_prof.set_preference("permissions.default.image", 1)## + ff_prof.set_preference("permissions.default.image", 3) ff_prof.set_preference("browser.download.folderList", 2) ff_prof.set_preference("browser.download.manager.showWhenStarting", False) ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") diff --git a/MarketPlaces/CypherMarketplace/crawler_selenium.py b/MarketPlaces/CypherMarketplace/crawler_selenium.py index 2219af5..6eff758 100644 --- a/MarketPlaces/CypherMarketplace/crawler_selenium.py +++ b/MarketPlaces/CypherMarketplace/crawler_selenium.py @@ -32,7 +32,6 @@ baseURL = 'http://6c5qa2ke2esh6ake6u6yoxjungz2czbbl7hqxl75v5k37frtzhxuk7ad.onion # Opens Tor Browser, crawls the website, then parses, then closes tor #acts like the main method for the crawler, another function at the end of this code calls this function later def startCrawling(): - # opentor() mktName = getMKTName() driver = getAccess() @@ -42,25 +41,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(mktName, baseURL, True) -# Opens Tor Browser -#prompts for ENTER input to continue -def opentor(): - from MarketPlaces.Initialization.markets_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Returns the name of the website #return: name of site in string type def getMKTName(): @@ -77,7 +62,7 @@ def getFixedURL(): # Closes Tor Browser #@param: current selenium driver -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") @@ -104,7 +89,7 @@ def createFFDriver(): ff_prof.set_preference("network.cookie.lifetimePolicy", 2) ff_prof.set_preference("network.dns.disablePrefetch", True) ff_prof.set_preference("network.http.sendRefererHeader", 0) - ff_prof.set_preference("permissions.default.image", 2) + ff_prof.set_preference("permissions.default.image", 3) ff_prof.set_preference("browser.download.folderList", 2) ff_prof.set_preference("browser.download.manager.showWhenStarting", False) ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") diff --git a/MarketPlaces/DarkBazar/crawler_selenium.py b/MarketPlaces/DarkBazar/crawler_selenium.py index 1f80aec..4a8f4e5 100644 --- a/MarketPlaces/DarkBazar/crawler_selenium.py +++ b/MarketPlaces/DarkBazar/crawler_selenium.py @@ -30,7 +30,6 @@ baseURL = 'http://jw5e5sdywqupaqgtt43uq5ysfqpd2vzefl65s2fcjlj4qfxivynv6bqd.onion def startCrawling(): - # opentor() mktName = getMKTName() driver = getAccess() @@ -40,24 +39,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(mktName, baseURL, True) -# Opens Tor Browser -def opentor(): - from MarketPlaces.Initialization.markets_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Returns the name of the website def getMKTName(): name = 'DarkBazar' @@ -71,7 +57,7 @@ def getFixedURL(): # Closes Tor Browser -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") @@ -98,7 +84,7 @@ def createFFDriver(): ff_prof.set_preference("network.cookie.lifetimePolicy", 2) # ff_prof.set_preference("network.dns.disablePrefetch", True) # ff_prof.set_preference("network.http.sendRefererHeader", 0) - ff_prof.set_preference("permissions.default.image", 1) + ff_prof.set_preference("permissions.default.image", 3) ff_prof.set_preference("browser.download.folderList", 2) ff_prof.set_preference("browser.download.manager.showWhenStarting", False) ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") diff --git a/MarketPlaces/DarkFox/crawler_selenium.py b/MarketPlaces/DarkFox/crawler_selenium.py index 8e1ca7b..49a4462 100644 --- a/MarketPlaces/DarkFox/crawler_selenium.py +++ b/MarketPlaces/DarkFox/crawler_selenium.py @@ -30,7 +30,6 @@ baseURL = 'http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion # Opens Tor Browser, crawls the website, then parses, then closes tor #acts like the main method for the crawler, another function at the end of this code calls this function later def startCrawling(): - # opentor() mktName = getMKTName() driver = getAccess() @@ -40,25 +39,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(mktName, baseURL, True) -# Opens Tor Browser -#prompts for ENTER input to continue -def opentor(): - from MarketPlaces.Initialization.markets_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Returns the name of the website #return: name of site in string type def getMKTName(): @@ -81,7 +66,7 @@ def getFixedURL(): # Closes Tor Browser #@param: current selenium driver -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") @@ -108,10 +93,10 @@ def createFFDriver(): # ff_prof.set_preference("network.cookie.lifetimePolicy", 2) # ff_prof.set_preference("network.dns.disablePrefetch", True) # ff_prof.set_preference("network.http.sendRefererHeader", 0) - # ff_prof.set_preference("permissions.default.image", 2) - # ff_prof.set_preference("browser.download.folderList", 2) - # ff_prof.set_preference("browser.download.manager.showWhenStarting", False) - # ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") + ff_prof.set_preference("permissions.default.image", 3) + ff_prof.set_preference("browser.download.folderList", 2) + ff_prof.set_preference("browser.download.manager.showWhenStarting", False) + ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") ff_prof.set_preference('network.proxy.type', 1) ff_prof.set_preference("network.proxy.socks_version", 5) ff_prof.set_preference('network.proxy.socks', '127.0.0.1') diff --git a/MarketPlaces/DarkMatter/crawler_selenium.py b/MarketPlaces/DarkMatter/crawler_selenium.py index 67183e5..a390abf 100644 --- a/MarketPlaces/DarkMatter/crawler_selenium.py +++ b/MarketPlaces/DarkMatter/crawler_selenium.py @@ -32,7 +32,6 @@ baseURL = 'http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion # Opens Tor Browser, crawls the website, then parses, then closes tor #acts like the main method for the crawler, another function at the end of this code calls this function later def startCrawling(): - # opentor() mktName = getMKTName() driver = getAccess() @@ -42,25 +41,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(mktName, baseURL, True) -# Opens Tor Browser -#prompts for ENTER input to continue -def opentor(): - from MarketPlaces.Initialization.markets_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Returns the name of the website #return: name of site in string type def getMKTName(): @@ -77,7 +62,7 @@ def getFixedURL(): # Closes Tor Browser #@param: current selenium driver -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") @@ -94,7 +79,6 @@ def createFFDriver(): ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) - ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof.set_preference("places.history.enabled", False) ff_prof.set_preference("privacy.clearOnShutdown.offlineApps", True) @@ -105,7 +89,7 @@ def createFFDriver(): ff_prof.set_preference("network.cookie.lifetimePolicy", 2) #ff_prof.set_preference("network.dns.disablePrefetch", True)#connection issue #ff_prof.set_preference("network.http.sendRefererHeader", 0)#connection issue - ff_prof.set_preference("permissions.default.image", 1) + ff_prof.set_preference("permissions.default.image", 3) ff_prof.set_preference("browser.download.folderList", 2) ff_prof.set_preference("browser.download.manager.showWhenStarting", False) ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") diff --git a/MarketPlaces/DarkTor/crawler_selenium.py b/MarketPlaces/DarkTor/crawler_selenium.py index 24c2990..eab640a 100644 --- a/MarketPlaces/DarkTor/crawler_selenium.py +++ b/MarketPlaces/DarkTor/crawler_selenium.py @@ -31,7 +31,6 @@ baseURL = 'http://zuauw53dukqdmll5p3fld26ns2gepcyfmbofobjczdni6ecmkoitnfid.onion # Opens Tor Browser, crawls the website, then parses, then closes tor #acts like the main method for the crawler, another function at the end of this code calls this function later def startCrawling(): - # opentor() mktName = getMKTName() driver = getAccess() @@ -41,25 +40,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(mktName, baseURL, True) -# Opens Tor Browser -#prompts for ENTER input to continue -def opentor(): - from MarketPlaces.Initialization.markets_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Returns the name of the website #return: name of site in string type def getMKTName(): @@ -76,7 +61,7 @@ def getFixedURL(): # Closes Tor Browser #@param: current selenium driver -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") @@ -103,7 +88,7 @@ def createFFDriver(): ff_prof.set_preference("network.cookie.lifetimePolicy", 2) ff_prof.set_preference("network.dns.disablePrefetch", True) ff_prof.set_preference("network.http.sendRefererHeader", 0) - ff_prof.set_preference("permissions.default.image", 2) + ff_prof.set_preference("permissions.default.image", 3) ff_prof.set_preference("browser.download.folderList", 2) ff_prof.set_preference("browser.download.manager.showWhenStarting", False) ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") @@ -145,6 +130,7 @@ def login(driver): WebDriverWait(driver, 100).until(EC.visibility_of_element_located( (By.XPATH, "/html/body/div[1]/div/div/div[2]/main/div/div/section[5]/div/div[1]/div"))) + # Saves the crawled html page, makes the directory path for html pages if not made def savePage(driver, page, url): cleanPage = cleanHTML(driver, page) diff --git a/MarketPlaces/DigitalThriftShop/crawler_selenium.py b/MarketPlaces/DigitalThriftShop/crawler_selenium.py index 39d9f5d..3162a74 100644 --- a/MarketPlaces/DigitalThriftShop/crawler_selenium.py +++ b/MarketPlaces/DigitalThriftShop/crawler_selenium.py @@ -32,7 +32,6 @@ baseURL = 'http://kw4zlnfhxje7top26u57iosg55i7dzuljjcyswo2clgc3mdliviswwyd.onion # Opens Tor Browser, crawls the website, then parses, then closes tor #acts like the main method for the crawler, another function at the end of this code calls this function later def startCrawling(): - # opentor() mktName = getMKTName() driver = getAccess() @@ -42,24 +41,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(mktName, baseURL, True) -# Opens Tor Browser -#prompts for ENTER input to continue -def opentor(): - from MarketPlaces.Initialization.markets_mining import config - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Returns the name of the website #return: name of site in string type def getMKTName(): @@ -76,7 +62,7 @@ def getFixedURL(): # Closes Tor Browser #@param: current selenium driver -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") diff --git a/MarketPlaces/HiddenMarket/crawler_selenium.py b/MarketPlaces/HiddenMarket/crawler_selenium.py index b8cc323..fb466fa 100644 --- a/MarketPlaces/HiddenMarket/crawler_selenium.py +++ b/MarketPlaces/HiddenMarket/crawler_selenium.py @@ -29,7 +29,6 @@ baseURL = 'http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion # Opens Tor Browser, crawls the website def startCrawling(): - # opentor() marketName = getMKTName() driver = getAccess() @@ -39,24 +38,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(marketName, baseURL, True) -# Opens Tor Browser -def opentor(): - from MarketPlaces.Initialization.markets_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Login using premade account credentials and do login captcha manually def login(driver): # wait for login page @@ -118,7 +104,7 @@ def getFixedURL(): # Closes Tor Browser -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") @@ -145,10 +131,10 @@ def createFFDriver(): # ff_prof.set_preference("network.cookie.lifetimePolicy", 2) # ff_prof.set_preference("network.dns.disablePrefetch", True) # ff_prof.set_preference("network.http.sendRefererHeader", 0) - # ff_prof.set_preference("permissions.default.image", 3) - # ff_prof.set_preference("browser.download.folderList", 2) - # ff_prof.set_preference("browser.download.manager.showWhenStarting", False) - # ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") + ff_prof.set_preference("permissions.default.image", 3) + ff_prof.set_preference("browser.download.folderList", 2) + ff_prof.set_preference("browser.download.manager.showWhenStarting", False) + ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") ff_prof.set_preference('network.proxy.type', 1) ff_prof.set_preference("network.proxy.socks_version", 5) ff_prof.set_preference('network.proxy.socks', '127.0.0.1') diff --git a/MarketPlaces/LionMarketplace/crawler_selenium.py b/MarketPlaces/LionMarketplace/crawler_selenium.py index ab8e41d..d0bac37 100644 --- a/MarketPlaces/LionMarketplace/crawler_selenium.py +++ b/MarketPlaces/LionMarketplace/crawler_selenium.py @@ -31,7 +31,6 @@ baseURL = 'http://lionznqc2hg2wsp5vgruqait4cpknihwlje6hkjyi52lcl5ivyf7bcad.onion # Opens Tor Browser, crawls the website, then parses, then closes tor #acts like the main method for the crawler, another function at the end of this code calls this function later def startCrawling(): - # opentor() mktName = getMKTName() driver = getAccess() @@ -41,25 +40,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(mktName, baseURL, True) -# Opens Tor Browser -#prompts for ENTER input to continue -def opentor(): - from MarketPlaces.Initialization.markets_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Returns the name of the website #return: name of site in string type def getMKTName(): @@ -76,7 +61,7 @@ def getFixedURL(): # Closes Tor Browser #@param: current selenium driver -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") @@ -103,7 +88,7 @@ def createFFDriver(): ff_prof.set_preference("network.cookie.lifetimePolicy", 2) ff_prof.set_preference("network.dns.disablePrefetch", True) ff_prof.set_preference("network.http.sendRefererHeader", 0) - ff_prof.set_preference("permissions.default.image", 1) + ff_prof.set_preference("permissions.default.image", 3) ff_prof.set_preference("browser.download.folderList", 2) ff_prof.set_preference("browser.download.manager.showWhenStarting", False) ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") diff --git a/MarketPlaces/M00nkeyMarket/crawler_selenium.py b/MarketPlaces/M00nkeyMarket/crawler_selenium.py index 690913e..d92650a 100644 --- a/MarketPlaces/M00nkeyMarket/crawler_selenium.py +++ b/MarketPlaces/M00nkeyMarket/crawler_selenium.py @@ -33,7 +33,6 @@ MARKET_NAME = 'M00nkeyMarket' # Opens Tor Browser, crawls the website, then parses, then closes tor #acts like the main method for the crawler, another function at the end of this code calls this function later def startCrawling(): - # opentor() driver = getAccess() if driver != 'down': @@ -42,25 +41,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(MARKET_NAME, BASE_URL, True) -# Opens Tor Browser -#prompts for ENTER input to continue -def opentor(): - from MarketPlaces.Initialization.markets_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Returns the name of the website #return: name of site in string type # def getMKTName(): @@ -77,7 +62,7 @@ def opentor(): # Closes Tor Browser #@param: current selenium driver -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") @@ -104,7 +89,7 @@ def createFFDriver(): ff_prof.set_preference("network.cookie.lifetimePolicy", 2) ff_prof.set_preference("network.dns.disablePrefetch", True) ff_prof.set_preference("network.http.sendRefererHeader", 0) - ff_prof.set_preference("permissions.default.image", 1) + ff_prof.set_preference("permissions.default.image", 3) ff_prof.set_preference("browser.download.folderList", 2) ff_prof.set_preference("browser.download.manager.showWhenStarting", False) ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") diff --git a/MarketPlaces/MetaVerseMarket/crawler_selenium.py b/MarketPlaces/MetaVerseMarket/crawler_selenium.py index e624d51..49760fc 100644 --- a/MarketPlaces/MetaVerseMarket/crawler_selenium.py +++ b/MarketPlaces/MetaVerseMarket/crawler_selenium.py @@ -33,7 +33,6 @@ baseURL = 'http://mdbvvcfwl3fpckiraucv7gio57yoslnhfjxzpoihf4fgdkdd7bwyv7id.onion # Opens Tor Browser, crawls the website, then parses, then closes tor #acts like the main method for the crawler, another function at the end of this code calls this function later def startCrawling(): - # opentor() mktName = getMKTName() driver = getAccess() @@ -43,25 +42,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(mktName, baseURL, True) -# Opens Tor Browser -#prompts for ENTER input to continue -def opentor(): - from MarketPlaces.Initialization.markets_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Returns the name of the website #return: name of site in string type def getMKTName(): @@ -78,7 +63,7 @@ def getFixedURL(): # Closes Tor Browser #@param: current selenium driver -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") @@ -105,7 +90,7 @@ def createFFDriver(): ff_prof.set_preference("network.cookie.lifetimePolicy", 2) ff_prof.set_preference("network.dns.disablePrefetch", True) ff_prof.set_preference("network.http.sendRefererHeader", 0) - ff_prof.set_preference("permissions.default.image", 1)## + ff_prof.set_preference("permissions.default.image", 3) ff_prof.set_preference("browser.download.folderList", 2) ff_prof.set_preference("browser.download.manager.showWhenStarting", False) ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") diff --git a/MarketPlaces/MikesGrandStore/crawler_selenium.py b/MarketPlaces/MikesGrandStore/crawler_selenium.py index cd45464..2bb9e1d 100644 --- a/MarketPlaces/MikesGrandStore/crawler_selenium.py +++ b/MarketPlaces/MikesGrandStore/crawler_selenium.py @@ -31,7 +31,6 @@ baseURL = 'http://4yx2akutmkhwfgzlpdxiah7cknurw6vlddlq24fxa3r3ebophwgpvhyd.onion # Opens Tor Browser, crawls the website, then parses, then closes tor #acts like the main method for the crawler, another function at the end of this code calls this function later def startCrawling(): - # opentor() mktName = getMKTName() driver = getAccess() @@ -41,25 +40,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(mktName, baseURL, True) -# Opens Tor Browser -#prompts for ENTER input to continue -def opentor(): - from MarketPlaces.Initialization.markets_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Returns the name of the website #return: name of site in string type def getMKTName(): @@ -76,7 +61,7 @@ def getFixedURL(): # Closes Tor Browser #@param: current selenium driver -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") @@ -103,7 +88,7 @@ def createFFDriver(): ff_prof.set_preference("network.cookie.lifetimePolicy", 2) ff_prof.set_preference("network.dns.disablePrefetch", True) ff_prof.set_preference("network.http.sendRefererHeader", 0) - ff_prof.set_preference("permissions.default.image", 1) + ff_prof.set_preference("permissions.default.image", 3) ff_prof.set_preference("browser.download.folderList", 2) ff_prof.set_preference("browser.download.manager.showWhenStarting", False) ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") @@ -145,6 +130,7 @@ def login(driver): WebDriverWait(driver, 100).until(EC.visibility_of_element_located( (By.XPATH, "/html/body/div[1]/header/div/div[3]/div/div/ul/li[6]/a"))) + # Saves the crawled html page, makes the directory path for html pages if not made def savePage(driver, page, url): cleanPage = cleanHTML(driver, page) diff --git a/MarketPlaces/Nexus/crawler_selenium.py b/MarketPlaces/Nexus/crawler_selenium.py index 1962d79..d7c84c2 100644 --- a/MarketPlaces/Nexus/crawler_selenium.py +++ b/MarketPlaces/Nexus/crawler_selenium.py @@ -31,7 +31,6 @@ baseURL = 'http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion # Opens Tor Browser, crawls the website, then parses, then closes tor #acts like the main method for the crawler, another function at the end of this code calls this function later def startCrawling(): - # opentor() mktName = getMKTName() driver = getAccess() @@ -40,22 +39,10 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(mktName, baseURL, True) -# Opens Tor Browser -#prompts for ENTER input to continue -def opentor(): - from MarketPlaces.Initialization.markets_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return # Returns the name of the website #return: name of site in string type @@ -73,7 +60,7 @@ def getFixedURL(): # Closes Tor Browser #@param: current selenium driver -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") @@ -100,7 +87,7 @@ def createFFDriver(): ff_prof.set_preference("network.cookie.lifetimePolicy", 2) ff_prof.set_preference("network.dns.disablePrefetch", True) ff_prof.set_preference("network.http.sendRefererHeader", 0) - ff_prof.set_preference("permissions.default.image", 2) + ff_prof.set_preference("permissions.default.image", 3) ff_prof.set_preference("browser.download.folderList", 2) ff_prof.set_preference("browser.download.manager.showWhenStarting", False) ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") @@ -133,6 +120,7 @@ def getAccess(): driver.close() return 'down' + def savePage(driver, page, url): cleanPage = cleanHTML(driver, page) filePath = getFullPathName(url) diff --git a/MarketPlaces/PabloEscobarMarket/crawler_selenium.py b/MarketPlaces/PabloEscobarMarket/crawler_selenium.py index cc9b890..8dc783c 100644 --- a/MarketPlaces/PabloEscobarMarket/crawler_selenium.py +++ b/MarketPlaces/PabloEscobarMarket/crawler_selenium.py @@ -28,7 +28,6 @@ baseURL = 'http://niejmptjzwhlfywruoab4pbuxg7kp2mtcr4c6mgpeykju5matewg36yd.onion # Opens Tor Browser, crawls the website def startCrawling(): - # opentor() mktName = getMKTName() driver = getAccess() @@ -38,24 +37,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(mktName, baseURL, True) -# Opens Tor Browser -def opentor(): - from MarketPlaces.Initialization.markets_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Login using premade account credentials and do login captcha manually def login(driver): input("Press ENTER when CAPTCHA is complete and login page has loaded\n") @@ -89,7 +75,7 @@ def getFixedURL(): # Closes Tor Browser -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") @@ -132,6 +118,8 @@ def createFFDriver(): driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service) + driver.maximize_window() + return driver @@ -181,8 +169,8 @@ def getNameFromURL(url): def getInterestedLinks(): links = [] - # hire hacker - links.append('http://niejmptjzwhlfywruoab4pbuxg7kp2mtcr4c6mgpeykju5matewg36yd.onion/?sub_id=36') + # # hire hacker + # links.append('http://niejmptjzwhlfywruoab4pbuxg7kp2mtcr4c6mgpeykju5matewg36yd.onion/?sub_id=36') # hacker links.append('http://niejmptjzwhlfywruoab4pbuxg7kp2mtcr4c6mgpeykju5matewg36yd.onion/?sub_id=34') @@ -221,7 +209,7 @@ def crawlForum(driver): driver.back() # comment out - break + # break # comment out if count == 1: diff --git a/MarketPlaces/RobinhoodMarket/crawler_selenium.py b/MarketPlaces/RobinhoodMarket/crawler_selenium.py index 4ce1cfe..06d8bc9 100644 --- a/MarketPlaces/RobinhoodMarket/crawler_selenium.py +++ b/MarketPlaces/RobinhoodMarket/crawler_selenium.py @@ -29,9 +29,6 @@ baseURL = 'http://ilr3qzubfnx33vbhal7l5coo4ftqlkv2tboph4ujog5crz6m5ua2b2ad.onion # Opens Tor Browser, crawls the website def startCrawling(): - # Opening tor beforehand gives "Tor exited during startup error" - # opentor() - marketName = getMKTName() driver = getAccess() @@ -45,24 +42,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(marketName, baseURL, True) -# Opens Tor Browser -def opentor(): - from MarketPlaces.Initialization.markets_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Login is not needed in Robinhood def login(driver): pass @@ -82,7 +66,7 @@ def getFixedURL(): # Closes Tor Browser -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") diff --git a/MarketPlaces/ThiefWorld/crawler_selenium.py b/MarketPlaces/ThiefWorld/crawler_selenium.py index e532006..16f60b0 100644 --- a/MarketPlaces/ThiefWorld/crawler_selenium.py +++ b/MarketPlaces/ThiefWorld/crawler_selenium.py @@ -32,7 +32,6 @@ baseURL = 'http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion # Opens Tor Browser, crawls the website, then parses, then closes tor #acts like the main method for the crawler, another function at the end of this code calls this function later def startCrawling(): - # opentor() mktName = getMKTName() driver = getAccess() @@ -42,25 +41,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(mktName, baseURL, True) -# Opens Tor Browser -#prompts for ENTER input to continue -def opentor(): - from MarketPlaces.Initialization.markets_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Returns the name of the website #return: name of site in string type def getMKTName(): @@ -77,7 +62,7 @@ def getFixedURL(): # Closes Tor Browser #@param: current selenium driver -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") @@ -104,7 +89,7 @@ def createFFDriver(): ff_prof.set_preference("network.cookie.lifetimePolicy", 2) ff_prof.set_preference("network.dns.disablePrefetch", True) ff_prof.set_preference("network.http.sendRefererHeader", 0) - ff_prof.set_preference("permissions.default.image", 1) + ff_prof.set_preference("permissions.default.image", 3) ff_prof.set_preference("browser.download.folderList", 2) ff_prof.set_preference("browser.download.manager.showWhenStarting", False) ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") diff --git a/MarketPlaces/Tor2door/crawler_selenium.py b/MarketPlaces/Tor2door/crawler_selenium.py index ec2e37f..a2df655 100644 --- a/MarketPlaces/Tor2door/crawler_selenium.py +++ b/MarketPlaces/Tor2door/crawler_selenium.py @@ -29,7 +29,6 @@ baseURL = 'http://yzrrne3pveltulbavydr2kiashvlnysdwclwmklo6cyjuqpxi7ku4xqd.onion # Opens Tor Browser, crawls the website def startCrawling(): - # opentor() marketName = getMKTName() driver = getAccess() @@ -39,24 +38,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(marketName, baseURL, True) -# Opens Tor Browser -def opentor(): - from MarketPlaces.Initialization.markets_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Login using premade account credentials and do login captcha manually def login(driver): #wait for login page @@ -118,7 +104,7 @@ def getFixedURL(): # Closes Tor Browser -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") diff --git a/MarketPlaces/TorBay/crawler_selenium.py b/MarketPlaces/TorBay/crawler_selenium.py index 4c8de5b..b830acb 100644 --- a/MarketPlaces/TorBay/crawler_selenium.py +++ b/MarketPlaces/TorBay/crawler_selenium.py @@ -32,7 +32,6 @@ baseURL = 'http://torbay3253zck4ym5cbowwvrbfjjzruzthrx3np5y6owvifrnhy5ybid.onion # Opens Tor Browser, crawls the website, then parses, then closes tor #acts like the main method for the crawler, another function at the end of this code calls this function later def startCrawling(): - # opentor() mktName = getMKTName() driver = getAccess() @@ -42,25 +41,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(mktName, baseURL, True) -# Opens Tor Browser -#prompts for ENTER input to continue -def opentor(): - from MarketPlaces.Initialization.markets_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Returns the name of the website #return: name of site in string type def getMKTName(): @@ -77,7 +62,7 @@ def getFixedURL(): # Closes Tor Browser #@param: current selenium driver -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") @@ -124,6 +109,7 @@ def createFFDriver(): return driver + #the driver 'gets' the url, attempting to get on the site, if it can't access return 'down' #return: return the selenium driver or string 'down' def getAccess(): diff --git a/MarketPlaces/TorMarket/crawler_selenium.py b/MarketPlaces/TorMarket/crawler_selenium.py index e1612ea..de75f89 100644 --- a/MarketPlaces/TorMarket/crawler_selenium.py +++ b/MarketPlaces/TorMarket/crawler_selenium.py @@ -31,7 +31,6 @@ baseURL = 'http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion # Opens Tor Browser, crawls the website, then parses, then closes tor #acts like the main method for the crawler, another function at the end of this code calls this function later def startCrawling(): - # opentor() mktName = getMKTName() driver = getAccess() @@ -40,25 +39,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(mktName, baseURL, True) -# Opens Tor Browser -#prompts for ENTER input to continue -def opentor(): - from MarketPlaces.Initialization.markets_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Returns the name of the website #return: name of site in string type def getMKTName(): @@ -75,7 +60,7 @@ def getFixedURL(): # Closes Tor Browser #@param: current selenium driver -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") @@ -102,7 +87,7 @@ def createFFDriver(): ff_prof.set_preference("network.cookie.lifetimePolicy", 2) # ff_prof.set_preference("network.dns.disablePrefetch", True) # ff_prof.set_preference("network.http.sendRefererHeader", 0) - ff_prof.set_preference("permissions.default.image", 1) + ff_prof.set_preference("permissions.default.image", 3) ff_prof.set_preference("browser.download.folderList", 2) ff_prof.set_preference("browser.download.manager.showWhenStarting", False) ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") diff --git a/MarketPlaces/ViceCity/crawler_selenium.py b/MarketPlaces/ViceCity/crawler_selenium.py index 05250e9..9c0bb53 100644 --- a/MarketPlaces/ViceCity/crawler_selenium.py +++ b/MarketPlaces/ViceCity/crawler_selenium.py @@ -32,7 +32,6 @@ baseURL = 'http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion # Opens Tor Browser, crawls the website, then parses, then closes tor #acts like the main method for the crawler, another function at the end of this code calls this function later def startCrawling(): - # opentor() mktName = getMKTName() driver = getAccess() @@ -42,25 +41,11 @@ def startCrawling(): crawlForum(driver) except Exception as e: print(driver.current_url, e) - closetor(driver) + closeDriver(driver) new_parse(mktName, baseURL, True) -# Opens Tor Browser -#prompts for ENTER input to continue -def opentor(): - from MarketPlaces.Initialization.markets_mining import config - - global pid - print("Connecting Tor...") - pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) - pid = pro.pid - time.sleep(7.5) - input('Tor Connected. Press ENTER to continue\n') - return - - # Returns the name of the website #return: name of site in string type def getMKTName(): @@ -77,7 +62,7 @@ def getFixedURL(): # Closes Tor Browser #@param: current selenium driver -def closetor(driver): +def closeDriver(driver): # global pid # os.system("taskkill /pid " + str(pro.pid)) # os.system("taskkill /t /f /im tor.exe") @@ -104,10 +89,10 @@ def createFFDriver(): # ff_prof.set_preference("network.cookie.lifetimePolicy", 2) # ff_prof.set_preference("network.dns.disablePrefetch", True) # ff_prof.set_preference("network.http.sendRefererHeader", 0) - # ff_prof.set_preference("permissions.default.image", 3) - # ff_prof.set_preference("browser.download.folderList", 2) - # ff_prof.set_preference("browser.download.manager.showWhenStarting", False) - # ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") + ff_prof.set_preference("permissions.default.image", 3) + ff_prof.set_preference("browser.download.folderList", 2) + ff_prof.set_preference("browser.download.manager.showWhenStarting", False) + ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") ff_prof.set_preference('network.proxy.type', 1) ff_prof.set_preference("network.proxy.socks_version", 5) ff_prof.set_preference('network.proxy.socks', '127.0.0.1')