From 4cacfeebd9cea8842ee72902a1439f4515fa353b Mon Sep 17 00:00:00 2001 From: Helium Date: Wed, 7 Feb 2024 13:00:36 -0800 Subject: [PATCH] crawler fixed for kerberos, but not checked. --- MarketPlaces/Kerberos/crawler_selenium.py | 115 ++++++++++++---------- 1 file changed, 62 insertions(+), 53 deletions(-) diff --git a/MarketPlaces/Kerberos/crawler_selenium.py b/MarketPlaces/Kerberos/crawler_selenium.py index fe8f1c0..d199c73 100644 --- a/MarketPlaces/Kerberos/crawler_selenium.py +++ b/MarketPlaces/Kerberos/crawler_selenium.py @@ -1,4 +1,4 @@ -__author__ = 'DarkWeb' +__author__ = 'Helium' ''' Kerberos Market Crawler (Selenium) @@ -29,17 +29,18 @@ baseURL = 'http://kerberosazmnfrjinmftp3im3cr7hw4nxbavm4ngofn64g24be7h3kqd.onion # Opens Tor Browser, crawls the website def startCrawling(): - opentor() - # marketName = getMarketName() + mktName = getMKTName() driver = getAccess() if driver != 'down': - captcha(driver) - login(driver) - crawlForum(driver) + try: + login(driver) + crawlForum(driver) + except Exception as e: + print(driver.current_url, e) + closeDriver(driver) - # new_parse(marketName, False) - closetor(driver) + # new_parse(mktName, baseURL, True) # Opens Tor Browser @@ -53,24 +54,28 @@ def opentor(): input('Tor Connected. Press ENTER to continue\n') return - def captcha(driver): - # wait for captcha page - WebDriverWait(driver, 100).until(EC.visibility_of_element_located( - (By.XPATH, "/html/body/div/div/img"))) + # do captchas manually and then wait + input('Complete CAPTCHA\'s manually\nthen press enter when completed') - # too hard to code, requires manual completion - - # wait for login page - WebDriverWait(driver, 100).until(EC.visibility_of_element_located( - (By.XPATH, "/html/body/div[1]/div[2]/div/form/div[10]/button"))) +def closeDriver(driver): + # global pid + # os.system("taskkill /pid " + str(pro.pid)) + # os.system("taskkill /t /f /im tor.exe") + print('Closing Tor...') + driver.close() + time.sleep(3) + return # Login using premade account credentials and do login captcha manually def login(driver): + captcha(driver) #wait for login page WebDriverWait(driver, 100).until(EC.visibility_of_element_located( - (By.XPATH, "/html/body/div[1]/div[2]/div/form/div[10]/button"))) + (By.XPATH, "/html/body/div[1]/div[2]"))) + + input("There may be an enter button you need to press.\npress it now then press enter on the keyboard") #entering username and password into input boxes usernameBox = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div[2]/div/form/input[1]') @@ -80,40 +85,43 @@ def login(driver): #Password here passwordBox.send_keys('fishowal') + input('complete CAPTCHA, press login, and then press enter on keyboard') + + # wait for captcha page show up # WebDriverWait(driver, 100).until(EC.visibility_of_element_located( # (By.XPATH, "/html/body/div/img[24]"))) - time.sleep(10) + # time.sleep(10) # save captcha to local - driver.find_element(by=By.XPATH, value='/html/body/div[1]/div[2]/div/form/div[6]').screenshot( - r'..\Kerberos\captcha.png') - - # This method will show image in any image viewer - im = Image.open(r'..\Kerberos\captcha.png') - - im.show() - - # wait until input space show up - inputBox = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div[2]/div/form/input[3]') - - # ask user input captcha solution in terminal - userIn = input("Enter solution: ") - - # send user solution into the input space - inputBox.send_keys(userIn) - - # click the verify(submit) button - driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") - driver.find_element(by=By.XPATH, value="/html/body/div[1]/div[2]/div/form/div[10]/button").click() - - # wait for listing page show up (This Xpath may need to change based on different seed url) - WebDriverWait(driver, 100).until(EC.visibility_of_element_located( - (By.XPATH, '//*[@id="breadcrumb"]'))) + # driver.find_element(by=By.XPATH, value='/html/body/div[1]/div[2]/div/form/div[6]').screenshot( + # r'..\Kerberos\captcha.png') + # + # # This method will show image in any image viewer + # im = Image.open(r'..\Kerberos\captcha.png') + # + # im.show() + # + # # wait until input space show up + # inputBox = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div[2]/div/form/input[3]') + + # # ask user input captcha solution in terminal + # userIn = input("Enter solution: ") + # + # # send user solution into the input space + # inputBox.send_keys(userIn) + # + # # click the verify(submit) button + # driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") + # driver.find_element(by=By.XPATH, value="/html/body/div[1]/div[2]/div/form/div[10]/button").click() + # + # # wait for listing page show up (This Xpath may need to change based on different seed url) + # WebDriverWait(driver, 100).until(EC.visibility_of_element_located( + # (By.XPATH, '//*[@id="breadcrumb"]'))) # Returns the name of the website -def getMarketName(): +def getMKTName(): name = 'Kerberos' return name @@ -139,12 +147,11 @@ def closetor(driver): # Creates FireFox 'driver' and configure its 'Profile' # to use Tor proxy and socket def createFFDriver(): - file = open('../../path.txt', 'r') - lines = file.readlines() + from MarketPlaces.Initialization.markets_mining import config - ff_binary = FirefoxBinary(lines[0].strip()) + ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) - ff_prof = FirefoxProfile(lines[1].strip()) + ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof.set_preference("places.history.enabled", False) ff_prof.set_preference("privacy.clearOnShutdown.offlineApps", True) ff_prof.set_preference("privacy.clearOnShutdown.passwords", True) @@ -153,7 +160,7 @@ def createFFDriver(): ff_prof.set_preference("signon.rememberSignons", False) ff_prof.set_preference("network.cookie.lifetimePolicy", 2) ff_prof.set_preference("network.dns.disablePrefetch", True) - # ff_prof.set_preference("network.http.sendRefererHeader", 0) + ff_prof.set_preference("network.http.sendRefererHeader", 0) ff_prof.set_preference("permissions.default.image", 3) ff_prof.set_preference("browser.download.folderList", 2) ff_prof.set_preference("browser.download.manager.showWhenStarting", False) @@ -166,24 +173,24 @@ def createFFDriver(): ff_prof.set_preference("javascript.enabled", False) ff_prof.update_preferences() - service = Service(executable_path=lines[2].strip()) + service = Service(config.get('TOR', 'geckodriver_path')) driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service) + driver.maximize_window() + return driver def getAccess(): url = getFixedURL() driver = createFFDriver() - try: - driver.get(url) return driver except: - + driver.close() return 'down' @@ -284,6 +291,8 @@ def crawlForum(driver): break try: + # /html/body/div[4]/div[4]/div[4]/div/div[1]/div[28]/a[15] + # /html/body/div[4]/div[4]/div[4]/div/div[1]/div[28]/a[16] nav = driver.find_element(by=By.XPATH, value= '/html/body/div[3]/div[4]/div[4]/div/div[1]/div[28]') a = nav.find_element(by=By.LINK_TEXT, value="Next")