From 0a9d4c56acf1436d8495c875173c8b983a3ec3af Mon Sep 17 00:00:00 2001
From: Joshua
Date: Thu, 26 Oct 2023 08:34:52 -0700
Subject: [PATCH] finished crawler

---
 MarketPlaces/Ares/crawler_selenium.py | 25 ++++++++++++-------------
 MarketPlaces/Ares/parser.py           |  2 +-
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/MarketPlaces/Ares/crawler_selenium.py b/MarketPlaces/Ares/crawler_selenium.py
index 52986ec..5120f06 100644
--- a/MarketPlaces/Ares/crawler_selenium.py
+++ b/MarketPlaces/Ares/crawler_selenium.py
@@ -26,7 +26,7 @@ from MarketPlaces.Ares.parser import ares_links_parser
 from MarketPlaces.Utilities.utilities import cleanHTML
 
 counter = 1
-baseURL = 'http://aresonewgdyfb7oyyanlvp6e75arfivpkuy7rwo5uegayrv7rkztfwad.onion/signin'
+baseURL = 'http://aresonewgdyfb7oyyanlvp6e75arfivpkuy7rwo5uegayrv7rkztfwad.onion/'
 
 
 def startCrawling():
@@ -41,7 +41,7 @@ def startCrawling():
             print(driver.current_url, e)
         closeDriver(driver)
 
-    new_parse(mktName, baseURL, True)
+    # new_parse(mktName, baseURL, True)
 
 
 # Returns the name of the website
@@ -52,7 +52,7 @@ def getMKTName():
 
 # Return the base link of the website
 def getFixedURL():
-    url = 'http://aresonewgdyfb7oyyanlvp6e75arfivpkuy7rwo5uegayrv7rkztfwad.onion/signin'
+    url = 'http://aresonewgdyfb7oyyanlvp6e75arfivpkuy7rwo5uegayrv7rkztfwad.onion/'
     return url
 
 
@@ -118,7 +118,7 @@ def getAccess():
 
 
 def login(driver):
-    input("Press ENTER when CAPTCHA is complete and login page has loaded\n")
+    # input("Press ENTER when CAPTCHA is complete and login page has loaded\n")
 
     # entering username and password into input boxes
     usernameBox = driver.find_element(by=By.XPATH, value='//*[@id="username"]')
@@ -173,19 +173,19 @@ def getInterestedLinks():
     links = []
 
     # Digital - Malware
-    links.append('http://sn2sfdqay6cxztroslaxa36covrhoowe6a5xug6wlm6ek7nmeiujgvad.onion/category/95c37970-002c-11ec-a5dc-1f4432087ed2')
+    # links.append('http://aresonewgdyfb7oyyanlvp6e75arfivpkuy7rwo5uegayrv7rkztfwad.onion/category/95c37970-002c-11ec-a5dc-1f4432087ed2')
     # Digital - Guides (Mostly carding, some useful hacking guides. probably dont use)
-    links.append('http://sn2sfdqay6cxztroslaxa36covrhoowe6a5xug6wlm6ek7nmeiujgvad.onion/category/9a8bea70-002b-11ec-a3db-c90dd329f662')
+    # links.append('http://aresonewgdyfb7oyyanlvp6e75arfivpkuy7rwo5uegayrv7rkztfwad.onion/category/9a8bea70-002b-11ec-a3db-c90dd329f662')
     # Digital - Hacking
-    links.append('http://sn2sfdqay6cxztroslaxa36covrhoowe6a5xug6wlm6ek7nmeiujgvad.onion/category/a81693f0-002b-11ec-9c39-110550ce4921')
+    # links.append('http://aresonewgdyfb7oyyanlvp6e75arfivpkuy7rwo5uegayrv7rkztfwad.onion/category/a81693f0-002b-11ec-9c39-110550ce4921')
     # Digital - Malware2
-    links.append('http://sn2sfdqay6cxztroslaxa36covrhoowe6a5xug6wlm6ek7nmeiujgvad.onion/category/b3258c50-002b-11ec-b658-876d3d651145')
+    links.append('http://aresonewgdyfb7oyyanlvp6e75arfivpkuy7rwo5uegayrv7rkztfwad.onion/category/b3258c50-002b-11ec-b658-876d3d651145')
     # Digital - Sofware (50/50 hacking stuff and cracked software)
-    links.append('http://sn2sfdqay6cxztroslaxa36covrhoowe6a5xug6wlm6ek7nmeiujgvad.onion/category/cff75df0-002b-11ec-8d0a-81fddeb36bf1')
+    # links.append('http://aresonewgdyfb7oyyanlvp6e75arfivpkuy7rwo5uegayrv7rkztfwad.onion/category/cff75df0-002b-11ec-8d0a-81fddeb36bf1')
     # Digital - Exploits
-    links.append('http://sn2sfdqay6cxztroslaxa36covrhoowe6a5xug6wlm6ek7nmeiujgvad.onion/category/ef029550-002f-11ec-8711-675a8b116ba6')
+    # links.append('http://aresonewgdyfb7oyyanlvp6e75arfivpkuy7rwo5uegayrv7rkztfwad.onion/category/ef029550-002f-11ec-8711-675a8b116ba6')
     # Digital - Tutorials (Mostly random stuff, some useful tutorials, probably dont use)
-    links.append('http://sn2sfdqay6cxztroslaxa36covrhoowe6a5xug6wlm6ek7nmeiujgvad.onion/category/f6e9c3b0-002b-11ec-85aa-c79a6ac8cfe8')
+    # links.append('http://aresonewgdyfb7oyyanlvp6e75arfivpkuy7rwo5uegayrv7rkztfwad.onion/category/f6e9c3b0-002b-11ec-85aa-c79a6ac8cfe8')
 
     return links
 
@@ -211,7 +211,6 @@ def crawlForum(driver):
                     driver.refresh()
                 html = driver.page_source
                 savePage(driver, html, link)
-
                 list = productPages(html)
                 for item in list:
                     itemURL = urlparse.urljoin(baseURL, str(item))
@@ -231,7 +230,7 @@
                     # break
 
                 try:
-                    link = driver.find_element(by=By.XPATH, value='//a[contains(text(), "Next")]').get_attribute('href')
+                    link = driver.find_element(by=By.XPATH, value='/html/body/div[6]/div[3]/div/div[2]/nav/ul/li[4]/a').get_attribute('href')
                     if link == "":
                         raise NoSuchElementException
                     count += 1
diff --git a/MarketPlaces/Ares/parser.py b/MarketPlaces/Ares/parser.py
index 83c986b..28e0850 100644
--- a/MarketPlaces/Ares/parser.py
+++ b/MarketPlaces/Ares/parser.py
@@ -272,7 +272,7 @@ def ares_links_parser(soup):
     # Returning all links that should be visited by the Crawler
     href = []
 
-    listing = soup.findAll('div', {"id": "itembox"})
+    listing = soup.findAll('div', {"class": "col-md-4 my-md-0 my-2 col-12"})
 
     # for a in listing:
     #     bae = a.find('a', {"class": "text-info"}, href=True)