diff --git a/MarketPlaces/Vortex/crawler_selenium.py b/MarketPlaces/Vortex/crawler_selenium.py
index d1c6f91..d6f4881 100644
--- a/MarketPlaces/Vortex/crawler_selenium.py
+++ b/MarketPlaces/Vortex/crawler_selenium.py
@@ -26,19 +26,19 @@ from MarketPlaces.Vortex.parser import vortex_links_parser
 from MarketPlaces.Utilities.utilities import cleanHTML
 
 counter = 1
-baseURL = 'http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/login'
+baseURL = 'http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/'
 
 
 def startCrawling():
     mktName = getMKTName()
-    # driver = getAccess()
-    #
-    # if driver != 'down':
-    #     try:
-    #         login(driver)
-    #         crawlForum(driver)
-    #     except Exception as e:
-    #         print(driver.current_url, e)
-    #     closeDriver(driver)
+    driver = getAccess()
+
+    if driver != 'down':
+        try:
+            login(driver)
+            crawlForum(driver)
+        except Exception as e:
+            print(driver.current_url, e)
+        closeDriver(driver)
 
     new_parse(mktName, baseURL, True)
@@ -137,15 +137,14 @@ def login(driver):
 
     input("Press ENTER when captcha is solved")
 
-    try:
-        agree_button = driver.find_element(by=By.NAME, value='login')
-        agree_button.click()
-    except Exception as e:
-        print('Problem with clicking login button', e)
-
-    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
-        (By.XPATH, '//*[@id="main"]')))
-
+    # try:
+    #     agree_button = driver.find_element(by=By.NAME, value='login')
+    #     agree_button.click()
+    # except Exception as e:
+    #     print('Problem with clicking login button', e)
+    #
+    # WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
+    #     (By.XPATH, '//*[@id="main"]')))
 
 
 def savePage(driver, page, url):
@@ -186,11 +185,11 @@ def getInterestedLinks():
     links = []
 
     # security and hacking
-    # links.append('http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/home?cat=Security+%26+Hacking')
+    links.append('http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/home?cat=Security+%26+Hacking')
     # fraud
     links.append('http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/home?cat=Fraud')
     # malware, nothing here for now
-    # links.append('http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/home?cat=Cracked+softwares%26comma%3B+Botnets+%26+Malware')
+    links.append('http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/home?cat=Cracked+softwares%26comma%3B+Botnets+%26+Malware')
 
     return links
 
@@ -228,12 +227,12 @@ def crawlForum(driver):
                     savePage(driver, driver.page_source, item)
                     driver.back()
 
-                    # comment out
-                    break
+                    # # comment out
+                    # break
                 #
-                # comment out
-                if count == 1:
-                    break
+                # # comment out
+                # if count == 1:
+                #     break
 
                 try:
                     temp = driver.find_element(by=By.XPATH, value = '//*[@id="main"]')
diff --git a/MarketPlaces/Vortex/parser.py b/MarketPlaces/Vortex/parser.py
index a4d9465..ba69a5b 100644
--- a/MarketPlaces/Vortex/parser.py
+++ b/MarketPlaces/Vortex/parser.py
@@ -142,10 +142,10 @@ def vortex_listing_parser(soup):
     href = []                               # 22 Product_Links y
 
     temp = soup.find('main', {'id': 'main'}).find('section', {'id':'page_container'})
-    listings = temp.findAll('div', {"class": "product-card col-sm-6 col-md-3 col-xl-4 mb-5"})
+    listings = temp.findAll('div', {"class": "product-card col-sm-6 col-md-3 col-xl-4 mb-0"})
 
-    cat = soup.find('section', {'class': 'row px-md-4 mx-0 mb-3'}).find('ol').find_all('li')
-    cat = cat[1].find('a').text
+    # cat = soup.find('section', {'class': 'row px-md-4 mx-0 my-3'}).find('ol').find_all('li')
+    # cat = cat[1].find('a').text
 
     # Populating the Number of Products
     nm = len(listings)
@@ -203,9 +203,9 @@ def vortex_listing_parser(soup):
             MSValue = me
         MS.append(MSValue)
 
-        # Finding the category - check
-        category_text = cleanString(cat).strip()
-        category.append(category_text)
+        # # Finding the category - check
+        # category_text = cleanString(cat).strip()
+        # category.append(category_text)
 
         # Finding the hrefs - check
         description_link = listing.find('h4').find('a')['href']
@@ -278,7 +278,7 @@ def vortex_links_parser(soup):
     # Returning all links that should be visited by the Crawler
     href = []
 
-    listings = soup.find('main').findAll('div', {"class": "product-card col-sm-6 col-md-3 col-xl-4 mb-5"})
+    listings = soup.find('main').findAll('div', {"class": "product-card col-sm-6 col-md-3 col-xl-4 mb-0"})
 
     for listing in listings:
         # Adding the url to the list of urls
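
A note on the repeated class-string fix (mb-5 -> mb-0): both lookups match the full Bootstrap utility string, so any future margin tweak by the market breaks them again and silently yields zero listings. A minimal sketch of a more tolerant lookup, assuming BeautifulSoup as already used in parser.py and treating only product-card as the stable class:

# Sketch, not part of the commit: BeautifulSoup treats 'class' as a
# multi-valued attribute, so matching only the stable 'product-card'
# class finds the same divs regardless of which margin utility
# (mb-5, mb-0, ...) the market ships.
listings = temp.find_all('div', class_='product-card')

# Equivalent CSS-selector form:
listings = temp.select('div.product-card')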
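
Similarly hedged, on the category URLs re-enabled in getInterestedLinks(): they are ordinary percent-encoded query strings, so they could be built from readable category names rather than hand-encoded. A sketch using only the standard library, with category names taken from the URLs in the diff:

from urllib.parse import quote_plus

base = 'http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion'

def category_url(name):
    # quote_plus maps ' ' -> '+', '&' -> '%26', ';' -> '%3B', which
    # reproduces the hand-written forms in the diff.
    return base + '/home?cat=' + quote_plus(name)

category_url('Security & Hacking')
# -> '.../home?cat=Security+%26+Hacking'
category_url('Cracked softwares&comma; Botnets & Malware')
# -> '.../home?cat=Cracked+softwares%26comma%3B+Botnets+%26+Malware'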