Browse Source

moved Vortex back to test

main
westernmeadow 1 year ago
parent
commit
07e71ec2ea
2 changed files with 32 additions and 33 deletions
  1. +25
    -26
      MarketPlaces/Vortex/crawler_selenium.py
  2. +7
    -7
      MarketPlaces/Vortex/parser.py

+ 25
- 26
MarketPlaces/Vortex/crawler_selenium.py View File

@ -26,19 +26,19 @@ from MarketPlaces.Vortex.parser import vortex_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML from MarketPlaces.Utilities.utilities import cleanHTML
counter = 1 counter = 1
baseURL = 'http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/login' baseURL = 'http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/'
def startCrawling(): def startCrawling():
mktName = getMKTName() mktName = getMKTName()
# driver = getAccess() driver = getAccess()
# if driver != 'down':
# if driver != 'down': try:
# try: login(driver)
# login(driver) crawlForum(driver)
# crawlForum(driver) except Exception as e:
# except Exception as e: print(driver.current_url, e)
# print(driver.current_url, e) closeDriver(driver)
# closeDriver(driver)
new_parse(mktName, baseURL, True) new_parse(mktName, baseURL, True)
@ -137,15 +137,14 @@ def login(driver):
input("Press ENTER when captcha is solved") input("Press ENTER when captcha is solved")
try: # try:
agree_button = driver.find_element(by=By.NAME, value='login') # agree_button = driver.find_element(by=By.NAME, value='login')
agree_button.click() # agree_button.click()
except Exception as e: # except Exception as e:
print('Problem with clicking login button', e) # print('Problem with clicking login button', e)
#
WebDriverWait(driver, 100).until(EC.visibility_of_element_located( # WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
(By.XPATH, '//*[@id="main"]'))) # (By.XPATH, '//*[@id="main"]')))
def savePage(driver, page, url): def savePage(driver, page, url):
@ -186,11 +185,11 @@ def getInterestedLinks():
links = [] links = []
# security and hacking # security and hacking
# links.append('http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/home?cat=Security+%26+Hacking') links.append('http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/home?cat=Security+%26+Hacking')
# fraud # fraud
links.append('http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/home?cat=Fraud') links.append('http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/home?cat=Fraud')
# malware, nothing here for now # malware, nothing here for now
# links.append('http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/home?cat=Cracked+softwares%26comma%3B+Botnets+%26+Malware') links.append('http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/home?cat=Cracked+softwares%26comma%3B+Botnets+%26+Malware')
return links return links
@ -228,12 +227,12 @@ def crawlForum(driver):
savePage(driver, driver.page_source, item) savePage(driver, driver.page_source, item)
driver.back() driver.back()
# comment out # # comment out
break # break
# #
# comment out # # comment out
if count == 1: # if count == 1:
break # break
try: try:
temp = driver.find_element(by=By.XPATH, value = '//*[@id="main"]') temp = driver.find_element(by=By.XPATH, value = '//*[@id="main"]')


+ 7
- 7
MarketPlaces/Vortex/parser.py View File

@ -142,10 +142,10 @@ def vortex_listing_parser(soup):
href = [] # 22 Product_Links y href = [] # 22 Product_Links y
temp = soup.find('main', {'id': 'main'}).find('section', {'id':'page_container'}) temp = soup.find('main', {'id': 'main'}).find('section', {'id':'page_container'})
listings = temp.findAll('div', {"class": "product-card col-sm-6 col-md-3 col-xl-4 mb-5"}) listings = temp.findAll('div', {"class": "product-card col-sm-6 col-md-3 col-xl-4 mb-0"})
cat = soup.find('section', {'class': 'row px-md-4 mx-0 mb-3'}).find('ol').find_all('li') # cat = soup.find('section', {'class': 'row px-md-4 mx-0 my-3'}).find('ol').find_all('li')
cat = cat[1].find('a').text # cat = cat[1].find('a').text
# Populating the Number of Products # Populating the Number of Products
nm = len(listings) nm = len(listings)
@ -203,9 +203,9 @@ def vortex_listing_parser(soup):
MSValue = me MSValue = me
MS.append(MSValue) MS.append(MSValue)
# Finding the category - check # # Finding the category - check
category_text = cleanString(cat).strip() # category_text = cleanString(cat).strip()
category.append(category_text) # category.append(category_text)
# Finding the hrefs - check # Finding the hrefs - check
description_link = listing.find('h4').find('a')['href'] description_link = listing.find('h4').find('a')['href']
@ -278,7 +278,7 @@ def vortex_links_parser(soup):
# Returning all links that should be visited by the Crawler # Returning all links that should be visited by the Crawler
href = [] href = []
listings = soup.find('main').findAll('div', {"class": "product-card col-sm-6 col-md-3 col-xl-4 mb-5"}) listings = soup.find('main').findAll('div', {"class": "product-card col-sm-6 col-md-3 col-xl-4 mb-0"})
for listing in listings: for listing in listings:
# Adding the url to the list of urls # Adding the url to the list of urls


|||||||
x
 
000:0
Loading…
Cancel
Save