|
@ -26,19 +26,19 @@ from MarketPlaces.Vortex.parser import vortex_links_parser |
|
|
from MarketPlaces.Utilities.utilities import cleanHTML |
|
|
from MarketPlaces.Utilities.utilities import cleanHTML |
|
|
|
|
|
|
|
|
counter = 1 |
|
|
counter = 1 |
|
|
baseURL = 'http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/login' |
|
|
|
|
|
|
|
|
baseURL = 'http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/' |
|
|
|
|
|
|
|
|
def startCrawling(): |
|
|
def startCrawling(): |
|
|
mktName = getMKTName() |
|
|
mktName = getMKTName() |
|
|
# driver = getAccess() |
|
|
|
|
|
# |
|
|
|
|
|
# if driver != 'down': |
|
|
|
|
|
# try: |
|
|
|
|
|
# login(driver) |
|
|
|
|
|
# crawlForum(driver) |
|
|
|
|
|
# except Exception as e: |
|
|
|
|
|
# print(driver.current_url, e) |
|
|
|
|
|
# closeDriver(driver) |
|
|
|
|
|
|
|
|
driver = getAccess() |
|
|
|
|
|
|
|
|
|
|
|
if driver != 'down': |
|
|
|
|
|
try: |
|
|
|
|
|
login(driver) |
|
|
|
|
|
crawlForum(driver) |
|
|
|
|
|
except Exception as e: |
|
|
|
|
|
print(driver.current_url, e) |
|
|
|
|
|
closeDriver(driver) |
|
|
|
|
|
|
|
|
new_parse(mktName, baseURL, True) |
|
|
new_parse(mktName, baseURL, True) |
|
|
|
|
|
|
|
@ -137,15 +137,14 @@ def login(driver): |
|
|
|
|
|
|
|
|
input("Press ENTER when captcha is solved") |
|
|
input("Press ENTER when captcha is solved") |
|
|
|
|
|
|
|
|
try: |
|
|
|
|
|
agree_button = driver.find_element(by=By.NAME, value='login') |
|
|
|
|
|
agree_button.click() |
|
|
|
|
|
except Exception as e: |
|
|
|
|
|
print('Problem with clicking login button', e) |
|
|
|
|
|
|
|
|
|
|
|
WebDriverWait(driver, 100).until(EC.visibility_of_element_located( |
|
|
|
|
|
(By.XPATH, '//*[@id="main"]'))) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# try: |
|
|
|
|
|
# agree_button = driver.find_element(by=By.NAME, value='login') |
|
|
|
|
|
# agree_button.click() |
|
|
|
|
|
# except Exception as e: |
|
|
|
|
|
# print('Problem with clicking login button', e) |
|
|
|
|
|
# |
|
|
|
|
|
# WebDriverWait(driver, 100).until(EC.visibility_of_element_located( |
|
|
|
|
|
# (By.XPATH, '//*[@id="main"]'))) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def savePage(driver, page, url): |
|
|
def savePage(driver, page, url): |
|
@ -186,11 +185,11 @@ def getInterestedLinks(): |
|
|
links = [] |
|
|
links = [] |
|
|
|
|
|
|
|
|
# security and hacking |
|
|
# security and hacking |
|
|
# links.append('http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/home?cat=Security+%26+Hacking') |
|
|
|
|
|
|
|
|
links.append('http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/home?cat=Security+%26+Hacking') |
|
|
# fraud |
|
|
# fraud |
|
|
links.append('http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/home?cat=Fraud') |
|
|
links.append('http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/home?cat=Fraud') |
|
|
# malware, nothing here for now |
|
|
# malware, nothing here for now |
|
|
# links.append('http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/home?cat=Cracked+softwares%26comma%3B+Botnets+%26+Malware') |
|
|
|
|
|
|
|
|
links.append('http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/home?cat=Cracked+softwares%26comma%3B+Botnets+%26+Malware') |
|
|
|
|
|
|
|
|
return links |
|
|
return links |
|
|
|
|
|
|
|
@ -228,12 +227,12 @@ def crawlForum(driver): |
|
|
savePage(driver, driver.page_source, item) |
|
|
savePage(driver, driver.page_source, item) |
|
|
driver.back() |
|
|
driver.back() |
|
|
|
|
|
|
|
|
# comment out |
|
|
|
|
|
break |
|
|
|
|
|
|
|
|
# # comment out |
|
|
|
|
|
# break |
|
|
# |
|
|
# |
|
|
# comment out |
|
|
|
|
|
if count == 1: |
|
|
|
|
|
break |
|
|
|
|
|
|
|
|
# # comment out |
|
|
|
|
|
# if count == 1: |
|
|
|
|
|
# break |
|
|
|
|
|
|
|
|
try: |
|
|
try: |
|
|
temp = driver.find_element(by=By.XPATH, value = '//*[@id="main"]') |
|
|
temp = driver.find_element(by=By.XPATH, value = '//*[@id="main"]') |
|
|