|
|
@ -41,7 +41,7 @@ def startCrawling(): |
|
|
|
print(driver.current_url, e) |
|
|
|
closeDriver(driver) |
|
|
|
|
|
|
|
new_parse(mktName, baseURL, True) |
|
|
|
# new_parse(mktName, baseURL, True) |
|
|
|
|
|
|
|
|
|
|
|
# Returns the name of the website |
|
|
@ -121,21 +121,20 @@ def login(driver): |
|
|
|
input("Press ENTER when CAPTCHA is complete and login page has loaded\n") |
|
|
|
|
|
|
|
# entering username and password into input boxes |
|
|
|
usernameBox = driver.find_element(by=By.XPATH, value='//input[@name="username"]') |
|
|
|
usernameBox = driver.find_element(by=By.XPATH, value='//*[@id="username"]') |
|
|
|
# Username here |
|
|
|
usernameBox.send_keys('aliciamykeys') |
|
|
|
passwordBox = driver.find_element(by=By.XPATH, value='//input[@name="password"]') |
|
|
|
usernameBox.send_keys('itsmedio') |
|
|
|
passwordBox = driver.find_element(by=By.XPATH, value='//*[@id="password"]') |
|
|
|
# Password here |
|
|
|
passwordBox.send_keys('aliciawherearemykey$') |
|
|
|
# session time |
|
|
|
session_select = Select(driver.find_element(by=By.XPATH, value='/html/body/main/div/div/div/div/div/form/div[4]/div/div[2]/select')) |
|
|
|
session_select.select_by_visible_text('Session 60min') |
|
|
|
passwordBox.send_keys('DementedBed123-') |
|
|
|
# submit |
|
|
|
submit = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div[2]/form/div[7]/input') |
|
|
|
submit.click() |
|
|
|
|
|
|
|
input("Press ENTER when CAPTCHA is completed and you exit the newsletter\n") |
|
|
|
|
|
|
|
# wait for listing page show up (This Xpath may need to change based on different seed url) |
|
|
|
WebDriverWait(driver, 100).until(EC.visibility_of_element_located( |
|
|
|
(By.XPATH, '//*[@id="submit"]'))) |
|
|
|
(By.XPATH, '/html/body/div/div[3]/div[2]/div[1]'))) |
|
|
|
|
|
|
|
|
|
|
|
def savePage(driver, page, url): |
|
|
@ -176,17 +175,17 @@ def getInterestedLinks(): |
|
|
|
links = [] |
|
|
|
|
|
|
|
# Hosting and Security |
|
|
|
links.append('http://gofishbybookb4a2kvviuygmwjqfxx7nqsovweogs2cxvqvexhe7edyd.onion/?c=84') |
|
|
|
# links.append('http://gofishbybookb4a2kvviuygmwjqfxx7nqsovweogs2cxvqvexhe7edyd.onion/?c=84') |
|
|
|
# Exploits and Kits |
|
|
|
links.append('http://gofishbybookb4a2kvviuygmwjqfxx7nqsovweogs2cxvqvexhe7edyd.onion/?c=107') |
|
|
|
# Botnets and Malware |
|
|
|
links.append('http://gofishbybookb4a2kvviuygmwjqfxx7nqsovweogs2cxvqvexhe7edyd.onion/?c=97') |
|
|
|
# links.append('http://gofishbybookb4a2kvviuygmwjqfxx7nqsovweogs2cxvqvexhe7edyd.onion/?c=97') |
|
|
|
# Other Software |
|
|
|
links.append('http://gofishbybookb4a2kvviuygmwjqfxx7nqsovweogs2cxvqvexhe7edyd.onion/?c=108') |
|
|
|
# links.append('http://gofishbybookb4a2kvviuygmwjqfxx7nqsovweogs2cxvqvexhe7edyd.onion/?c=108') |
|
|
|
# Hacking Guide |
|
|
|
links.append('http://gofishbybookb4a2kvviuygmwjqfxx7nqsovweogs2cxvqvexhe7edyd.onion/?c=129') |
|
|
|
# links.append('http://gofishbybookb4a2kvviuygmwjqfxx7nqsovweogs2cxvqvexhe7edyd.onion/?c=129') |
|
|
|
# Fraud (mostly carding) |
|
|
|
links.append('http://gofishbybookb4a2kvviuygmwjqfxx7nqsovweogs2cxvqvexhe7edyd.onion/?c=128') |
|
|
|
# links.append('http://gofishbybookb4a2kvviuygmwjqfxx7nqsovweogs2cxvqvexhe7edyd.onion/?c=128') |
|
|
|
|
|
|
|
return links |
|
|
|
|
|
|
@ -212,7 +211,6 @@ def crawlForum(driver): |
|
|
|
driver.refresh() |
|
|
|
html = driver.page_source |
|
|
|
savePage(driver, html, link) |
|
|
|
|
|
|
|
list = productPages(html) |
|
|
|
|
|
|
|
for item in list: |
|
|
@ -225,16 +223,17 @@ def crawlForum(driver): |
|
|
|
driver.back() |
|
|
|
|
|
|
|
# comment out |
|
|
|
break |
|
|
|
# break |
|
|
|
|
|
|
|
# comment out |
|
|
|
if count == 1: |
|
|
|
break |
|
|
|
# if count == 1: |
|
|
|
# break |
|
|
|
|
|
|
|
try: |
|
|
|
link = driver.find_element(by=By.XPATH, value='//a[contains(text(), "Next")]').get_attribute('href') |
|
|
|
link = driver.find_element(by=By.XPATH, value='/html/body/div/div[3]/div[2]/div[2]/nav/ul/li[3]/a').get_attribute('href') |
|
|
|
if link == "": |
|
|
|
raise NoSuchElementException |
|
|
|
link = urlparse.urljoin(baseURL, str(link)) |
|
|
|
count += 1 |
|
|
|
|
|
|
|
except NoSuchElementException: |
|
|
@ -249,14 +248,14 @@ def crawlForum(driver): |
|
|
|
|
|
|
|
# Returns True if the link is a Topic link; may need to change for every website |
|
|
|
def isDescriptionLink(url):
    """Return True if *url* looks like a description-page link.

    The check is a plain substring test for ``'a='`` in the URL. The
    marker is site-specific and may need to change for every website.

    NOTE(review): the source showed both ``'item'`` and ``'a='`` as the
    condition; ``'a='`` is taken to be the current one -- confirm.

    :param url: the link to classify, as a string
    :return: True when the URL contains ``'a='``, otherwise False
    """
    return 'a=' in url
|
|
|
|
|
|
|
|
|
|
|
# Returns True if the link is a listingPage link, may need to change for every website |
|
|
|
def isListingLink(url):
    """Return True if *url* looks like a listing-page link.

    The check is a plain substring test for ``'c='`` in the URL, which
    is the form of the ``?c=<number>`` links this crawler collects as
    its listing pages. The marker is site-specific and may need to
    change for every website.

    NOTE(review): the source showed both ``'category='`` and ``'c='`` as
    the condition; ``'c='`` is taken to be the current one -- confirm.

    :param url: the link to classify, as a string
    :return: True when the URL contains ``'c='``, otherwise False
    """
    return 'c=' in url
|
|
|
|
|
|
|