finished crawler

Joshua committed 1 year ago · branch: main · commit 0a9d4c56ac
2 changed files with 13 additions and 14 deletions:
  1. MarketPlaces/Ares/crawler_selenium.py (+12 −13)
  2. MarketPlaces/Ares/parser.py (+1 −1)

MarketPlaces/Ares/crawler_selenium.py (+12 −13)

@@ -26,7 +26,7 @@ from MarketPlaces.Ares.parser import ares_links_parser
 from MarketPlaces.Utilities.utilities import cleanHTML

 counter = 1
-baseURL = 'http://aresonewgdyfb7oyyanlvp6e75arfivpkuy7rwo5uegayrv7rkztfwad.onion/signin'
+baseURL = 'http://aresonewgdyfb7oyyanlvp6e75arfivpkuy7rwo5uegayrv7rkztfwad.onion/'


 def startCrawling():
@@ -41,7 +41,7 @@ def startCrawling():
             print(driver.current_url, e)
         closeDriver(driver)

-    new_parse(mktName, baseURL, True)
+    # new_parse(mktName, baseURL, True)


 # Returns the name of the website
@@ -52,7 +52,7 @@ def getMKTName():

 # Return the base link of the website
 def getFixedURL():
-    url = 'http://aresonewgdyfb7oyyanlvp6e75arfivpkuy7rwo5uegayrv7rkztfwad.onion/signin'
+    url = 'http://aresonewgdyfb7oyyanlvp6e75arfivpkuy7rwo5uegayrv7rkztfwad.onion/'
     return url
@@ -118,7 +118,7 @@ def getAccess():

 def login(driver):
-    input("Press ENTER when CAPTCHA is complete and login page has loaded\n")
+    # input("Press ENTER when CAPTCHA is complete and login page has loaded\n")

     # entering username and password into input boxes
     usernameBox = driver.find_element(by=By.XPATH, value='//*[@id="username"]')
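The change above comments out the manual CAPTCHA pause, so login() now fills the form as soon as it runs. If an automated wait is wanted instead of the ENTER prompt, a minimal sketch using Selenium's explicit waits (the helper name waitForLoginPage and the 300-second timeout are assumptions, not part of this commit):

from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

def waitForLoginPage(driver, timeout=300):
    # Hypothetical helper: block until the username field used by login()
    # exists, i.e. the CAPTCHA has been solved and the sign-in form rendered.
    WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.XPATH, '//*[@id="username"]'))
    )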
@@ -173,19 +173,19 @@ def getInterestedLinks():
     links = []

     # Digital - Malware
-    links.append('http://sn2sfdqay6cxztroslaxa36covrhoowe6a5xug6wlm6ek7nmeiujgvad.onion/category/95c37970-002c-11ec-a5dc-1f4432087ed2')
+    # links.append('http://aresonewgdyfb7oyyanlvp6e75arfivpkuy7rwo5uegayrv7rkztfwad.onion/category/95c37970-002c-11ec-a5dc-1f4432087ed2')
     # Digital - Guides (Mostly carding, some useful hacking guides. probably dont use)
-    links.append('http://sn2sfdqay6cxztroslaxa36covrhoowe6a5xug6wlm6ek7nmeiujgvad.onion/category/9a8bea70-002b-11ec-a3db-c90dd329f662')
+    # links.append('http://aresonewgdyfb7oyyanlvp6e75arfivpkuy7rwo5uegayrv7rkztfwad.onion/category/9a8bea70-002b-11ec-a3db-c90dd329f662')
     # Digital - Hacking
-    links.append('http://sn2sfdqay6cxztroslaxa36covrhoowe6a5xug6wlm6ek7nmeiujgvad.onion/category/a81693f0-002b-11ec-9c39-110550ce4921')
+    # links.append('http://aresonewgdyfb7oyyanlvp6e75arfivpkuy7rwo5uegayrv7rkztfwad.onion/category/a81693f0-002b-11ec-9c39-110550ce4921')
     # Digital - Malware2
-    links.append('http://sn2sfdqay6cxztroslaxa36covrhoowe6a5xug6wlm6ek7nmeiujgvad.onion/category/b3258c50-002b-11ec-b658-876d3d651145')
+    links.append('http://aresonewgdyfb7oyyanlvp6e75arfivpkuy7rwo5uegayrv7rkztfwad.onion/category/b3258c50-002b-11ec-b658-876d3d651145')
     # Digital - Sofware (50/50 hacking stuff and cracked software)
-    links.append('http://sn2sfdqay6cxztroslaxa36covrhoowe6a5xug6wlm6ek7nmeiujgvad.onion/category/cff75df0-002b-11ec-8d0a-81fddeb36bf1')
+    # links.append('http://aresonewgdyfb7oyyanlvp6e75arfivpkuy7rwo5uegayrv7rkztfwad.onion/category/cff75df0-002b-11ec-8d0a-81fddeb36bf1')
     # Digital - Exploits
-    links.append('http://sn2sfdqay6cxztroslaxa36covrhoowe6a5xug6wlm6ek7nmeiujgvad.onion/category/ef029550-002f-11ec-8711-675a8b116ba6')
+    # links.append('http://aresonewgdyfb7oyyanlvp6e75arfivpkuy7rwo5uegayrv7rkztfwad.onion/category/ef029550-002f-11ec-8711-675a8b116ba6')
     # Digital - Tutorials (Mostly random stuff, some useful tutorials, probably dont use)
-    links.append('http://sn2sfdqay6cxztroslaxa36covrhoowe6a5xug6wlm6ek7nmeiujgvad.onion/category/f6e9c3b0-002b-11ec-85aa-c79a6ac8cfe8')
+    # links.append('http://aresonewgdyfb7oyyanlvp6e75arfivpkuy7rwo5uegayrv7rkztfwad.onion/category/f6e9c3b0-002b-11ec-85aa-c79a6ac8cfe8')

     return links
@@ -211,7 +211,6 @@ def crawlForum(driver):
                         driver.refresh()
                     html = driver.page_source
                     savePage(driver, html, link)
-
                     list = productPages(html)
                     for item in list:
@@ -231,7 +230,7 @@ def crawlForum(driver):
                         # break

                 try:
-                    link = driver.find_element(by=By.XPATH, value='//a[contains(text(), "Next")]').get_attribute('href')
+                    link = driver.find_element(by=By.XPATH, value='/html/body/div[6]/div[3]/div/div[2]/nav/ul/li[4]/a').get_attribute('href')
                     if link == "":
                         raise NoSuchElementException
                     count += 1
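The pagination fix above replaces the text-based "Next" locator with an absolute XPath, which breaks as soon as the page layout shifts. One option is to keep both locators and probe them in order; a minimal sketch, assuming the same driver and Selenium imports as crawler_selenium.py (the helper name getNextPageLink is hypothetical):

from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

def getNextPageLink(driver):
    # Hypothetical helper: try the text-based locator first, then fall back
    # to the absolute XPath introduced in this commit.
    for xpath in ('//a[contains(text(), "Next")]',
                  '/html/body/div[6]/div[3]/div/div[2]/nav/ul/li[4]/a'):
        try:
            link = driver.find_element(by=By.XPATH, value=xpath).get_attribute('href')
            if link:
                return link
        except NoSuchElementException:
            continue
    raise NoSuchElementException("no pagination link found")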


MarketPlaces/Ares/parser.py (+1 −1)

@@ -272,7 +272,7 @@ def ares_links_parser(soup):
     # Returning all links that should be visited by the Crawler
     href = []

-    listing = soup.findAll('div', {"id": "itembox"})
+    listing = soup.findAll('div', {"class": "col-md-4 my-md-0 my-2 col-12"})

     # for a in listing:
     #     bae = a.find('a', {"class": "text-info"}, href=True)
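The selector change above switches ares_links_parser from an id lookup to the Bootstrap grid class that wraps each Ares listing cell. A minimal sketch of how the commented-out loop would collect hrefs with the new selector, assuming the product link is the "text-info" anchor from the old code (the rest of the function body is not shown in this diff):

from bs4 import BeautifulSoup

def ares_links_parser(soup):
    # Collect every product link the crawler should visit.
    href = []
    listing = soup.findAll('div', {"class": "col-md-4 my-md-0 my-2 col-12"})
    for a in listing:
        # Assumption: each listing cell holds one anchor styled "text-info",
        # as in the commented-out lines above.
        bae = a.find('a', {"class": "text-info"}, href=True)
        if bae:
            href.append(bae['href'])
    return href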

