Browse Source

DarkMatter and M00nkeyMarket scrapers verified

main
Helium 1 year ago
parent
commit
d5ac12195d
4 changed files with 18 additions and 22 deletions
  1. +12
    -12
      MarketPlaces/DarkMatter/crawler_selenium.py
  2. +1
    -1
      MarketPlaces/Initialization/marketsList.txt
  3. +3
    -7
      MarketPlaces/Initialization/prepare_parser.py
  4. +2
    -2
      MarketPlaces/M00nkeyMarket/crawler_selenium.py

+ 12
- 12
MarketPlaces/DarkMatter/crawler_selenium.py View File

@ -32,17 +32,17 @@ baseURL = 'http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion
# Opens Tor Browser, crawls the website, closes Tor, then parses the crawled pages.
# Acts as the main entry point for the crawler; another function at the end of this file calls it.
def startCrawling():
# opentor()
opentor()
mktName = getMKTName()
# driver = getAccess()
driver = getAccess()
# if driver != 'down':
# try:
# login(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
new_parse(mktName, baseURL, False)
@ -190,11 +190,11 @@ def getInterestedLinks():
# legit
links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=78')
# # hack guides
# links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=94')
links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=94')
# # services
# links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=117')
links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=117')
# # software/malware
# links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=121')
links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=121')
return links


+ 1
- 1
MarketPlaces/Initialization/marketsList.txt View File

@ -1 +1 @@
DarkMatter
M00nkeyMarket

+ 3
- 7
MarketPlaces/Initialization/prepare_parser.py View File

@ -224,8 +224,7 @@ def new_parse(marketPlace, url, createLog):
else:
parseError = True
except Exception as e:# change back
print(e)
except:
nError += 1
print("There was a problem to parse the file " + line1 + " in the listing section!")
@ -268,7 +267,7 @@ def new_parse(marketPlace, url, createLog):
try:
persist_data(url, tuple(rec), cur)
con.commit()
except Exception as e:
except:
trace = traceback.format_exc()
@ -291,10 +290,7 @@ def new_parse(marketPlace, url, createLog):
try:
shutil.move(source, destination)
num_persisted_moved += 1
except Exception as e:
# except:
print(e)
except:
print("There was a problem to move the file " + detPage[key]['filename'] + " in the Description section!")
nError += 1
if createLog:


+ 2
- 2
MarketPlaces/M00nkeyMarket/crawler_selenium.py View File

@ -153,7 +153,7 @@ def login(driver):
# Password here
passwordBox.send_keys('genie_show_metheWorld')
input("Press ENTER when CAPTCHA and exit pressed is completed\n")
input("Press ENTER when CAPTCHA and exit pressed is completed\nWAIT FOR PAGE TO LOAD SOMETIMES THY SEND NEWSLETTERS")
# wait for the listing page to show up (this XPath may need to change depending on the seed URL)
WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
@ -203,7 +203,7 @@ def getInterestedLinks():
# software
links.append('http://moonkey4f2mkcp6hpackeea356puiry27h3dz3hzbt3adbmsk4gs7wyd.onion/search/subcategories?subcategory=30')
# # guides
# links.append('http://moonkey4f2mkcp6hpackeea356puiry27h3dz3hzbt3adbmsk4gs7wyd.onion/search/subcategories?subcategory=17')
links.append('http://moonkey4f2mkcp6hpackeea356puiry27h3dz3hzbt3adbmsk4gs7wyd.onion/search/subcategories?subcategory=17')
return links


Loading…
Cancel
Save