Browse Source

Finished the HiddenMarket parser. Tests ran successfully. HiddenMarket is complete.

main
Joshua 1 year ago
parent
commit
0baf9ff36d
3 changed files with 55 additions and 17 deletions
  1. +4
    -4
      MarketPlaces/HiddenMarket/crawler_selenium.py
  2. +16
    -13
      MarketPlaces/HiddenMarket/parser.py
  3. +35
    -0
      MarketPlaces/Initialization/geckodriver.log

+ 4
- 4
MarketPlaces/HiddenMarket/crawler_selenium.py View File

@ -211,7 +211,7 @@ def getInterestedLinks():
links = []
# # Civil Software
links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/civil_softwares')
# links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/civil_softwares')
# # Tutorials - Carding
# links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/carding')
# # Digital - Hacks
@ -227,7 +227,7 @@ def getInterestedLinks():
# Tutorials - Worms
# links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/worms')
# Tutorials - Viruses
# links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/viruses')
links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/viruses')
# Tutorials - Trojans
# links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/trojans')
# Tutorials - Botnets
@ -278,8 +278,8 @@ def crawlForum(driver):
# break
# comment out
if count == 2:
break
# if count == 2:
# break
try:
pageCount += 1


+ 16
- 13
MarketPlaces/HiddenMarket/parser.py View File

@ -147,27 +147,29 @@ def hiddenmarket_listing_parser(soup):
shipTo = [] # 19 Product_ShippedTo
href = [] # 20 Product_Links
listing = soup.findAll('div', {"class": "info"})
listing = soup.findAll('div', {"class": "item"})
# Populating the Number of Products
nm = len(listing)
# Finding Category
cat = soup.find("div", {'class': "heading"}).text
cat = cat.replace(",", "")
cat = cat.strip()
# cat = soup.find("div", {'class': "heading"}).text
# cat = cat.replace(",", "")
# cat = cat.strip()
for card in listing:
category.append(cat)
# category.append(cat)
# Adding the url to the list of urls
link = card.find_all('a')
link = link[1].get('href')
# Adding the url to the list of urls TODO: fix this - the href lookup result below is discarded, so the sibling element is appended instead of the URL
link = card.next_sibling
link.find('a').get('href')
href.append(link)
# Finding Product Name
product = card.next_sibling.find('div', {'class': "title"}).find('a').text
product = card.next_sibling.find('div', {'class': "title"})
product = product.text
product = product.replace('\n', ' ')
product = product.replace(",", "")
product = product.strip()
@ -185,7 +187,8 @@ def hiddenmarket_listing_parser(soup):
usd = usd.strip()
USD.append(usd)
tb = card.next_sibling.find("span", {"class": "stats"}).find_all('td')
tb = card.next_sibling.find("div", {"class": "stats"})
tb = tb.find_all('td')
# Finding Reviews
num = tb[-1].text
@ -193,11 +196,11 @@ def hiddenmarket_listing_parser(soup):
reviews.append(num)
# Finding Views
view = tb[0].text.strip()
view = tb[-3].text.strip()
views.append(view)
# Finding Num of Sales
sale = tb[1].text.strip()
sale = tb[-2].text.strip()
sold.append(sale)
# Finding shipping info
@ -209,7 +212,7 @@ def hiddenmarket_listing_parser(soup):
destination = shipping[1].strip()
shipTo.append(destination)
# Finding description
# Finding description (site only shows partial description on listing pages)
description = card.next_sibling.find('div', {'class': "description"}).text
description = description.replace("\n", " ")
description = description.replace("\r", " ")


+ 35
- 0
MarketPlaces/Initialization/geckodriver.log View File

@ -17053,3 +17053,38 @@ unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1691118575980 geckodriver INFO Listening on 127.0.0.1:56897
1691118579673 mozrunner::runner INFO Running command: "C:\\Users\\John Wick\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "56898" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\JOHNWI~1\\AppData\\Local\\Temp\\rust_mozprofileZn6cQv"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
console.log: "TorConnect: Observed profile-after-change"
console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change"
1691118580325 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:56898/devtools/browser/e9283719-33ec-4df0-b591-2a2eab55d4be
1691118581896 Marionette INFO Listening on port 58459
1691118582358 RemoteAgent WARN TLS certificate errors will be ignored for this session
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
1691118809392 Marionette INFO Stopped listening on port 58459
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
1691118809550 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138

Loading…
Cancel
Save