diff --git a/MarketPlaces/HiddenMarket/crawler_selenium.py b/MarketPlaces/HiddenMarket/crawler_selenium.py index 2582967..8a99400 100644 --- a/MarketPlaces/HiddenMarket/crawler_selenium.py +++ b/MarketPlaces/HiddenMarket/crawler_selenium.py @@ -211,7 +211,7 @@ def getInterestedLinks(): links = [] # # Civil Software - links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/civil_softwares') + # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/civil_softwares') # # Tutorials - Carding # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/carding') # # Digital - Hacks @@ -227,7 +227,7 @@ def getInterestedLinks(): # Tutorials - Worms # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/worms') # Tutorials - Viruses - # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/viruses') + links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/viruses') # Tutorials - Trojans # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/trojans') # Tutorials - Botnets @@ -278,8 +278,8 @@ def crawlForum(driver): # break # comment out - if count == 2: - break + # if count == 2: + # break try: pageCount += 1 diff --git a/MarketPlaces/HiddenMarket/parser.py b/MarketPlaces/HiddenMarket/parser.py index c75e97e..375a0e4 100644 --- a/MarketPlaces/HiddenMarket/parser.py +++ b/MarketPlaces/HiddenMarket/parser.py @@ -147,27 +147,29 @@ def hiddenmarket_listing_parser(soup): shipTo = [] # 19 Product_ShippedTo href = [] # 20 Product_Links - listing = soup.findAll('div', {"class": "info"}) + listing = soup.findAll('div', {"class": "item"}) # Populating the Number of Products nm = len(listing) # Finding Category - cat = soup.find("div", {'class': "heading"}).text - cat = cat.replace(",", "") - cat = cat.strip() + # cat = soup.find("div", {'class': "heading"}).text + # cat = cat.replace(",", "") + # cat = cat.strip() for card in listing: - category.append(cat) + # category.append(cat) + + # Adding the url to the list of urls + link = card.find_all('a') + link = link[1].get('href') - # Adding the url to the list of urls TODO: fix this - link = card.next_sibling - link.find('a').get('href') href.append(link) # Finding Product Name - product = card.next_sibling.find('div', {'class': "title"}).find('a').text + product = card.next_sibling.find('div', {'class': "title"}) + product = product.text product = product.replace('\n', ' ') product = product.replace(",", "") product = product.strip() @@ -185,7 +187,8 @@ def hiddenmarket_listing_parser(soup): usd = usd.strip() USD.append(usd) - tb = card.next_sibling.find("span", {"class": "stats"}).find_all('td') + tb = card.next_sibling.find("div", {"class": "stats"}) + tb = tb.find_all('td') # Finding Reviews num = tb[-1].text @@ -193,11 +196,11 @@ def hiddenmarket_listing_parser(soup): reviews.append(num) # Finding Views - view = tb[0].text.strip() + view = tb[-3].text.strip() views.append(view) # Finding Num of Sales - sale = tb[1].text.strip() + sale = tb[-2].text.strip() sold.append(sale) # Finding shipping info @@ -209,7 +212,7 @@ def hiddenmarket_listing_parser(soup): destination = shipping[1].strip() shipTo.append(destination) - # Finding description + # Finding description (site only shows partial description on listing pages) description = card.next_sibling.find('div', {'class': "description"}).text description = description.replace("\n", " ") description = description.replace("\r", " ") diff --git a/MarketPlaces/Initialization/geckodriver.log b/MarketPlaces/Initialization/geckodriver.log index 77a0a28..18a2522 100644 --- a/MarketPlaces/Initialization/geckodriver.log +++ b/MarketPlaces/Initialization/geckodriver.log @@ -17053,3 +17053,38 @@ unwatchForTargets()@TargetList.jsm:37 destructor()@TargetList.jsm:109 stop()@CDP.jsm:104 close()@RemoteAgent.jsm:138 +1691118575980 geckodriver INFO Listening on 127.0.0.1:56897 +1691118579673 mozrunner::runner INFO Running command: "C:\\Users\\John Wick\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "56898" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\JOHNWI~1\\AppData\\Local\\Temp\\rust_mozprofileZn6cQv" +console.log: "TorSettings: loadFromPrefs()" +console.log: "TorConnect: init()" +console.log: "TorConnect: Entering Initial state" +console.log: "TorConnect: Observed profile-after-change" +console.log: "TorConnect: Observing topic 'TorProcessExited'" +console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'" +console.log: "TorConnect: Observing topic 'torsettings:ready'" +console.log: "TorSettings: Observed profile-after-change" +1691118580325 Marionette INFO Marionette enabled +console.log: "TorConnect: Will load after bootstrap => [about:blank]" +console.error: "Could not load engine blockchair-onion@search.mozilla.org: Error: Extension is invalid" +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined +DevTools listening on ws://localhost:56898/devtools/browser/e9283719-33ec-4df0-b591-2a2eab55d4be +1691118581896 Marionette INFO Listening on port 58459 +1691118582358 RemoteAgent WARN TLS certificate errors will be ignored for this session +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +1691118809392 Marionette INFO Stopped listening on port 58459 +JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver] +!!! error running onStopped callback: TypeError: callback is not a function + +###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +1691118809550 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64 +unwatchForTabs()@TargetList.jsm:70 +unwatchForTargets()@TargetList.jsm:37 +destructor()@TargetList.jsm:109 +stop()@CDP.jsm:104 +close()@RemoteAgent.jsm:138