From da98edf98017116f8d917d01be91ad6466587190 Mon Sep 17 00:00:00 2001 From: Helium Date: Tue, 11 Jul 2023 21:36:29 -0700 Subject: [PATCH] finished m00nkey crawler again. --- MarketPlaces/Initialization/geckodriver.log | 179 ++++++++++++++++++ MarketPlaces/Initialization/marketsList.txt | 2 +- .../M00nkeyMarket/crawler_selenium.py | 8 +- MarketPlaces/M00nkeyMarket/parser.py | 4 +- 4 files changed, 185 insertions(+), 8 deletions(-) diff --git a/MarketPlaces/Initialization/geckodriver.log b/MarketPlaces/Initialization/geckodriver.log index 0d2ceb6..8b61c4e 100644 --- a/MarketPlaces/Initialization/geckodriver.log +++ b/MarketPlaces/Initialization/geckodriver.log @@ -15353,3 +15353,182 @@ unwatchForTargets()@TargetList.jsm:37 destructor()@TargetList.jsm:109 stop()@CDP.jsm:104 close()@RemoteAgent.jsm:138 +1689135294320 geckodriver INFO Listening on 127.0.0.1:49892 +1689135303056 mozrunner::runner INFO Running command: "C:\\\\Users\\\\Helium\\\\Desktop\\\\Tor Browser\\\\Browser\\\\firefox.exe" "--marionette" "--remote-debugging-port" "49893" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofiledBfogX" +console.log: "TorSettings: loadFromPrefs()" +console.log: "TorConnect: init()" +console.log: "TorConnect: Entering Initial state" +console.log: "TorConnect: Observed profile-after-change" +console.log: "TorConnect: Observing topic 'TorProcessExited'" +console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'" +console.log: "TorConnect: Observing topic 'torsettings:ready'" +console.log: "TorSettings: Observed profile-after-change" +1689135305636 Marionette INFO Marionette enabled +console.log: "TorConnect: Will load after bootstrap => [about:blank]" +console.error: "Could not load engine blockchair-onion@search.mozilla.org: Error: Extension is invalid" +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined +DevTools listening on ws://localhost:49893/devtools/browser/e63765b6-1b9a-4436-a700-3a2d050b25a6 +1689135311495 Marionette INFO Listening on port 49910 +1689135311850 RemoteAgent WARN TLS certificate errors will be ignored for this session +JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofiledBfogX\thumbnails) because it does not exist +SourceActor threw an exception: [Exception... "Failed to open input source 'blob:moz-extension://3064b8c5-bffd-4bf8-b2f1-210b12185538/7b3955b4-4321-421a-bb33-af67cc25c46b'" nsresult: "0x805303f4 ()" location: "JS frame :: resource://devtools/shared/DevToolsUtils.js :: mainThreadFetch/< :: line 670" data: yes] +Stack: mainThreadFetch/<@resource://devtools/shared/DevToolsUtils.js:670:15 +mainThreadFetch@resource://devtools/shared/DevToolsUtils.js:516:10 +_fetchURLContents@resource://devtools/server/actors/utils/sources-manager.js:442:22 +urlContents@resource://devtools/server/actors/utils/sources-manager.js:406:17 +_resurrectSource@resource://devtools/server/actors/thread.js:2142:35 +addAllSources@resource://devtools/server/actors/thread.js:1509:14 +watch@resource://devtools/server/actors/resources/sources.js:52:17 +watchResources@resource://devtools/server/actors/resources/index.js:239:19 +_watchTargetResources@resource://devtools/server/actors/targets/target-actor-mixin.js:156:24 +addWatcherDataEntry@resource://devtools/server/actors/targets/target-actor-mixin.js:47:20 +_addWatcherDataEntry@resource://devtools/server/connectors/js-window-actor/DevToolsFrameChild.jsm:483:24 +receiveMessage@resource://devtools/server/connectors/js-window-actor/DevToolsFrameChild.jsm:425:21 +Line: 670, column: 0 +console.error: ({}) +SourceActor threw an exception: [Exception... "Failed to open input source 'blob:moz-extension://3064b8c5-bffd-4bf8-b2f1-210b12185538/909eddcc-06f2-4fe4-98f3-5a939a0815c4'" nsresult: "0x805303f4 ()" location: "JS frame :: resource://devtools/shared/DevToolsUtils.js :: mainThreadFetch/< :: line 670" data: yes] +Stack: mainThreadFetch/<@resource://devtools/shared/DevToolsUtils.js:670:15 +mainThreadFetch@resource://devtools/shared/DevToolsUtils.js:516:10 +_fetchURLContents@resource://devtools/server/actors/utils/sources-manager.js:442:22 +urlContents@resource://devtools/server/actors/utils/sources-manager.js:406:17 +_resurrectSource@resource://devtools/server/actors/thread.js:2142:35 +addAllSources@resource://devtools/server/actors/thread.js:1509:14 +watch@resource://devtools/server/actors/resources/sources.js:52:17 +watchResources@resource://devtools/server/actors/resources/index.js:239:19 +_watchTargetResources@resource://devtools/server/actors/targets/target-actor-mixin.js:156:24 +addWatcherDataEntry@resource://devtools/server/actors/targets/target-actor-mixin.js:47:20 +_addWatcherDataEntry@resource://devtools/server/connectors/js-window-actor/DevToolsFrameChild.jsm:483:24 +receiveMessage@resource://devtools/server/connectors/js-window-actor/DevToolsFrameChild.jsm:425:21 +Line: 670, column: 0 +console.error: ({}) +JavaScript error: resource://devtools/shared/DevToolsUtils.js, line 670: Failed to open input source 'blob:moz-extension://3064b8c5-bffd-4bf8-b2f1-210b12185538/7b3955b4-4321-421a-bb33-af67cc25c46b' +JavaScript error: resource://devtools/shared/DevToolsUtils.js, line 670: Failed to open input source 'blob:moz-extension://3064b8c5-bffd-4bf8-b2f1-210b12185538/909eddcc-06f2-4fe4-98f3-5a939a0815c4' +1689135669076 Marionette INFO Stopped listening on port 49910 +JavaScript error: resource://devtools/shared/protocol/Front.js, line 103: Error: Connection closed, pending request to server0.conn0.child4/domnode274, type setNodeValue failed + +Request stack: +request@resource://devtools/shared/protocol/Front.js:289:14 +generateRequestMethods/ [about:blank]" +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +console.error: "Could not load engine blockchair-onion@search.mozilla.org: Error: Extension is invalid" +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined +DevTools listening on ws://localhost:50495/devtools/browser/0cd8d224-de14-402e-bf14-b188a0cfee74 +1689135753779 Marionette INFO Listening on port 50509 +1689135754136 RemoteAgent WARN TLS certificate errors will be ignored for this session +SourceActor threw an exception: [Exception... "Failed to open input source 'blob:moz-extension://3064b8c5-bffd-4bf8-b2f1-210b12185538/6d01013d-594b-4955-9df3-71fff4a95985'" nsresult: "0x805303f4 ()" location: "JS frame :: resource://devtools/shared/DevToolsUtils.js :: mainThreadFetch/< :: line 670" data: yes] +Stack: mainThreadFetch/<@resource://devtools/shared/DevToolsUtils.js:670:15 +mainThreadFetch@resource://devtools/shared/DevToolsUtils.js:516:10 +_fetchURLContents@resource://devtools/server/actors/utils/sources-manager.js:442:22 +urlContents@resource://devtools/server/actors/utils/sources-manager.js:406:17 +_resurrectSource@resource://devtools/server/actors/thread.js:2142:35 +addAllSources@resource://devtools/server/actors/thread.js:1509:14 +watch@resource://devtools/server/actors/resources/sources.js:52:17 +watchResources@resource://devtools/server/actors/resources/index.js:239:19 +_watchTargetResources@resource://devtools/server/actors/targets/target-actor-mixin.js:156:24 +addWatcherDataEntry@resource://devtools/server/actors/targets/target-actor-mixin.js:47:20 +_addWatcherDataEntry@resource://devtools/server/connectors/js-window-actor/DevToolsFrameChild.jsm:483:24 +receiveMessage@resource://devtools/server/connectors/js-window-actor/DevToolsFrameChild.jsm:425:21 +Line: 670, column: 0 +console.error: ({}) +SourceActor threw an exception: [Exception... "Failed to open input source 'blob:moz-extension://3064b8c5-bffd-4bf8-b2f1-210b12185538/3a3901b0-918b-4c2d-88fb-aa4e867073ed'" nsresult: "0x805303f4 ()" location: "JS frame :: resource://devtools/shared/DevToolsUtils.js :: mainThreadFetch/< :: line 670" data: yes] +Stack: mainThreadFetch/<@resource://devtools/shared/DevToolsUtils.js:670:15 +mainThreadFetch@resource://devtools/shared/DevToolsUtils.js:516:10 +_fetchURLContents@resource://devtools/server/actors/utils/sources-manager.js:442:22 +urlContents@resource://devtools/server/actors/utils/sources-manager.js:406:17 +_resurrectSource@resource://devtools/server/actors/thread.js:2142:35 +addAllSources@resource://devtools/server/actors/thread.js:1509:14 +watch@resource://devtools/server/actors/resources/sources.js:52:17 +watchResources@resource://devtools/server/actors/resources/index.js:239:19 +_watchTargetResources@resource://devtools/server/actors/targets/target-actor-mixin.js:156:24 +addWatcherDataEntry@resource://devtools/server/actors/targets/target-actor-mixin.js:47:20 +_addWatcherDataEntry@resource://devtools/server/connectors/js-window-actor/DevToolsFrameChild.jsm:483:24 +receiveMessage@resource://devtools/server/connectors/js-window-actor/DevToolsFrameChild.jsm:425:21 +Line: 670, column: 0 +console.error: ({}) +JavaScript error: resource://devtools/shared/DevToolsUtils.js, line 670: Failed to open input source 'blob:moz-extension://3064b8c5-bffd-4bf8-b2f1-210b12185538/6d01013d-594b-4955-9df3-71fff4a95985' +JavaScript error: resource://devtools/shared/DevToolsUtils.js, line 670: Failed to open input source 'blob:moz-extension://3064b8c5-bffd-4bf8-b2f1-210b12185538/3a3901b0-918b-4c2d-88fb-aa4e867073ed' +1689136122076 Marionette INFO Stopped listening on port 50509 +JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver] +JavaScript error: chrome://remote/content/marionette/cert.js, line 55: NS_ERROR_NOT_AVAILABLE: Component returned failure code: 0x80040111 (NS_ERROR_NOT_AVAILABLE) [nsICertOverrideService.setDisableAllSecurityChecksAndLetAttackersInterceptMyData] +!!! error running onStopped callback: TypeError: callback is not a function +JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first. +JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofileuxmlH9\thumbnails) because it does not exist + +###!!! [Parent][MessageChannel] Error: (msgtype=0x390076,name=PContent::Msg_DestroyBrowsingContextGroup) Closed channel: cannot send/recv + + +###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +1689136122588 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64 +unwatchForTabs()@TargetList.jsm:70 +unwatchForTargets()@TargetList.jsm:37 +destructor()@TargetList.jsm:109 +stop()@CDP.jsm:104 +close()@RemoteAgent.jsm:138 +1689136165618 geckodriver INFO Listening on 127.0.0.1:51080 +1689136174929 mozrunner::runner INFO Running command: "C:\\\\Users\\\\Helium\\\\Desktop\\\\Tor Browser\\\\Browser\\\\firefox.exe" "--marionette" "--remote-debugging-port" "51081" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofilevfpmmI" +console.log: "TorSettings: loadFromPrefs()" +console.log: "TorConnect: init()" +console.log: "TorConnect: Entering Initial state" +console.log: "TorConnect: Observed profile-after-change" +console.log: "TorConnect: Observing topic 'TorProcessExited'" +console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'" +console.log: "TorConnect: Observing topic 'torsettings:ready'" +console.log: "TorSettings: Observed profile-after-change" +1689136177563 Marionette INFO Marionette enabled +console.log: "TorConnect: Will load after bootstrap => [about:blank]" +console.error: "Could not load engine blockchair-onion@search.mozilla.org: Error: Extension is invalid" +JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +DevTools listening on ws://localhost:51081/devtools/browser/ef699bfb-b8a4-403a-a2e6-6beab82da0e5 +1689136181166 Marionette INFO Listening on port 51097 +1689136181511 RemoteAgent WARN TLS certificate errors will be ignored for this session +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. diff --git a/MarketPlaces/Initialization/marketsList.txt b/MarketPlaces/Initialization/marketsList.txt index 559010a..032ecf3 100644 --- a/MarketPlaces/Initialization/marketsList.txt +++ b/MarketPlaces/Initialization/marketsList.txt @@ -1 +1 @@ -Apocalypse \ No newline at end of file +M00nkeyMarket \ No newline at end of file diff --git a/MarketPlaces/M00nkeyMarket/crawler_selenium.py b/MarketPlaces/M00nkeyMarket/crawler_selenium.py index 9726442..dd422ce 100644 --- a/MarketPlaces/M00nkeyMarket/crawler_selenium.py +++ b/MarketPlaces/M00nkeyMarket/crawler_selenium.py @@ -141,7 +141,7 @@ def getAccess(): # then allows for manual solving of captcha in the terminal #@param: current selenium web driver def login(driver): - input("Press ENTER when CAPTCHA is completed\n") + input("Press ENTER when CAPTCHA is completed. This will fill in your login credentials\n") # wait for page to show up (This Xpath may need to change based on different seed url) WebDriverWait(driver, 100).until(EC.visibility_of_element_located( @@ -205,7 +205,7 @@ def getInterestedLinks(): # software links.append('http://moonkey4f2mkcp6hpackeea356puiry27h3dz3hzbt3adbmsk4gs7wyd.onion/search/subcategories?subcategory=30') # # guides - # links.append('http://moonkey4f2mkcp6hpackeea356puiry27h3dz3hzbt3adbmsk4gs7wyd.onion/search/subcategories?subcategory=17') + links.append('http://moonkey4f2mkcp6hpackeea356puiry27h3dz3hzbt3adbmsk4gs7wyd.onion/search/subcategories?subcategory=17') return links @@ -254,9 +254,7 @@ def crawlForum(driver): # break try: - temp = driver.find_element(by=By.CLASS_NAME, value='col-lg-12 flex-fill ml-auto text-right mb-1') - temp2 = temp.find_element(by=By.CLASS_NAME, value="next ml-1 mt-1") - link = temp2.find_element(By.TAG_NAME, value='a').get_attribute('href') + link = driver.find_element(by=By.LINK_TEXT, value='Next ›').get_attribute('href') if link == "": raise NoSuchElementException diff --git a/MarketPlaces/M00nkeyMarket/parser.py b/MarketPlaces/M00nkeyMarket/parser.py index fffdf1b..9faf795 100644 --- a/MarketPlaces/M00nkeyMarket/parser.py +++ b/MarketPlaces/M00nkeyMarket/parser.py @@ -281,10 +281,10 @@ def m00nkey_links_parser(soup): # Returning all links that should be visited by the Crawler href = [] - listing = soup.findAll('h5', {"class": "card-title rounded text-truncate"}) + listing = soup.findAll('div', {"class": "card mt-1"}) for a in listing: - bae = a.find('a', href=True)#card-title rounded text-truncate + bae = a.find('a', href=True)#card-title rounded text-truncate link = bae['href'] href.append(link)