From 7493fb30bb482144564a14b0718e47ae811f9cd7 Mon Sep 17 00:00:00 2001 From: westernmeadow Date: Thu, 29 Jun 2023 12:38:01 -0700 Subject: [PATCH] save html to shared folder examples --- .idea/.gitignore | 1 + .idea/DW_Pipeline_Test.iml | 4 +- .idea/misc.xml | 2 +- Forums/CryptBB/crawler_selenium.py | 25 +- Forums/DB_Connection/db_connection.py | 4 +- Forums/Initialization/forums_mining.py | 7 +- Forums/Initialization/geckodriver.log | 337 ++---------------- Forums/Initialization/prepare_parser.py | 25 +- MarketPlaces/DB_Connection/db_connection.py | 4 +- MarketPlaces/Initialization/marketsList.txt | 2 +- MarketPlaces/Initialization/markets_mining.py | 8 +- MarketPlaces/Initialization/prepare_parser.py | 8 +- MarketPlaces/ThiefWorld/crawler_selenium.py | 38 +- setup.ini | 13 +- 14 files changed, 97 insertions(+), 381 deletions(-) diff --git a/.idea/.gitignore b/.idea/.gitignore index 7b08725..1b8473e 100644 --- a/.idea/.gitignore +++ b/.idea/.gitignore @@ -2,6 +2,7 @@ /shelf/ /workspace.xml /selenium/geckodriver.exe +setup.ini *.html *.log *.png diff --git a/.idea/DW_Pipeline_Test.iml b/.idea/DW_Pipeline_Test.iml index f1b317c..b4bb6d5 100644 --- a/.idea/DW_Pipeline_Test.iml +++ b/.idea/DW_Pipeline_Test.iml @@ -2,7 +2,7 @@ - + @@ -12,8 +12,6 @@ diff --git a/.idea/misc.xml b/.idea/misc.xml index a971a2c..11f1ea0 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,4 +1,4 @@ - + \ No newline at end of file diff --git a/Forums/CryptBB/crawler_selenium.py b/Forums/CryptBB/crawler_selenium.py index 5065999..58072b5 100644 --- a/Forums/CryptBB/crawler_selenium.py +++ b/Forums/CryptBB/crawler_selenium.py @@ -17,24 +17,21 @@ from PIL import Image import urllib.parse as urlparse import os, re, time import subprocess -import configparser from bs4 import BeautifulSoup from Forums.Initialization.prepare_parser import new_parse from Forums.CryptBB.parser import cryptBB_links_parser from Forums.Utilities.utilities import cleanHTML -config = configparser.ConfigParser() -config.read('../../setup.ini') counter = 1 baseURL = 'http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/' # Opens Tor Browser, crawls the website def startCrawling(): - forumName = getForumName() # opentor() + forumName = getForumName() # driver = getAccess() - + # # if driver != 'down': # try: # login(driver) @@ -48,6 +45,8 @@ def startCrawling(): # Opens Tor Browser def opentor(): + from Forums.Initialization.forums_mining import config + global pid print("Connecting Tor...") pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) @@ -133,6 +132,8 @@ def closetor(driver): # Creates FireFox 'driver' and configure its 'Profile' # to use Tor proxy and socket def createFFDriver(): + from Forums.Initialization.forums_mining import config + ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) @@ -186,12 +187,14 @@ def savePage(page, url): # Gets the full path of the page to be saved along with its appropriate file name def getFullPathName(url): - from Forums.Initialization.forums_mining import CURRENT_DATE + from Forums.Initialization.forums_mining import config, CURRENT_DATE + + mainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + getForumName() + "/HTML_Pages") fileName = getNameFromURL(url) if isDescriptionLink(url): - fullPath = r'..\\CryptBB\\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html' + fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html') else: - fullPath = r'..\\CryptBB\\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html' + fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html') return fullPath @@ -208,10 +211,10 @@ def getNameFromURL(url): def getInterestedLinks(): links = [] - # Beginner Programming + # # Beginner Programming links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=86') # # Beginner Carding and Fraud - links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=91') + # links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=91') # # Beginner Hacking # links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=87') # # Newbie @@ -287,9 +290,9 @@ def crawlForum(driver): if item == "": raise NoSuchElementException - has_next_topic_page = False else: counter += 1 + except NoSuchElementException: has_next_topic_page = False diff --git a/Forums/DB_Connection/db_connection.py b/Forums/DB_Connection/db_connection.py index eeaf69b..340abfc 100644 --- a/Forums/DB_Connection/db_connection.py +++ b/Forums/DB_Connection/db_connection.py @@ -9,8 +9,8 @@ def connectDataBase(): try: - config = configparser.ConfigParser() - config.read('../../setup.ini') + from Forums.Initialization.forums_mining import config + ip = config.get('PostgreSQL', 'ip') username = config.get('PostgreSQL', 'username') password = config.get('PostgreSQL', 'password') diff --git a/Forums/Initialization/forums_mining.py b/Forums/Initialization/forums_mining.py index 3086ca9..8876190 100644 --- a/Forums/Initialization/forums_mining.py +++ b/Forums/Initialization/forums_mining.py @@ -13,9 +13,11 @@ from Forums.AbyssForum.crawler_selenium import crawler as crawlerAbyssForum from Forums.Altenens.crawler_selenium import crawler as crawlerAltenensForum from Forums.HiddenAnswers.crawler_selenium import crawler as crawlerHiddenAnswers - +import configparser import time +config = configparser.ConfigParser() +config.read('../../setup.ini') CURRENT_DATE = str("%02d" % date.today().month) + str("%02d" % date.today().day) + str("%04d" % date.today().year) @@ -34,7 +36,8 @@ def createDirectory(forum): if forum == 'Reddits': pagesMainDir = '../' + forum else: - pagesMainDir = '../' + forum + "/HTML_Pages" + # pagesMainDir = '../' + forum + "/HTML_Pages" + pagesMainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + forum + "/HTML_Pages") if not os.path.isdir(pagesMainDir): os.makedirs(pagesMainDir) diff --git a/Forums/Initialization/geckodriver.log b/Forums/Initialization/geckodriver.log index fad9864..aecb607 100644 --- a/Forums/Initialization/geckodriver.log +++ b/Forums/Initialization/geckodriver.log @@ -5841,8 +5841,8 @@ unwatchForTargets()@TargetList.jsm:37 destructor()@TargetList.jsm:109 stop()@CDP.jsm:104 close()@RemoteAgent.jsm:138 -1687895546413 geckodriver INFO Listening on 127.0.0.1:52237 -1687895550932 mozrunner::runner INFO Running command: "C:\\Users\\\\Helium\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "52238" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofileiOR21Q" +1687896430885 geckodriver INFO Listening on 127.0.0.1:50135 +1687896434527 mozrunner::runner INFO Running command: "C:\\Users\\calsyslab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" ... "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofilenQCzgp" console.log: "TorSettings: loadFromPrefs()" console.log: "TorConnect: init()" console.log: "TorConnect: Entering Initial state" @@ -5851,7 +5851,7 @@ console.log: "TorConnect: Observing topic 'TorProcessExited'" console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'" console.log: "TorConnect: Observing topic 'torsettings:ready'" console.log: "TorSettings: Observed profile-after-change" -1687895551675 Marionette INFO Marionette enabled +1687896435185 Marionette INFO Marionette enabled console.log: "TorConnect: Will load after bootstrap => [about:blank]" console.error: "Could not load engine blockchair-onion@search.mozilla.org: Error: Extension is invalid" JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. @@ -5859,240 +5859,30 @@ JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't fin JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined -DevTools listening on ws://localhost:52238/devtools/browser/ad1dc524-5cad-4983-9dd6-c7f6f3d5caee -1687895553974 Marionette INFO Listening on port 52243 -1687895554561 RemoteAgent WARN TLS certificate errors will be ignored for this session -JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofileiOR21Q\thumbnails) because it does not exist -1687895804567 Marionette INFO Stopped listening on port 52243 -JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver] -!!! error running onStopped callback: TypeError: callback is not a function -JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first. -JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofileiOR21Q\thumbnails) because it does not exist - -###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost - -1687895804907 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64 -unwatchForTabs()@TargetList.jsm:70 -unwatchForTargets()@TargetList.jsm:37 -destructor()@TargetList.jsm:109 -stop()@CDP.jsm:104 -close()@RemoteAgent.jsm:138 -1687977218822 geckodriver INFO Listening on 127.0.0.1:51022 -1687977226564 mozrunner::runner INFO Running command: "C:\\Users\\\\Helium\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "51023" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofileikuU2J" -console.log: "TorSettings: loadFromPrefs()" -console.log: "TorConnect: init()" -console.log: "TorConnect: Entering Initial state" -console.log: "TorConnect: Observed profile-after-change" -console.log: "TorConnect: Observing topic 'TorProcessExited'" -console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'" -console.log: "TorConnect: Observing topic 'torsettings:ready'" -console.log: "TorSettings: Observed profile-after-change" -1687977228948 Marionette INFO Marionette enabled -console.log: "TorConnect: Will load after bootstrap => [about:blank]" -console.error: "Could not load engine blockchair-onion@search.mozilla.org: Error: Extension is invalid" -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined -DevTools listening on ws://localhost:51023/devtools/browser/3b0200ed-8dcd-4975-a337-55ca97127f81 -1687977234067 Marionette INFO Listening on port 51028 -1687977234672 RemoteAgent WARN TLS certificate errors will be ignored for this session -1687977449724 Marionette INFO Stopped listening on port 51028 -JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver] -!!! error running onStopped callback: TypeError: callback is not a function -JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first. -JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofileikuU2J\thumbnails) because it does not exist - -###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost - -1687977450647 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64 -unwatchForTabs()@TargetList.jsm:70 -unwatchForTargets()@TargetList.jsm:37 -destructor()@TargetList.jsm:109 -stop()@CDP.jsm:104 -close()@RemoteAgent.jsm:138 -1687977513313 geckodriver INFO Listening on 127.0.0.1:51084 -1687977521019 mozrunner::runner INFO Running command: "C:\\Users\\\\Helium\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "51085" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofileWUrtuT" -console.log: "TorSettings: loadFromPrefs()" -console.log: "TorConnect: init()" -console.log: "TorConnect: Entering Initial state" -console.log: "TorConnect: Observed profile-after-change" -console.log: "TorConnect: Observing topic 'TorProcessExited'" -console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'" -console.log: "TorConnect: Observing topic 'torsettings:ready'" -console.log: "TorSettings: Observed profile-after-change" -1687977523015 Marionette INFO Marionette enabled -console.log: "TorConnect: Will load after bootstrap => [about:blank]" -console.error: "Could not load engine blockchair-onion@search.mozilla.org: Error: Extension is invalid" -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined -DevTools listening on ws://localhost:51085/devtools/browser/64d878ac-9491-4b68-8378-3cdcd42b86f9 -1687977528316 Marionette INFO Listening on port 51090 -1687977529126 RemoteAgent WARN TLS certificate errors will be ignored for this session -1687978083314 Marionette INFO Stopped listening on port 51090 -JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver] -!!! error running onStopped callback: TypeError: callback is not a function -JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first. -JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofileWUrtuT\thumbnails) because it does not exist - -###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost - -1687978083874 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64 -unwatchForTabs()@TargetList.jsm:70 -unwatchForTargets()@TargetList.jsm:37 -destructor()@TargetList.jsm:109 -stop()@CDP.jsm:104 -close()@RemoteAgent.jsm:138 -1687978133464 geckodriver INFO Listening on 127.0.0.1:51172 -1687978141034 mozrunner::runner INFO Running command: "C:\\Users\\\\Helium\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "51173" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofileu5IdWT" -console.log: "TorSettings: loadFromPrefs()" -console.log: "TorConnect: init()" -console.log: "TorConnect: Entering Initial state" -console.log: "TorConnect: Observed profile-after-change" -console.log: "TorConnect: Observing topic 'TorProcessExited'" -console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'" -console.log: "TorConnect: Observing topic 'torsettings:ready'" -console.log: "TorSettings: Observed profile-after-change" -1687978143085 Marionette INFO Marionette enabled -console.log: "TorConnect: Will load after bootstrap => [about:blank]" -console.error: "Could not load engine blockchair-onion@search.mozilla.org: Error: Extension is invalid" -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined -DevTools listening on ws://localhost:51173/devtools/browser/92c771f3-77dc-4ad5-9787-19e461c45ad6 -1687978148067 Marionette INFO Listening on port 51178 -1687978148324 RemoteAgent WARN TLS certificate errors will be ignored for this session - -###!!! [Child][MessageChannel] Error: (msgtype=0x390097,name=PContent::Msg_InitBackground) Channel closing: too late to send/recv, messages will be lost - -JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofileu5IdWT\thumbnails) because it does not exist -1687984051859 Marionette INFO Stopped listening on port 51178 -JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver] -!!! error running onStopped callback: TypeError: callback is not a function -JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first. -JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofileu5IdWT\thumbnails) because it does not exist -[Parent 6808, IPC I/O Parent] WARNING: file /var/tmp/build/firefox-b6010b1466c9/ipc/chromium/src/base/process_util_win.cc:167 - -###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost - -1687984052405 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64 -unwatchForTabs()@TargetList.jsm:70 -unwatchForTargets()@TargetList.jsm:37 -destructor()@TargetList.jsm:109 -stop()@CDP.jsm:104 -close()@RemoteAgent.jsm:138 -O Listening on port 51239 -1687978539391 RemoteAgent WARN TLS certificate errors will be ignored for this session -JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofileodTbYM\thumbnails) because it does not exist -1687984050773 Marionette INFO Stopped listening on port 51239 -JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver] -!!! error running onStopped callback: TypeError: callback is not a function -JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first. -JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofileodTbYM\thumbnails) because it does not exist -[Parent 2612, IPC I/O Parent] WARNING: file /var/tmp/build/firefox-b6010b1466c9/ipc/chromium/src/base/process_util_win.cc:167 -1687984051727 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64 -unwatchForTabs()@TargetList.jsm:70 -unwatchForTargets()@TargetList.jsm:37 -destructor()@TargetList.jsm:109 -stop()@CDP.jsm:104 -close()@RemoteAgent.jsm:138 -s://localhost:51280/devtools/browser/d4d6f9cc-7d5f-45e3-8873-a460f62cc4cf -1687978926427 Marionette INFO Listening on port 51285 -1687978926534 RemoteAgent WARN TLS certificate errors will be ignored for this session -1687979030758 Marionette INFO Stopped listening on port 51285 -JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver] -!!! error running onStopped callback: TypeError: callback is not a function -JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first. -JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofileanrFrL\thumbnails) because it does not exist -1687979031575 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64 -unwatchForTabs()@TargetList.jsm:70 -unwatchForTargets()@TargetList.jsm:37 -destructor()@TargetList.jsm:109 -stop()@CDP.jsm:104 -close()@RemoteAgent.jsm:138 -1687979050690 geckodriver INFO Listening on 127.0.0.1:51360 -1687979053723 mozrunner::runner INFO Running command: "C:\\Users\\\\Helium\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "51361" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofile0hAG1R" -console.log: "TorSettings: loadFromPrefs()" -console.log: "TorConnect: init()" -console.log: "TorConnect: Entering Initial state" -console.log: "TorConnect: Observed profile-after-change" -console.log: "TorConnect: Observing topic 'TorProcessExited'" -console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'" -console.log: "TorConnect: Observing topic 'torsettings:ready'" -console.log: "TorSettings: Observed profile-after-change" -1687979054534 Marionette INFO Marionette enabled -console.log: "TorConnect: Will load after bootstrap => [about:blank]" -console.error: "Could not load engine blockchair-onion@search.mozilla.org: Error: Extension is invalid" -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined -DevTools listening on ws://localhost:51361/devtools/browser/92278a26-d591-4e02-9b50-6d94f582bba6 -1687979056856 Marionette INFO Listening on port 51366 -1687979057092 RemoteAgent WARN TLS certificate errors will be ignored for this session -1687979258295 Marionette INFO Stopped listening on port 51366 +DevTools listening on ws://localhost:50136/devtools/browser/773adaec-44e1-4b13-9fac-c38bfb170221 +1687896436579 Marionette INFO Listening on port 50142 +1687896436612 RemoteAgent WARN TLS certificate errors will be ignored for this session +JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\calsyslab\AppData\Local\Temp\rust_mozprofilenQCzgp\thumbnails) because it does not exist +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/, line 2: ReferenceError: $ is not defined +1687896481968 Marionette INFO Stopped listening on port 50142 JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver] !!! error running onStopped callback: TypeError: callback is not a function JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first. -JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofile0hAG1R\thumbnails) because it does not exist +JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\calsyslab\AppData\Local\Temp\rust_mozprofilenQCzgp\thumbnails) because it does not exist -###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost +###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost -1687979258801 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64 -unwatchForTabs()@TargetList.jsm:70 -unwatchForTargets()@TargetList.jsm:37 -destructor()@TargetList.jsm:109 -stop()@CDP.jsm:104 -close()@RemoteAgent.jsm:138 -1687979267242 geckodriver INFO Listening on 127.0.0.1:51432 -1687979271790 mozrunner::runner INFO Running command: "C:\\Users\\\\Helium\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "51433" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofilexKgOT4" -console.log: "TorSettings: loadFromPrefs()" -console.log: "TorConnect: init()" -console.log: "TorConnect: Entering Initial state" -console.log: "TorConnect: Observed profile-after-change" -console.log: "TorConnect: Observing topic 'TorProcessExited'" -console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'" -console.log: "TorConnect: Observing topic 'torsettings:ready'" -console.log: "TorSettings: Observed profile-after-change" -1687979272999 Marionette INFO Marionette enabled -console.log: "TorConnect: Will load after bootstrap => [about:blank]" -console.error: "Could not load engine blockchair-onion@search.mozilla.org: Error: Extension is invalid" -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined -DevTools listening on ws://localhost:51433/devtools/browser/cda6fecb-bd37-4670-968b-8a378fded89f -1687979276192 Marionette INFO Listening on port 51444 -1687979276461 RemoteAgent WARN TLS certificate errors will be ignored for this session -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -1687979332888 Marionette INFO Stopped listening on port 51444 -JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver] -JavaScript error: chrome://remote/content/marionette/cert.js, line 55: NS_ERROR_NOT_AVAILABLE: Component returned failure code: 0x80040111 (NS_ERROR_NOT_AVAILABLE) [nsICertOverrideService.setDisableAllSecurityChecksAndLetAttackersInterceptMyData] -!!! error running onStopped callback: TypeError: callback is not a function -JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first. -JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofilexKgOT4\thumbnails) because it does not exist -[Parent 4980, IPC I/O Parent] WARNING: file /var/tmp/build/firefox-b6010b1466c9/ipc/chromium/src/base/process_util_win.cc:167 ###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost -1687979333650 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64 +1687896482482 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64 unwatchForTabs()@TargetList.jsm:70 unwatchForTargets()@TargetList.jsm:37 destructor()@TargetList.jsm:109 stop()@CDP.jsm:104 close()@RemoteAgent.jsm:138 -1687979430724 geckodriver INFO Listening on 127.0.0.1:51502 -1687979436324 mozrunner::runner INFO Running command: "C:\\Users\\\\Helium\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "51503" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofilegHC201" +1687897304511 geckodriver INFO Listening on 127.0.0.1:50201 +1687897308111 mozrunner::runner INFO Running command: "C:\\Users\\calsyslab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" ... "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofile2TNTj7" console.log: "TorSettings: loadFromPrefs()" console.log: "TorConnect: init()" console.log: "TorConnect: Entering Initial state" @@ -6101,7 +5891,7 @@ console.log: "TorConnect: Observing topic 'TorProcessExited'" console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'" console.log: "TorConnect: Observing topic 'torsettings:ready'" console.log: "TorSettings: Observed profile-after-change" -1687979437856 Marionette INFO Marionette enabled +1687897308686 Marionette INFO Marionette enabled console.log: "TorConnect: Will load after bootstrap => [about:blank]" console.error: "Could not load engine blockchair-onion@search.mozilla.org: Error: Extension is invalid" JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. @@ -6109,101 +5899,16 @@ JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't fin JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined -DevTools listening on ws://localhost:51503/devtools/browser/103a6f45-7bf6-46d2-8040-cefffb477152 -1687979442204 Marionette INFO Listening on port 51508 -1687979442652 RemoteAgent WARN TLS certificate errors will be ignored for this session -JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofilegHC201\thumbnails) because it does not exist -1687984048079 Marionette INFO Stopped listening on port 51508 -JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver] -!!! error running onStopped callback: TypeError: callback is not a function -JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first. -JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofilegHC201\thumbnails) because it does not exist - -###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost - -1687984048659 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64 -unwatchForTabs()@TargetList.jsm:70 -unwatchForTargets()@TargetList.jsm:37 -destructor()@TargetList.jsm:109 -stop()@CDP.jsm:104 -close()@RemoteAgent.jsm:138 -vaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined -DevTools listening on ws://localhost:51622/devtools/browser/8b6f89c5-5489-4aa7-84ae-816a519ac6d2 -1687983200540 Marionette INFO Listening on port 51627 -1687983200642 RemoteAgent WARN TLS certificate errors will be ignored for this session -1687984043915 Marionette INFO Stopped listening on port 51627 -JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver] -!!! error running onStopped callback: TypeError: callback is not a function -JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first. -JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofiletQSuzW\thumbnails) because it does not exist -[Parent 1532, IPC I/O Parent] WARNING: file /var/tmp/build/firefox-b6010b1466c9/ipc/chromium/src/base/process_util_win.cc:167 -1687984044451 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64 -unwatchForTabs()@TargetList.jsm:70 -unwatchForTargets()@TargetList.jsm:37 -destructor()@TargetList.jsm:109 -stop()@CDP.jsm:104 -close()@RemoteAgent.jsm:138 -1687989865551 geckodriver INFO Listening on 127.0.0.1:49687 -1687989870785 mozrunner::runner INFO Running command: "C:\\Users\\\\Helium\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "49688" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofileNUIghb" -console.log: "TorSettings: loadFromPrefs()" -console.log: "TorConnect: init()" -console.log: "TorConnect: Entering Initial state" -console.log: "TorConnect: Observed profile-after-change" -console.log: "TorConnect: Observing topic 'TorProcessExited'" -console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'" -console.log: "TorConnect: Observing topic 'torsettings:ready'" -console.log: "TorSettings: Observed profile-after-change" -1687989872437 Marionette INFO Marionette enabled -console.log: "TorConnect: Will load after bootstrap => [about:blank]" -console.error: "Could not load engine blockchair-onion@search.mozilla.org: Error: Extension is invalid" -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined -DevTools listening on ws://localhost:49688/devtools/browser/05e0b61d-92e1-4c2b-ac81-164fc698ee43 -1687989876314 Marionette INFO Listening on port 49693 -1687989876583 RemoteAgent WARN TLS certificate errors will be ignored for this session -1687989882290 Marionette INFO Stopped listening on port 49693 -JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver] -!!! error running onStopped callback: TypeError: callback is not a function -JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first. -JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofileNUIghb\thumbnails) because it does not exist -1687989883656 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64 -unwatchForTabs()@TargetList.jsm:70 -unwatchForTargets()@TargetList.jsm:37 -destructor()@TargetList.jsm:109 -stop()@CDP.jsm:104 -close()@RemoteAgent.jsm:138 -1687989967990 geckodriver INFO Listening on 127.0.0.1:53543 -1687989972970 mozrunner::runner INFO Running command: "C:\\Users\\\\Helium\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "53544" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofile50PiiS" -console.log: "TorSettings: loadFromPrefs()" -console.log: "TorConnect: init()" -console.log: "TorConnect: Entering Initial state" -console.log: "TorConnect: Observed profile-after-change" -console.log: "TorConnect: Observing topic 'TorProcessExited'" -console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'" -console.log: "TorConnect: Observing topic 'torsettings:ready'" -console.log: "TorSettings: Observed profile-after-change" -1687989974728 Marionette INFO Marionette enabled -console.log: "TorConnect: Will load after bootstrap => [about:blank]" -console.error: "Could not load engine blockchair-onion@search.mozilla.org: Error: Extension is invalid" -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. -JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined -DevTools listening on ws://localhost:53544/devtools/browser/574837da-6642-43f8-a689-8dbe14b1e254 -1687989978232 Marionette INFO Listening on port 53549 -1687989978914 RemoteAgent WARN TLS certificate errors will be ignored for this session -1687990165288 Marionette INFO Stopped listening on port 53549 +DevTools listening on ws://localhost:50202/devtools/browser/c30256b0-c71f-40da-a95f-bb1313b3e35e +1687897310328 Marionette INFO Listening on port 50208 +1687897310788 RemoteAgent WARN TLS certificate errors will be ignored for this session +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/, line 2: ReferenceError: $ is not defined +1687897315273 Marionette INFO Stopped listening on port 50208 JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver] !!! error running onStopped callback: TypeError: callback is not a function JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first. JavaScript error: resource://gre/modules/PageThumbs.jsm, line 709: AbortError: IOUtils.profileBeforeChange getter: IOUtils: profileBeforeChange phase has already finished -[Parent 8704, IPC I/O Parent] WARNING: file /var/tmp/build/firefox-b6010b1466c9/ipc/chromium/src/base/process_util_win.cc:167 -1687990165952 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64 +1687897315776 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64 unwatchForTabs()@TargetList.jsm:70 unwatchForTargets()@TargetList.jsm:37 destructor()@TargetList.jsm:109 diff --git a/Forums/Initialization/prepare_parser.py b/Forums/Initialization/prepare_parser.py index 7b268a1..a57e15d 100644 --- a/Forums/Initialization/prepare_parser.py +++ b/Forums/Initialization/prepare_parser.py @@ -2,12 +2,11 @@ __author__ = 'DarkWeb' import codecs import glob -import os +import os, re import shutil from Forums.DB_Connection.db_connection import * from Forums.BestCardingWorld.parser import * from Forums.CryptBB.parser import * -import re from Forums.Classifier.classify_product import predict # from DarkWebMining_Sample.Forums.Classifier.classify_product import predict_semi @@ -80,7 +79,7 @@ def persist_data(url, row, cur): #calls the different parser methods here depending on the type of html page def new_parse(forum, url, createLog): - from Forums.Initialization.forums_mining import CURRENT_DATE + from Forums.Initialization.forums_mining import config, CURRENT_DATE print("Parsing The " + forum + " Forum and conduct data classification to store the information in the database.") @@ -110,12 +109,14 @@ def new_parse(forum, url, createLog): " in the _Logs folder to read files from this Forum of this date again.") raise SystemExit - # Reading the Listing Html Pages -> to memory - for fileListing in glob.glob(os.path.join("..\\" + forum + "\\HTML_Pages\\" + CURRENT_DATE + "\\Listing", '*.html')): + mainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + forum + "/HTML_Pages") + + # Reading the Listing Html Pages + for fileListing in glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Listing", '*.html')): lines.append(fileListing) - # Reading the Description Html Pages -> to memory - for fileDescription in glob.glob(os.path.join("..\\" + forum + "\\HTML_Pages\\" + CURRENT_DATE + "\\Description" ,'*.html')): + # Reading the Description Html Pages + for fileDescription in glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Description", '*.html')): lns.append(fileDescription) # Parsing the Description Pages and put the tag's content into a dictionary (Hash table) @@ -124,8 +125,8 @@ def new_parse(forum, url, createLog): print("Reading description folder of '" + forum + "', file '" + os.path.basename(line2) + "', index= " + str(index + 1) + " ... " + str(len(lns))) try: - html = codecs.open(line2.strip('\n'), encoding='utf8')#trying t open them in utf8 format - soup = BeautifulSoup(html, "html.parser")#throw into beautiful soup + html = codecs.open(line2.strip('\n'), encoding='utf8') + soup = BeautifulSoup(html, "html.parser") html.close() except: @@ -142,16 +143,14 @@ def new_parse(forum, url, createLog): continue try: - #Where actual parsing occurs + if forum == "BestCardingWorld": rmm = bestcardingworld_description_parser(soup) elif forum == "CryptBB": rmm = cryptBB_description_parser(soup) # key = u"Top:" + rmm[0].upper().strip() + u" User:" + rmm[2][0].upper().strip() - #essentially filename and url - key = u"Url:" + os.path.basename(line2).replace(".html", "")#should end with either no(page+num) or no page+num - + key = u"Url:" + os.path.basename(line2).replace(".html", "") # check if page or page exists at the end of a string followed by a series of numbers #if yes add to other if no add to first page dictionary diff --git a/MarketPlaces/DB_Connection/db_connection.py b/MarketPlaces/DB_Connection/db_connection.py index 97296e3..a1b27ff 100644 --- a/MarketPlaces/DB_Connection/db_connection.py +++ b/MarketPlaces/DB_Connection/db_connection.py @@ -9,8 +9,8 @@ def connectDataBase(): try: - config = configparser.ConfigParser() - config.read('../../setup.ini') + from MarketPlaces.Initialization.markets_mining import config + ip = config.get('PostgreSQL', 'ip') username = config.get('PostgreSQL', 'username') password = config.get('PostgreSQL', 'password') diff --git a/MarketPlaces/Initialization/marketsList.txt b/MarketPlaces/Initialization/marketsList.txt index 3a2f6e1..87f811c 100644 --- a/MarketPlaces/Initialization/marketsList.txt +++ b/MarketPlaces/Initialization/marketsList.txt @@ -1 +1 @@ -DarkMatter \ No newline at end of file +ThiefWorld \ No newline at end of file diff --git a/MarketPlaces/Initialization/markets_mining.py b/MarketPlaces/Initialization/markets_mining.py index c097411..aad5f63 100644 --- a/MarketPlaces/Initialization/markets_mining.py +++ b/MarketPlaces/Initialization/markets_mining.py @@ -21,10 +21,11 @@ from MarketPlaces.CityMarket.crawler_selenium import crawler as crawlerCityMarke from MarketPlaces.DarkMatter.crawler_selenium import crawler as crawlerDarkMatter from MarketPlaces.M00nkeyMarket.crawler_selenium import crawler as crawlerM00nkeyMarket - - +import configparser import time +config = configparser.ConfigParser() +config.read('../../setup.ini') CURRENT_DATE = str("%02d" % date.today().month) + str("%02d" % date.today().day) + str("%04d" % date.today().year) @@ -40,7 +41,8 @@ def getMarkets(): def createDirectory(mkt): # Package should already be there, holding crawler and parser - pagesDir = '../' + mkt + '/HTML_Pages' + # pagesDir = '../' + mkt + '/HTML_Pages' + pagesDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + mkt + "/HTML_Pages") if not os.path.isdir(pagesDir): os.makedirs(pagesDir) diff --git a/MarketPlaces/Initialization/prepare_parser.py b/MarketPlaces/Initialization/prepare_parser.py index de13899..accd697 100644 --- a/MarketPlaces/Initialization/prepare_parser.py +++ b/MarketPlaces/Initialization/prepare_parser.py @@ -71,7 +71,7 @@ def persist_data(url, row, cur): def new_parse(marketPlace, url, createLog): - from MarketPlaces.Initialization.markets_mining import CURRENT_DATE + from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE print("Parsing the " + marketPlace + " marketplace and conduct data classification to store the information in the database.") @@ -100,12 +100,14 @@ def new_parse(marketPlace, url, createLog): " in the _Logs folder to read files from this Market Place of this date again.") raise SystemExit + mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + marketPlace + "/HTML_Pages") + # Reading the Listing Html Pages - for fileListing in glob.glob(os.path.join("..\\" + marketPlace + "\\HTML_Pages\\" + CURRENT_DATE + "\\Listing", '*.html')): + for fileListing in glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Listing", '*.html')): lines.append(fileListing) # Reading the Description Html Pages - for fileDescription in glob.glob(os.path.join("..\\" + marketPlace + "\\HTML_Pages\\" + CURRENT_DATE + "\\Description", '*.html')): + for fileDescription in glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Description", '*.html')): lns.append(fileDescription) # Parsing the Description Pages and put the tag's content into a dictionary (Hash table) diff --git a/MarketPlaces/ThiefWorld/crawler_selenium.py b/MarketPlaces/ThiefWorld/crawler_selenium.py index a8293bb..5478763 100644 --- a/MarketPlaces/ThiefWorld/crawler_selenium.py +++ b/MarketPlaces/ThiefWorld/crawler_selenium.py @@ -24,8 +24,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse from MarketPlaces.ThiefWorld.parser import thiefworld_links_parser from MarketPlaces.Utilities.utilities import cleanHTML -config = configparser.ConfigParser() -config.read('../../setup.ini') counter = 1 baseURL = 'http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion/' @@ -33,24 +31,26 @@ baseURL = 'http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion # Opens Tor Browser, crawls the website, then parses, then closes tor #acts like the main method for the crawler, another function at the end of this code calls this function later def startCrawling(): - opentor() - # mktName = getMKTName() - driver = getAccess() + # opentor() + mktName = getMKTName() + # driver = getAccess() + # + # if driver != 'down': + # try: + # login(driver) + # crawlForum(driver) + # except Exception as e: + # print(driver.current_url, e) + # closetor(driver) - if driver != 'down': - try: - login(driver) - crawlForum(driver) - except Exception as e: - print(driver.current_url, e) - closetor(driver) - - # new_parse(forumName, baseURL, False) + new_parse(mktName, baseURL, False) # Opens Tor Browser #prompts for ENTER input to continue def opentor(): + from MarketPlaces.Initialization.markets_mining import config + global pid print("Connecting Tor...") pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) @@ -89,6 +89,8 @@ def closetor(driver): # Creates FireFox 'driver' and configure its 'Profile' # to use Tor proxy and socket def createFFDriver(): + from MarketPlaces.Initialization.markets_mining import config + ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) @@ -161,12 +163,14 @@ def savePage(page, url): # Gets the full path of the page to be saved along with its appropriate file name #@param: raw url as crawler crawls through every site def getFullPathName(url): - from MarketPlaces.Initialization.markets_mining import CURRENT_DATE + from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE + + mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages") fileName = getNameFromURL(url) if isDescriptionLink(url): - fullPath = r'..\ThiefWorld\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html' + fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html') else: - fullPath = r'..\ThiefWorld\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html' + fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html') return fullPath diff --git a/setup.ini b/setup.ini index 6b3a07e..fd4b09a 100644 --- a/setup.ini +++ b/setup.ini @@ -1,15 +1,14 @@ - [TOR] -firefox_binary_path = C:\Users\\dabadcuber5\Desktop\Tor Browser\Browser\firefox.exe -firefox_profile_path = C:\Users\\dabadcuber5\Desktop\Tor Browser\Browser\TorBrowser\Data\Browser\profile.default -geckodriver_path = C:\Users\\dabadcuber5\\PycharmProjects\dw_pipeline_test\selenium\geckodriver.exe +firefox_binary_path = C:\Users\calsyslab\Desktop\Tor Browser\Browser\firefox.exe +firefox_profile_path = C:\Users\calsyslab\Desktop\Tor Browser\Browser\TorBrowser\Data\Browser\profile.default +geckodriver_path = C:\Users\calsyslab\Projects\dw_pipeline_test\selenium\geckodriver.exe [Project] -project_directory = C:\Users\dabadcuber5\\PycharmProjects\dw_pipeline_test -shared_folder = \\VBoxSvr\VM_Files_(shared) +project_directory = C:\Users\calsyslab\Projects\dw_pipeline_test +shared_folder = \\VBoxSvr\Shared [PostgreSQL] ip = localhost username = postgres -password = Ilovelucky1! +password = password database = darkweb_markets_forums \ No newline at end of file