Browse Source

Merge branch 'khoi-dev' of https://gitlab.com/dw9372422/dw_pipeline_test into khoi-dev

main
Khoi 1 year ago
parent
commit
adf046c3e4
8 changed files with 291 additions and 3 deletions
  1. +2
    -0
      Forums/Initialization/forums_mining.py
  2. +151
    -0
      Forums/Initialization/geckodriver.log
  3. +14
    -0
      Forums/OnniForums/HTML_Pages/06272023/Listing/httponnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qdonionForumHackingCrackingtutorials.html
  4. +6
    -0
      Forums/OnniForums/parser.py
  5. +2
    -2
      MarketPlaces/MikesGrandStore/parser.py
  6. +80
    -1
      MarketPlaces/ThiefWorld/parser.py
  7. +5
    -0
      setup.ini
  8. +31
    -0
      test.py

+ 2
- 0
Forums/Initialization/forums_mining.py View File

@ -112,6 +112,8 @@ if __name__ == '__main__':
elif forum == "HiddenAnswers":
crawlerHiddenAnswers()


+ 151
- 0
Forums/Initialization/geckodriver.log View File

@ -6228,6 +6228,7 @@ close()@RemoteAgent.jsm:138
1687895546413 geckodriver INFO Listening on 127.0.0.1:52237
1687895550932 mozrunner::runner INFO Running command: "C:\\Users\\\\Helium\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "52238" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofileiOR21Q"
=======
<<<<<<< HEAD
1687896430885 geckodriver INFO Listening on 127.0.0.1:50135
1687896434527 mozrunner::runner INFO Running command: "C:\\Users\\calsyslab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" ... "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofilenQCzgp"
>>>>>>> 98de158ee6f14b35b2a5ac61427d7de36b4ee442
@ -6247,6 +6248,8 @@ JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't fin
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
=======
>>>>>>> 6485ddb42552dee0b828f020640f169235a9b5a8
<<<<<<< HEAD
DevTools listening on ws://localhost:52238/devtools/browser/ad1dc524-5cad-4983-9dd6-c7f6f3d5caee
1687895553974 Marionette INFO Listening on port 52243
@ -6345,6 +6348,7 @@ console.error: TorMonitorService:
Tor not running, not starting to monitor it.
1687300343567 RemoteAgent WARN TLS certificate errors will be ignored for this session
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
<<<<<<< HEAD
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
@ -6424,6 +6428,12 @@ console.debug: TorProtocolService:
TorProtocolService initialized
console.debug: TorMonitorService:
TorMonitorService initialized
=======
=======
1687895546413 geckodriver INFO Listening on 127.0.0.1:52237
1687895550932 mozrunner::runner INFO Running command: "C:\\Users\\\\Helium\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "52238" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofileiOR21Q"
>>>>>>> 4856cd9f1f47ee8e0bf83456fc261e3c59c63faa
>>>>>>> 6485ddb42552dee0b828f020640f169235a9b5a8
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
@ -6655,6 +6665,7 @@ awaitPromise@resource://gre/modules/addons/XPIProvider.jsm:220:15
processFileChanges@resource://gre/modules/addons/XPIDatabase.jsm:3504:19
getNewSideloads@resource://gre/modules/addons/XPIProvider.jsm:3111:28
<<<<<<< HEAD
JavaScript error: http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
console.debug: TorMonitorService:
Control port not ready, waiting 10s.
@ -6738,15 +6749,46 @@ JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Cou
[Parent 6792, IPC I/O Parent] WARNING: DuplicateHandle failed for handle 0 in TransferHandles: file /var/tmp/build/firefox-b1c79245b9a4/ipc/chromium/src/chrome/common/ipc_channel_win.cc:723
1688166493894 geckodriver INFO Listening on 127.0.0.1:65516
1688166497807 mozrunner::runner INFO Running command: "C:\\\\Users\\\\minhkhoitran\\\\Desktop\\\\Tor Browser\\\\Browser\\\\firefox.exe" "--marionette" "--remote-debugging-port" "65517" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\MINHKH~1\\AppData\\Local\\Temp\\rust_mozprofileHDQjHd"
=======
1687895804907 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
<<<<<<< HEAD
>>>>>>> d30c8066e307536b5e951ec07a15f08833074d5e
1687300338814 geckodriver INFO Listening on 127.0.0.1:50194
1687300342525 mozrunner::runner INFO Running command: "C:\\Users\\minhkhoitran\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50195" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\MINHKH~1\\AppData\\Local\\Temp\\rust_mozprofileoLmKgV"
=======
>>>>>>> e327ce67264bf947eb8fe1873f1aeffac0b830d9
1687900254145 geckodriver INFO Listening on 127.0.0.1:53931
1687900258204 mozrunner::runner INFO Running command: "C:\\Users\\\\dabadcuber5\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-po ... "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\DABADC~1\\AppData\\Local\\Temp\\rust_mozprofilerbpiWR"
>>>>>>> 4856cd9f1f47ee8e0bf83456fc261e3c59c63faa
>>>>>>> 6485ddb42552dee0b828f020640f169235a9b5a8
console.info: SecurityLevel: Listening for messages from NoScript.
console.info: SecurityLevel: Initializing security-prefs.js
console.info: SecurityLevel: security-prefs.js initialization complete
console.info: TorProtocolService: SOCKS port type: TCP
console.info: TorProtocolService: SOCKS host: 127.0.0.1
console.info: TorProtocolService: SOCKS port: 9150
<<<<<<< HEAD
1688166498745 Marionette INFO Marionette enabled
1688166498759 Marionette INFO Listening on port 59088
WebDriver BiDi listening on ws://localhost:65517
=======
<<<<<<< HEAD
1687300343376 Marionette INFO Marionette enabled
1687300343386 Marionette INFO Listening on port 50202
WebDriver BiDi listening on ws://localhost:50195
Read port: 50202
=======
1687900259436 Marionette INFO Marionette enabled
1687900259455 Marionette INFO Listening on port 53942
WebDriver BiDi listening on ws://localhost:53932
Read port: 53942
>>>>>>> 4856cd9f1f47ee8e0bf83456fc261e3c59c63faa
>>>>>>> 6485ddb42552dee0b828f020640f169235a9b5a8
console.debug: TorProtocolService:
TorProtocolService initialized
Read port: 59088
@ -6758,13 +6800,39 @@ console.log: "TorConnect: Entering Initial state"
console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
<<<<<<< HEAD
1688166498941 RemoteAgent WARN TLS certificate errors will be ignored for this session
=======
<<<<<<< HEAD
console.error: "getTorFile: cannot get torrc-defaults" (new Error("torrc-defaults file not found: C:\\Users\\minhkhoitran\\Desktop\\Tor Browser\\Browser\\TorBrowser\\Tor\\torrc-defaults", "resource://gre/modules/TorLauncherUtil.jsm", 245))
console.error: TorProcess:
startTor error:
Message: TypeError: torrcDefaultsFile is null
Stack:
_makeArgs@resource://gre/modules/TorProcess.jsm:296:23
start@resource://gre/modules/TorProcess.jsm:81:12
console.error: TorMonitorService:
Tor not running, not starting to monitor it.
1687300343567 RemoteAgent WARN TLS certificate errors will be ignored for this session
=======
1687900259738 RemoteAgent WARN TLS certificate errors will be ignored for this session
>>>>>>> 4856cd9f1f47ee8e0bf83456fc261e3c59c63faa
>>>>>>> 6485ddb42552dee0b828f020640f169235a9b5a8
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
<<<<<<< HEAD
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
=======
<<<<<<< HEAD
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:50195/devtools/browser/0948c23a-5b0a-4841-b1b3-ea335a196c82
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\MINHKH~1\AppData\Local\Temp\rust_mozprofileoLmKgV\thumbnails) because it does not exist
=======
>>>>>>> 6485ddb42552dee0b828f020640f169235a9b5a8
console.info: TorMonitorService: tor started
console.error: TorMonitorService:
Cannot open a control port connection
@ -6897,11 +6965,16 @@ processFileChanges@resource://gre/modules/addons/XPIDatabase.jsm:3504:19
getNewSideloads@resource://gre/modules/addons/XPIProvider.jsm:3111:28
console.log: "TorConnect: Observed TorProcessExited"
<<<<<<< HEAD
=======
>>>>>>> 4856cd9f1f47ee8e0bf83456fc261e3c59c63faa
>>>>>>> 6485ddb42552dee0b828f020640f169235a9b5a8
console.warn: LoginRecipes: "getRecipes: falling back to a synchronous message for:" "http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion"
console.warn: LoginRecipes: "getRecipes: falling back to a synchronous message for:" "http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion"
JavaScript error: resource://gre/modules/LoginManagerParent.jsm, line 136: TypeError: gRecipeManager is null
console.warn: LoginRecipes: "getRecipes: falling back to a synchronous message for:" "http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion"
JavaScript error: resource://gre/modules/LoginManagerParent.jsm, line 136: TypeError: gRecipeManager is null
<<<<<<< HEAD
1688166519421 Marionette WARN TimedPromise timed out after 500 ms: stacktrace:
TimedPromise/<@chrome://remote/content/marionette/sync.js:234:19
TimedPromise@chrome://remote/content/marionette/sync.js:219:10
@ -7503,3 +7576,81 @@ JavaScript error: http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4q
JavaScript error: http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
=======
<<<<<<< HEAD
console.warn: LoginRecipes: "getRecipes: falling back to a synchronous message for:" "http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion"
JavaScript error: resource://gre/modules/LoginManagerParent.jsm, line 136: TypeError: gRecipeManager is null
console.warn: LoginRecipes: "getRecipes: falling back to a synchronous message for:" "http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion"
JavaScript error: resource://gre/modules/LoginManagerParent.jsm, line 136: TypeError: gRecipeManager is null
console.warn: LoginRecipes: "getRecipes: falling back to a synchronous message for:" "http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion"
JavaScript error: resource://gre/modules/LoginManagerParent.jsm, line 136: TypeError: gRecipeManager is null
console.warn: LoginRecipes: "getRecipes: falling back to a synchronous message for:" "http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion"
JavaScript error: resource://gre/modules/LoginManagerParent.jsm, line 136: TypeError: gRecipeManager is null
console.warn: LoginRecipes: "getRecipes: falling back to a synchronous message for:" "http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion"
JavaScript error: resource://gre/modules/LoginManagerParent.jsm, line 136: TypeError: gRecipeManager is null
JavaScript warning: https://newassets.hcaptcha.com/c/45108af/hsw.js line 1 > Function, line 3: readPixels: Not allowed in Resist Fingerprinting Mode
console.warn: LoginRecipes: "getRecipes: falling back to a synchronous message for:" "http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion"
JavaScript error: resource://gre/modules/LoginManagerParent.jsm, line 136: TypeError: gRecipeManager is null
console.warn: LoginRecipes: "getRecipes: falling back to a synchronous message for:" "http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion"
JavaScript error: resource://gre/modules/LoginManagerParent.jsm, line 136: TypeError: gRecipeManager is null
console.warn: LoginRecipes: "getRecipes: falling back to a synchronous message for:" "http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion"
JavaScript error: resource://gre/modules/LoginManagerParent.jsm, line 136: TypeError: gRecipeManager is null
JavaScript warning: https://newassets.hcaptcha.com/c/45108af/hsw.js line 1 > Function, line 3: readPixels: Not allowed in Resist Fingerprinting Mode
console.warn: LoginRecipes: "getRecipes: falling back to a synchronous message for:" "http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion"
JavaScript error: resource://gre/modules/LoginManagerParent.jsm, line 136: TypeError: gRecipeManager is null
console.warn: LoginRecipes: "getRecipes: falling back to a synchronous message for:" "http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion"
JavaScript error: resource://gre/modules/LoginManagerParent.jsm, line 136: TypeError: gRecipeManager is null
console.warn: LoginRecipes: "getRecipes: falling back to a synchronous message for:" "http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion"
JavaScript error: resource://gre/modules/LoginManagerParent.jsm, line 136: TypeError: gRecipeManager is null
=======
>>>>>>> 4856cd9f1f47ee8e0bf83456fc261e3c59c63faa
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
<<<<<<< HEAD
JavaScript error: moz-extension://fbd37dfe-7646-49ac-9cbb-81373e802524/nscl/content/patchWindow.js, line 294: TypeError: can't access dead object
=======
>>>>>>> 4856cd9f1f47ee8e0bf83456fc261e3c59c63faa
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
<<<<<<< HEAD
=======
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
JavaScript error: https://onniforums.com/jscripts/thread.js?ver=1827, line 188: ReferenceError: spinner is not defined
1687900372557 Marionette WARN Ignoring event 'DOMContentLoaded' because document has an invalid readyState of 'complete'.
Crash Annotation GraphicsCriticalError: |[0][GFX1-]: RenderCompositorSWGL failed mapping default framebuffer, no dt (t=3393.97) [GFX1-]: RenderCompositorSWGL failed mapping default framebuffer, no dt
1687900400047 Marionette INFO Stopped listening on port 53942
JavaScript error: resource://gre/modules/AsyncShutdown.jsm, line 575: uncaught exception: SessionFileInternal.getWriter() called too early! Please read the session file from disk first.
JavaScript error: resource://gre/modules/PageThumbs.jsm, line 718: AbortError: IOUtils.profileBeforeChange getter: IOUtils: profileBeforeChange phase has already finished
>>>>>>> 4856cd9f1f47ee8e0bf83456fc261e3c59c63faa
>>>>>>> 6485ddb42552dee0b828f020640f169235a9b5a8

+ 14
- 0
Forums/OnniForums/HTML_Pages/06272023/Listing/httponnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qdonionForumHackingCrackingtutorials.html
File diff suppressed because it is too large
View File


+ 6
- 0
Forums/OnniForums/parser.py View File

@ -115,9 +115,15 @@ def onniForums_description_parser(soup: BeautifulSoup) -> tuple:
def onniForums_listing_parser(soup: BeautifulSoup):
<<<<<<< HEAD
boardName = "-1" # board name (the previous level of the topic in the Forum categorization tree.
# For instance: Security/Malware/Tools to hack Facebook. The board here should be Malware)
forum = "OnniForums"
=======
boardName = "-1" # board name (the previous level of the topic in the Forum categorization tree.
# For instance: Security/Malware/Tools to hack Facebook. The board here should be Malware)
>>>>>>> 6485ddb42552dee0b828f020640f169235a9b5a8
nm = 0 # this variable should receive the number of topics
topic : List[str] = [] # all topics
user : List[str] = [] # all users of each topic


+ 2
- 2
MarketPlaces/MikesGrandStore/parser.py View File

@ -85,11 +85,11 @@ def mikesGrandStore_description_parser(soup: BeautifulSoup) -> Tuple:
return row
def mikesGtrandStore_listing_parser(soup: BeautifulSoup) -> List:
def mikesGrandStore_listing_parser(soup: BeautifulSoup) -> List:
# Fields to be parsed
nm = 0 # Total_Products (Should be Integer)
mktName = "DarkFox" # 0 Marketplace_Name
mktName = "MikesGrandStore" # 0 Marketplace_Name
name = [] # 1 Product_Name
CVE = [] # 2 Product_CVE_Classification (Common Vulnerabilities and Exposures)
MS = [] # 3 Product_MS_Classification (Microsoft Security)


+ 80
- 1
MarketPlaces/ThiefWorld/parser.py View File

@ -5,7 +5,7 @@ from typing import List, Tuple
from MarketPlaces.Utilities.utilities import *
# Here, we are importing BeautifulSoup to search through the HTML tree
from bs4 import BeautifulSoup, Tag
from bs4 import BeautifulSoup, ResultSet, Tag
def thiefWorld_description_parser(soup: BeautifulSoup) -> Tuple:
@ -230,6 +230,85 @@ def darkfox_description_parser(soup):
return row
def thiefWorld_listing_parser(soup: BeautifulSoup):
    """Parse a ThiefWorld listing page into the organized product rows.

    Every ``div.catalog_item`` in ``soup`` is treated as one product. Only the
    product name, link, description, USD price and vendor are read from the
    page; every other column is filled with the ``'-1'`` placeholder.

    :param soup: BeautifulSoup object for the HTML of a listing page.
    :return: whatever ``organizeProducts`` returns for the collected columns.
    """
    mktName = "ThiefWorld"                                  # 0 Marketplace_Name

    catalogItems: ResultSet[Tag] = soup.find_all('div', {'class': 'catalog_item'})
    nm = len(catalogItems)                                  # Total_Products

    # Columns that are actually scraped from the page.
    name = []                                               # 1 Product_Name
    describe = []                                           # 5 Product_Description
    USD = []                                                # 12 Product_USD_SellingPrice
    vendor = []                                             # 18 Vendor
    href = []                                               # 23 Product_Links (Urls)

    for item in catalogItems:
        titleLink: Tag = item.find('div', {'class': 'title'}).find('a')
        name.append(cleanString(titleLink.text.strip()))
        href.append(titleLink.get('href'))

        descriptionText = item.find('div', {'class': 'text'}).text
        describe.append(cleanString(descriptionText.strip()))

        # Keep only the part of the price text that precedes the 'USD' marker.
        rawPrice = item.find('span', {'class': 'price'}).find('span').text
        USD.append(cleanString(rawPrice.split('USD')[0].strip()))

        vendorName = item.find('div', {'class': 'market over'}).find('a').text
        vendor.append(cleanString(vendorName.strip()))

    def unknownColumn():
        # A fresh list per column so no two columns share the same object.
        return ['-1'] * nm

    CVE = unknownColumn()                                   # 2 Product_CVE_Classification
    MS = unknownColumn()                                    # 3 Product_MS_Classification
    category = unknownColumn()                              # 4 Product_Category
    escrow = unknownColumn()                                # 6 Vendor_Warranty
    views = unknownColumn()                                 # 7 Product_Number_Of_Views
    reviews = unknownColumn()                               # 8 Product_Number_Of_Reviews
    addDate = unknownColumn()                               # 9 Product_AddDate
    lastSeen = unknownColumn()                              # 10 Product_LastViewDate
    BTC = unknownColumn()                                   # 11 Product_BTC_SellingPrice
    EURO = unknownColumn()                                  # 13 Product_EURO_SellingPrice
    sold = unknownColumn()                                  # 14 Product_QuantitySold
    qLeft = unknownColumn()                                 # 15 Product_QuantityLeft
    shipFrom = unknownColumn()                              # 16 Product_ShippedFrom
    shipTo = unknownColumn()                                # 17 Product_ShippedTo
    rating = unknownColumn()                                # 19 Vendor_Rating
    success = unknownColumn()                               # 20 Vendor_Successful_Transactions

    return organizeProducts(
        mktName, nm, name, CVE, MS, category, describe, escrow, views, reviews,
        addDate, lastSeen, BTC, USD, EURO, qLeft, shipFrom, shipTo, vendor,
        rating, success, sold, href,
    )
# Parses a listing page: takes the soup object for the listing page's HTML and extracts the info it needs.
# The extracted info is stored in separate lists, which are returned after being organized.
# @param: soup object for the HTML of a listing page


+ 5
- 0
setup.ini View File

@ -5,8 +5,13 @@ firefox_profile_path = C:\\Users\\minhkhoitran\\Desktop\\Tor Browser\\Browser\\T
geckodriver_path = C:\\NSF-REU\\dw_pipeline_test\\selenium\\geckodriver.exe
[Project]
<<<<<<< HEAD
project_directory = C:\\NSF-REU\\dw_pipeline_test\
shared_folder = \\VBoxSvr\Shared
=======
project_directory = C:\\NSF-REU\\dw_pipeline_test
shared_folder = \\VBoxSvr\\VM_Files_(shared)
>>>>>>> 6485ddb42552dee0b828f020640f169235a9b5a8
[PostgreSQL]
ip = localhost


+ 31
- 0
test.py View File

@ -0,0 +1,31 @@
from dataclasses import dataclass
import datetime
from typing import Iterable, List
@dataclass
class ForumPost:
    """A single forum post record.

    Every text field defaults to the string ``'-1'`` and ``datePosted``
    defaults to 1970-01-01, so an instance can be created with no arguments
    and filled in field by field.
    """
    userName: str = '-1'
    status: str = '-1'
    reputation: str = '-1'
    interest: str = '-1'
    sign: str = '-1'
    post: str = '-1'
    feedback: str = '-1'
    # Annotated with the datetime *class*; the bare name ``datetime`` is the
    # module here (the file does ``import datetime``), not a type.
    datePosted: datetime.datetime = datetime.datetime(1970, 1, 1)
@dataclass
class ForumTopic:
    """A forum topic: its id, name, link and the list of its posts.

    All four fields are required; none has a default.
    """
    topicId: str
    topicName: str
    href: str
    postList: List[ForumPost]  # one ForumPost entry per post in the topic
@dataclass
class ForumListing:
    """A forum listing: the board name, a topic count and the topics.

    All three fields are required; none has a default.
    """
    boardName: str
    topicsNum: int  # presumably len(topicList) -- not enforced here
    topicList: List[ForumTopic]

Loading…
Cancel
Save