Browse Source

preparing test project

main
westernmeadow 1 year ago
parent
commit
300241f247
25 changed files with 43 additions and 3111 deletions
  1. +8
    -9
      Forums/BestCardingWorld/crawler_selenium.py
  2. +10
    -10
      Forums/CryptBB/crawler_selenium.py
  3. +0
    -0
      Forums/DWForums/__init__.py
  4. BIN
      Forums/DWForums/captcha.png
  5. +0
    -299
      Forums/DWForums/crawler_selenium.py
  6. +0
    -462
      Forums/DWForums/geckodriver.log
  7. +0
    -330
      Forums/DWForums/parser.py
  8. +0
    -0
      Forums/Dread/__init__.py
  9. BIN
      Forums/Dread/captcha.png
  10. +0
    -299
      Forums/Dread/crawler_selenium.py
  11. +0
    -462
      Forums/Dread/geckodriver.log
  12. +0
    -357
      Forums/Dread/parser.py
  13. BIN
      Forums/Helium/captcha.png
  14. +0
    -327
      Forums/Helium/crawler_selenium.py
  15. +0
    -248
      Forums/Helium/parser.py
  16. BIN
      Forums/Initialization/desc_transformer.pickle
  17. +0
    -3
      Forums/Initialization/forumsList.txt
  18. +6
    -18
      Forums/Initialization/forums_mining.py
  19. +0
    -20
      Forums/Initialization/prepare_parser.py
  20. +7
    -8
      MarketPlaces/DarkFox/crawler_selenium.py
  21. +0
    -192
      MarketPlaces/DarkFox/crawler_seleniumtest.py
  22. +0
    -6
      MarketPlaces/Initialization/marketsList.txt
  23. +6
    -25
      MarketPlaces/Initialization/markets_mining.py
  24. +0
    -30
      MarketPlaces/Initialization/prepare_parser.py
  25. +6
    -6
      MarketPlaces/Tor2door/crawler_selenium.py

+ 8
- 9
Forums/BestCardingWorld/crawler_selenium.py View File

@ -32,11 +32,13 @@ def startCrawling():
# driver = getAccess()
# if driver != 'down':
# crawlForum(driver)
# new_parse(forumName, False)
new_parse(forumName, False)
# try:
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
# closetor(driver)
new_parse(forumName, False)
# Opens Tor Browser
@ -120,14 +122,11 @@ def createFFDriver():
def getAccess():
url = getFixedURL()
driver = createFFDriver()
try:
driver.get(url)
return driver
except:
driver.close()
return 'down'
@ -251,7 +250,7 @@ def crawlForum(driver):
has_next_page = False
except Exception as e:
print(link, e.message)
print(link, e)
i += 1
# finalTime = time.time()


+ 10
- 10
Forums/CryptBB/crawler_selenium.py View File

@ -34,8 +34,11 @@ def startCrawling():
driver = getAccess()
if driver != 'down':
login(driver)
crawlForum(driver)
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
# new_parse(forumName, False)
@ -116,9 +119,9 @@ def getFixedURL():
# Closes Tor Browser
def closetor(driver):
global pid
# global pid
# os.system("taskkill /pid " + str(pro.pid))
os.system("taskkill /t /f /im tor.exe")
# os.system("taskkill /t /f /im tor.exe")
print('Closing Tor...')
driver.close()
time.sleep(3)
@ -165,14 +168,11 @@ def createFFDriver():
def getAccess():
url = getFixedURL()
driver = createFFDriver()
try:
driver.get(url)
return driver
except:
driver.close()
return 'down'
@ -278,7 +278,7 @@ def crawlForum(driver):
try:
temp = driver.find_element(by=By.XPATH, value=
'/html/body/div/div[2]/div/div[2]/div')
link = temp.find_element_by_class_name('pagination_next').get_attribute('href')
link = temp.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute('href')
if link == "":
raise NoSuchElementException
@ -294,7 +294,7 @@ def crawlForum(driver):
has_next_page = False
except Exception as e:
print(link, e.message)
print(link, e)
i += 1
# finalTime = time.time()


+ 0
- 0
Forums/DWForums/__init__.py View File


BIN
Forums/DWForums/captcha.png View File

Before After
Width: 200  |  Height: 60  |  Size: 16 KiB

+ 0
- 299
Forums/DWForums/crawler_selenium.py View File

@ -1,299 +0,0 @@
__author__ = 'DarkWeb'
'''
DWForums Forum Crawler (Selenium)
'''
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
import urllib.parse as urlparse
import os, time
from datetime import date
import subprocess
from bs4 import BeautifulSoup
from Forums.Initialization.prepare_parser import new_parse
from Forums.DWForums.parser import dwForums_links_parser
from Forums.Utilities.utilities import cleanHTML
# Fallback file-name counter: getNameFromURL uses (and then increments) it
# when a URL contains no alphanumeric characters to build a name from.
counter = 1
# Root address of the forum; crawlForum joins topic links against it.
baseURL = 'http://dwforumuugiyderhybcpfxmlmoawgq6z3w6hk45nrnem3p7kwszhybad.onion/'
# Opens Tor Browser, crawls the website
def startCrawling():
    """Open the forum, log in, crawl it, and always shut the browser down.

    Nothing is crawled when ``getAccess`` reports the site as ``'down'``.
    Errors raised while logging in or crawling are printed together with
    the URL the browser was on, and the browser is closed in every case
    so a failed run does not leave a Tor/Firefox instance behind.
    """
    # opentor()
    # forumName = getForumName()
    driver = getAccess()

    if driver != 'down':
        try:
            login(driver)
            crawlForum(driver)
        except Exception as e:
            print(driver.current_url, e)
        finally:
            # Runs on success and on failure alike.
            closetor(driver)

    # new_parse(forumName, False)
# Opens Tor Browser
def opentor():
    """Launch the program named on the first line of ``../../path.txt``.

    The process id of the launched program is stored in the module-level
    ``pid``. After a fixed 7.5 second pause the function blocks until the
    operator presses ENTER.
    """
    global pid
    print("Connecting Tor...")
    # Context manager so the file handle is released immediately.
    with open('../../path.txt') as path_file:
        path = path_file.readline().strip()
    pro = subprocess.Popen(path)
    pid = pro.pid
    time.sleep(7.5)
    input('Tor Connected. Press ENTER to continue\n')
# Login using premade account credentials and do login captcha manually
def login(driver):
    """Log in to the forum with the premade account.

    Clicks the login button, fills the first two ``input`` boxes of the
    login form with the username and password, submits the form, and then
    waits (up to 50 seconds) for an element of the logged-in page to
    become visible.

    Uses the ``find_element(by=..., value=...)`` locator API; the older
    ``find_element_by_*`` helpers are deprecated in Selenium 4.

    :param driver: Selenium WebDriver already pointed at the forum.
    """
    # NOTE(review): credentials are hard-coded below; consider loading
    # them from configuration instead of keeping them in source control.

    # click login button
    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
        (By.CSS_SELECTOR, ".button--icon--user")))
    login_link = driver.find_element(by=By.CSS_SELECTOR, value=".button--icon--user")
    login_link.click()

    # entering username and password into input boxes
    form_xpath = "/html/body/div[4]/div/div[2]/div/form/div[1]"
    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
        (By.XPATH, form_xpath)))
    container = driver.find_element(by=By.XPATH, value=form_xpath)
    boxes = container.find_elements(by=By.CLASS_NAME, value="input")

    # Username here
    boxes[0].send_keys('nice_reamer08')
    # Password here
    boxes[1].send_keys('tjpv$]Nc}XG@`%LM')

    # no captcha on this site

    # click the verify(submit) button
    driver.find_element(by=By.CSS_SELECTOR, value=".button--icon--login").click()

    # wait for listing page show up (This selector may need to change based on different seed url)
    WebDriverWait(driver, 50).until(EC.visibility_of_element_located(
        (By.CSS_SELECTOR, '.p-staffBar-inner > div:nth-child(4) > div:nth-child(1) > a:nth-child(1)')))
# Returns the name of the website
def getForumName():
    """Return the name of the forum this crawler targets."""
    return 'DWForums'
# Return the link of the website
def getFixedURL():
    """Return the fixed entry URL of the forum."""
    return 'http://dwforumuugiyderhybcpfxmlmoawgq6z3w6hk45nrnem3p7kwszhybad.onion/'
# Closes Tor Browser
def closetor(driver):
    """Kill ``tor.exe``, close the browser window, then pause 3 seconds.

    :param driver: Selenium WebDriver whose window is closed.
    """
    # os.system("taskkill /pid " + str(pro.pid))
    kill_command = "taskkill /t /f /im tor.exe"
    os.system(kill_command)
    print('Closing Tor...')
    driver.close()
    time.sleep(3)
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
    """Build a Firefox WebDriver configured from ``../../path.txt``.

    The first three lines of ``../../path.txt`` are passed, in order, to
    ``FirefoxBinary``, ``FirefoxProfile`` and ``Service``. The profile is
    set to use a SOCKS5 proxy at 127.0.0.1:9150 with remote DNS, along
    with the history/cookie/download preferences listed below.

    :return: the configured ``webdriver.Firefox`` instance.
    """
    # Context manager so the file handle is not leaked.
    with open('../../path.txt', 'r') as path_file:
        lines = path_file.readlines()

    ff_binary = FirefoxBinary(lines[0].strip())
    ff_prof = FirefoxProfile(lines[1].strip())

    # (preference name, value) pairs, applied in this order.
    preferences = (
        ("places.history.enabled", False),
        ("privacy.clearOnShutdown.offlineApps", True),
        ("privacy.clearOnShutdown.passwords", True),
        ("privacy.clearOnShutdown.siteSettings", True),
        ("privacy.sanitize.sanitizeOnShutdown", True),
        ("signon.rememberSignons", False),
        ("network.cookie.lifetimePolicy", 2),
        ("network.dns.disablePrefetch", True),
        ("network.http.sendRefererHeader", 0),
        ("permissions.default.image", 3),
        ("browser.download.folderList", 2),
        ("browser.download.manager.showWhenStarting", False),
        ("browser.helperApps.neverAsk.saveToDisk", "text/plain"),
        ('network.proxy.type', 1),
        ("network.proxy.socks_version", 5),
        ('network.proxy.socks', '127.0.0.1'),
        ('network.proxy.socks_port', 9150),
        ('network.proxy.socks_remote_dns', True),
        ("javascript.enabled", True),
    )
    for pref_name, pref_value in preferences:
        ff_prof.set_preference(pref_name, pref_value)
    ff_prof.update_preferences()

    service = Service(lines[2].strip())

    driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
    return driver
def getAccess():
    """Start a browser and load the forum's entry URL.

    :return: the WebDriver on success, or the string ``'down'`` when the
        page could not be loaded. On failure the browser window is closed
        first so it is not left running.
    """
    url = getFixedURL()
    driver = createFFDriver()

    try:
        driver.get(url)
        return driver
    except Exception:
        # Was a bare ``except`` that also left the browser open.
        driver.close()
        return 'down'
# Saves the crawled html page
def savePage(page, url):
    """Clean a crawled HTML page and write it to disk as UTF-8.

    The destination comes from ``getFullPathName(url)``; missing parent
    directories are created.

    :param page: HTML source of the page.
    :param url: link the page was fetched from, used to derive the path.
    """
    cleanPage = cleanHTML(page)
    filePath = getFullPathName(url)
    os.makedirs(os.path.dirname(filePath), exist_ok=True)
    # Context manager guarantees the data is flushed and the handle closed.
    with open(filePath, 'wb') as out_file:
        out_file.write(cleanPage.encode('utf-8'))
# Gets the full path of the page to be saved along with its appropriate file name
def getFullPathName(url):
    """Return the path under which the page for ``url`` is saved.

    The path is ``..\\DWForums\\HTML_Pages\\<MMDDYYYY>\\<kind>\\<name>.html``
    where ``<kind>`` is ``Description`` for description links and
    ``Listing`` otherwise, and ``<name>`` comes from ``getNameFromURL``.
    The doubled backslash separators of the original strings are kept
    unchanged.

    :param url: link of the page being saved.
    """
    fileName = getNameFromURL(url)
    # Read the date once so month/day/year cannot disagree around midnight.
    today = date.today()
    date_folder = "%02d%02d%04d" % (today.month, today.day, today.year)
    page_kind = r'Description\\' if isDescriptionLink(url) else r'Listing\\'
    return r'..\DWForums\HTML_Pages\\' + date_folder + r'\\' + page_kind + fileName + '.html'
# Creates the file name from passed URL
def getNameFromURL(url):
    """Build a file name from the alphanumeric characters of ``url``.

    When the URL has no alphanumeric characters, the current value of the
    module-level ``counter`` is used as the name and the counter is then
    incremented.
    """
    global counter
    alnum_only = ''.join(filter(str.isalnum, url))
    if alnum_only:
        return alnum_only
    fallback = str(counter)
    counter = counter + 1
    return fallback
def getInterestedLinks():
    """Return the list of forum section URLs to crawl, in crawl order."""
    return [
        # Hacking
        'http://dwforumuugiyderhybcpfxmlmoawgq6z3w6hk45nrnem3p7kwszhybad.onion/forums/hacking-forum.33/',
        # Remote Administration
        'http://dwforumuugiyderhybcpfxmlmoawgq6z3w6hk45nrnem3p7kwszhybad.onion/forums/remote-administration.34/',
        # Cracking Tools
        'http://dwforumuugiyderhybcpfxmlmoawgq6z3w6hk45nrnem3p7kwszhybad.onion/forums/cracking-tools.35/',
        # Cracking Tutorials and Other Methods - disabled: error about file not existing
        # 'http://dwforumuugiyderhybcpfxmlmoawgq6z3w6hk45nrnem3p7kwszhybad.onion/forums/cracking-tutorials-other-methods.36/',
        # Combolists and Configs
        'http://dwforumuugiyderhybcpfxmlmoawgq6z3w6hk45nrnem3p7kwszhybad.onion/forums/combolists-and-configs.58/',
        # Paid Software and Antivirus
        'http://dwforumuugiyderhybcpfxmlmoawgq6z3w6hk45nrnem3p7kwszhybad.onion/forums/paid-softwares-and-antivirus.59/',
    ]
def crawlForum(driver):
    """Crawl every section from ``getInterestedLinks`` and save its pages.

    For each section the listing page is saved, then the topic links
    returned by ``topicPages`` are visited and saved, and the "Next" link
    is followed to further listing pages. The two ``break`` statements
    marked "comment out" are test limits kept from the original: they
    stop after the first topic of a listing page and after one "Next"
    page per section.

    An error in one section is printed and the crawl moves on to the next
    section. When all sections are done the function blocks until the
    operator presses ENTER.

    :param driver: logged-in Selenium WebDriver.
    """
    print("Crawling the DWForums forum")

    count = 0
    for link in getInterestedLinks():
        print('Crawling :', link)
        try:
            try:
                driver.get(link)
            except Exception:
                driver.refresh()
            html = driver.page_source
            savePage(html, link)

            has_next_page = True
            while has_next_page:
                topic_links = topicPages(html)
                for item in topic_links:
                    itemURL = urlparse.urljoin(baseURL, str(item))
                    try:
                        driver.get(itemURL)
                    except Exception:
                        driver.refresh()
                    savePage(driver.page_source, item)
                    driver.back()
                    # comment out
                    break

                # comment out
                if count == 1:
                    count = 0
                    break

                try:
                    next_button = driver.find_element(by=By.LINK_TEXT, value="Next")
                    link = next_button.get_attribute('href')
                    # A missing href (None) or an empty one means there is
                    # no further page to follow.
                    if not link:
                        raise NoSuchElementException
                    try:
                        driver.get(link)
                    except Exception:
                        driver.refresh()
                    html = driver.page_source
                    savePage(html, link)
                    count += 1

                except NoSuchElementException:
                    has_next_page = False

        except Exception as e:
            # Python 3 exceptions have no ``.message``; print the exception.
            print(link, e)

    input("Crawling DWForums forum done sucessfully. Press ENTER to continue\n")
# Returns 'True' if the link is Topic link
def isDescriptionLink(url):
    """Return True when ``url`` contains ``/threads/``, else False."""
    return '/threads/' in url
# Returns True if the link is a listingPage link
def isListingLink(url):
    """Return True when ``url`` contains ``/forums/``, else False."""
    return '/forums/' in url
# calling the parser to define the links
def topicPages(html):
    """Parse ``html`` and return what ``dwForums_links_parser`` extracts.

    :param html: HTML source of a listing page.
    """
    parsed_page = BeautifulSoup(html, "html.parser")
    return dwForums_links_parser(parsed_page)
def crawler():
    """Entry point of the module: run ``startCrawling``."""
    startCrawling()

+ 0
- 462
Forums/DWForums/geckodriver.log View File

@ -1,462 +0,0 @@
1655762162153 geckodriver INFO Listening on 127.0.0.1:50333
1655762166434 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50334" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofileqUeAAN"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655762166904 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50334/devtools/browser/fb880205-35da-44a4-83b8-a861ce7125f1
1655762168590 Marionette INFO Listening on port 50341
1655762169104 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655762207624 Marionette INFO Stopped listening on port 50341
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
1655762207923 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655762990320 geckodriver INFO Listening on 127.0.0.1:50884
1655762994595 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50885" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofileurjxEe"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655762995244 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50885/devtools/browser/36503f58-7ab7-4188-81b0-fa1b81f32c0a
1655762996997 Marionette INFO Listening on port 50890
JavaScript error: resource://gre/modules/ExtensionContent.jsm, line 575: TypeError: PrecompiledScript.executeInGlobal: Argument 1 is not an object.
1655762997277 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655763083091 Marionette INFO Stopped listening on port 50890
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
1655763083216 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655763089557 geckodriver INFO Listening on 127.0.0.1:50923
1655763093759 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50924" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofile5mY1qq"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655763094208 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50924/devtools/browser/68ce2df9-521f-4258-94f7-c2e2b199701a
1655763095918 Marionette INFO Listening on port 50929
1655763096408 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655763143935 Marionette INFO Stopped listening on port 50929
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
1655763144495 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655763166975 geckodriver INFO Listening on 127.0.0.1:50947
1655763171175 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50948" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofilenAGZM5"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655763171719 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50948/devtools/browser/0b9966bd-0cec-45cc-9d11-02a2580233f0
1655763173307 Marionette INFO Listening on port 50953
1655763173368 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655763208922 Marionette INFO Stopped listening on port 50953
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
JavaScript error: chrome://remote/content/marionette/cert.js, line 55: NS_ERROR_NOT_AVAILABLE: Component returned failure code: 0x80040111 (NS_ERROR_NOT_AVAILABLE) [nsICertOverrideService.setDisableAllSecurityChecksAndLetAttackersInterceptMyData]
!!! error running onStopped callback: TypeError: callback is not a function
1655763209261 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655763387409 geckodriver INFO Listening on 127.0.0.1:49752
1655763391806 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "49753" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofilevAHJSJ"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655763392827 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:49753/devtools/browser/7fe49a16-940f-4ee8-9a2f-48739a92db78
1655763394700 Marionette INFO Listening on port 49759
1655763395103 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655763430848 Marionette INFO Stopped listening on port 49759
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
JavaScript error: chrome://remote/content/marionette/cert.js, line 55: NS_ERROR_NOT_AVAILABLE: Component returned failure code: 0x80040111 (NS_ERROR_NOT_AVAILABLE) [nsICertOverrideService.setDisableAllSecurityChecksAndLetAttackersInterceptMyData]
!!! error running onStopped callback: TypeError: callback is not a function
1655763431145 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655765589286 geckodriver INFO Listening on 127.0.0.1:50102
1655765621677 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50103" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofile8ecUpb"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655765622149 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:50103/devtools/browser/e5a916fc-987c-43d4-9c12-9c9b88cca242
1655765623776 Marionette INFO Listening on port 50109
1655765623892 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655766125168 Marionette INFO Stopped listening on port 50109
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
1655766125303 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655766505099 geckodriver INFO Listening on 127.0.0.1:50156
1655766509351 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50157" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofileVqvmzh"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655766509876 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50157/devtools/browser/13084c14-64d3-48a5-8b99-4c514e961d4f
1655766511328 Marionette INFO Listening on port 50162
1655766511395 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655766552298 Marionette INFO Stopped listening on port 50162
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
JavaScript error: chrome://remote/content/marionette/cert.js, line 55: NS_ERROR_NOT_AVAILABLE: Component returned failure code: 0x80040111 (NS_ERROR_NOT_AVAILABLE) [nsICertOverrideService.setDisableAllSecurityChecksAndLetAttackersInterceptMyData]
!!! error running onStopped callback: TypeError: callback is not a function
1655766552578 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655767657126 geckodriver INFO Listening on 127.0.0.1:50257
1655767661375 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50258" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofileB9Dzeh"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655767661843 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50258/devtools/browser/867cd318-6430-4b3f-9b63-8c6bdf17636a
1655767663330 Marionette INFO Listening on port 50263
1655767663431 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655767668263 Marionette INFO Stopped listening on port 50263
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
1655767668591 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655767689480 geckodriver INFO Listening on 127.0.0.1:50276
1655767693635 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50277" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofile8ZOQe7"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655767694215 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50277/devtools/browser/a70a3f5b-19af-4a68-99c8-c46086ba9599
1655767695725 Marionette INFO Listening on port 50282
1655767695830 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655767701272 Marionette INFO Stopped listening on port 50282
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
1655767701594 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655767791360 geckodriver INFO Listening on 127.0.0.1:50296
1655767795575 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50297" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofile4GEqBr"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655767796001 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50297/devtools/browser/e2d4e57a-16ce-429d-9804-e9e391199ddf
1655767797534 Marionette INFO Listening on port 50302
1655767797661 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655767802833 Marionette INFO Stopped listening on port 50302
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
1655767803151 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655767902908 geckodriver INFO Listening on 127.0.0.1:50320
1655767907009 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50321" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofileqlJ8ZA"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655767907484 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:50321/devtools/browser/49e77669-01dd-447e-a804-36d42b3400cf
1655767908782 Marionette INFO Listening on port 50326
1655767909069 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655767916218 Marionette INFO Stopped listening on port 50326
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
1655767916523 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655767958348 geckodriver INFO Listening on 127.0.0.1:50340
1655767962557 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50341" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofilekpCdfs"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655767963032 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50341/devtools/browser/b4d1728c-d26f-4537-adc2-5c954b171b13
1655767964591 Marionette INFO Listening on port 50346
1655767964633 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655767973210 Marionette INFO Stopped listening on port 50346
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
1655767973501 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655767998084 geckodriver INFO Listening on 127.0.0.1:50358
1655768002314 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50359" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofileujYNBj"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655768002757 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50359/devtools/browser/04112c45-204d-43f1-9615-34782fd06632
1655768004165 Marionette INFO Listening on port 50364
1655768004375 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655768009787 Marionette INFO Stopped listening on port 50364
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
!!! error running onStopped callback: TypeError: callback is not a function
###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
1655768010100 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655768083966 geckodriver INFO Listening on 127.0.0.1:50386
1655768088234 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50387" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofile39Pl1f"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655768088664 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50387/devtools/browser/3d6cda42-cb60-4c67-8d76-1d186a51887f
1655768090263 Marionette INFO Listening on port 50392
1655768090299 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655768091655 Marionette INFO Stopped listening on port 50392
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
###!!! [Parent][MessageChannel] Error: (msgtype=0x390076,name=PContent::Msg_DestroyBrowsingContextGroup) Closed channel: cannot send/recv
###!!! [Parent][MessageChannel] Error: (msgtype=0x390076,name=PContent::Msg_DestroyBrowsingContextGroup) Closed channel: cannot send/recv
###!!! [Parent][MessageChannel] Error: (msgtype=0x390076,name=PContent::Msg_DestroyBrowsingContextGroup) Closed channel: cannot send/recv
1655768092659 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655768101201 geckodriver INFO Listening on 127.0.0.1:50404
1655768105373 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50405" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofile2YRGSz"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655768105853 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50405/devtools/browser/9c4a4d71-25ff-4980-a54d-8545d6200790
1655768107431 Marionette INFO Listening on port 50410
1655768107530 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655768119155 Marionette INFO Stopped listening on port 50410
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
1655768119438 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655768229472 geckodriver INFO Listening on 127.0.0.1:50431
1655768233458 webdriver::server WARN Rejected request with missing Host header

+ 0
- 330
Forums/DWForums/parser.py View File

@ -1,330 +0,0 @@
__author__ = 'DarkWeb'
# Here, we are importing the auxiliary functions to clean or convert data
from Forums.Utilities.utilities import *
from datetime import date
from datetime import timedelta
import re
# Here, we are importing BeautifulSoup to search through the HTML tree
from bs4 import BeautifulSoup
# This is the method to parse the Description Pages (one page to each topic in the Listing Pages)
def dwForums_description_parser(soup):
    """Parse one DWForums thread (description) page.

    Args:
        soup: BeautifulSoup tree of the thread page.

    Returns:
        A tuple ``(topic, post, user, addDate, feedback, status, reputation,
        sign, interest)``. ``topic`` is the cleaned thread title; every other
        element is a list holding one entry per parsed post, in page order.
        Optional values that are absent from the page are reported as the
        string ``"-1"``; ``feedback`` is always ``"-1"`` because this parser
        extracts no feedback information.

    Raises:
        AttributeError: if the page has no ``h1.p-title-value`` heading.

    Parsing stops at the first post that cannot be parsed; the posts
    collected up to that point are still returned.
    """
    user = []        # author of each post
    addDate = []     # datetime of each post
    feedback = []    # vendor feedback per post (never available here)
    status = []      # author's title/rank per post
    reputation = []  # author's reaction score per post
    sign = []        # author's signature per post
    post = []        # message text of each post
    interest = []    # author's "blurb" per post

    # Thread title (a single heading per page).
    heading = soup.find("h1", {"class": "p-title-value"})
    topic = heading.text
    topic = topic.replace(u'\xa0', ' ')
    topic = topic.replace(",", "")
    topic = topic.replace("\n", "")
    topic = cleanString(topic.strip())

    try:
        posts = soup.find('div', {"class": "js-replyNewMessageContainer"}).find_all(
            'article', {"class": "js-post"}, recursive=False)

        for ipost in posts:
            # Parse the whole post first and append afterwards, so a failure
            # half-way through a post can never leave the lists misaligned.
            (author, posted, title, karma,
             signature, body, blurb) = _dwforums_parse_post(ipost)

            user.append(author)
            addDate.append(posted)
            status.append(title)
            reputation.append(karma)
            sign.append(signature)
            post.append(body)
            interest.append(blurb)
            feedback.append("-1")
    except Exception as exc:
        # Best effort: keep what was parsed so far, but say why we stopped.
        print(f"error when parsing posts: {exc!r}")

    # Final row with all scraped fields.
    row = (topic, post, user, addDate, feedback, status, reputation, sign, interest)
    return row


def _dwforums_parse_post(ipost):
    """Extract ``(author, datetime, status, karma, signature, body, blurb)`` from one post tag."""
    author = cleanString(ipost.find('h4', {"class": "message-name"}).text.strip())

    # Author's title; DWForums has no separate post group to combine it with.
    title_tag = ipost.find('h5', {"class": "userTitle"})
    title = cleanString(title_tag.text.strip() if title_tag is not None else "-1")

    blurb_tag = ipost.find('li', {"class": "blurb"})
    blurb = cleanString(blurb_tag.text.strip() if blurb_tag is not None else "-1")

    # The third "pairs" entry of the user extras is read as the reaction
    # score; users showing fewer entries simply get "-1".
    karma = "-1"
    author_stats = ipost.find('div', {"class": "message-userExtras"})
    if author_stats is not None:
        pairs = author_stats.find_all('dl', {"class": "pairs"})
        if len(pairs) > 2:
            karma = pairs[2].text.replace("Reaction score", "").replace(":", "").strip()
    karma = cleanString(karma)

    # Post date: the first 16 characters of the <time datetime="..."> value,
    # i.e. "YYYY-MM-DDTHH:MM", rewritten as "YYYY-MM-DD, HH:MM".
    postarea = ipost.find('div', {"class": "message-attribution-main"})
    dt = postarea.find('time', {"class": "u-dt"})['datetime']
    dt = dt.strip()[:16].replace("T", ", ")
    # NOTE(review): the two relative-date branches look carried over from
    # another forum parser and are presumably never hit for this attribute;
    # kept unchanged until that is confirmed.
    if "Yesterday" in dt:
        yesterday = (date.today() - timedelta(days=1)).strftime('%m-%d-%Y')
        stime = dt.replace('Yesterday,', '').strip()
        posted = datetime.strptime(yesterday + ', ' + stime, '%m-%d-%Y, %H:%M')
    elif "hours ago" in dt:
        stamp = postarea.find('span', {"class": "post_date"}).find('span')['title']
        posted = datetime.strptime(stamp, '%m-%d-%Y, %H:%M')
    else:
        posted = datetime.strptime(dt, '%Y-%m-%d, %H:%M')

    body = cleanString(ipost.find('article', {"class": "message-body"}).text.strip())

    signature_tag = ipost.find('aside', {"class": "message-signature"})
    signature = cleanString(signature_tag.text.strip() if signature_tag is not None else "-1")

    return author, posted, title, karma, signature, body, blurb
# This is the method to parse the Listing Pages (one page with many posts)
def dwForums_listing_parser(soup):
    """Parse one DWForums listing page (one board page with many threads).

    Args:
        soup: BeautifulSoup tree of the listing page.

    Returns:
        Whatever ``organizeTopics`` builds from the forum name ``"DWForums"``,
        the number of threads found, and the per-thread titles, board name,
        view counts, reply counts, authors, creation datetimes and cleaned
        links.

    Any missing tag in a thread row raises (``AttributeError``/``TypeError``/
    ``IndexError``); nothing is swallowed here.
    """
    topic = []    # thread titles
    user = []     # thread authors
    post = []     # reply count of each thread
    view = []     # view count of each thread
    addDate = []  # creation datetime of each thread
    href = []     # cleaned thread urls (used to join Listing and Description pages)

    # Board name (the level above the thread in the forum tree).
    board = soup.find('h1', {"class": "p-title-value"}).text
    board = cleanString(board.strip())

    # Every thread row carries a class containing "structItem--thread".
    regex = re.compile('.*structItem--thread.*')
    for itopic in soup.find_all("div", {"class": regex}):

        # Thread title
        title = itopic.find("div", {"class": "structItem-title"}).text
        title = title.replace(",", "").replace("\n", "")
        topic.append(cleanString(title.strip()))

        # Thread url
        link = itopic.select_one('a[href^="/threads/"]')['href']
        href.append(cleanLink(link))

        # Thread author
        author = itopic.find('a', {"class": "username"}).text.strip()
        user.append(cleanString(author))

        # First <dl> of the meta cell holds the replies, the second the views.
        meta = itopic.find("div", {"class": "structItem-cell--meta"}).find_all("dl")
        post.append(cleanString(meta[0].find("dd").text))
        view.append(cleanString(meta[1].find("dd").text))

        # Creation date: first 16 characters of the <time datetime="..."> value,
        # i.e. "YYYY-MM-DDTHH:MM", rewritten as "YYYY-MM-DD, HH:MM".
        minor = itopic.find("div", {"class": "structItem-minor"})
        dt = minor.find('time')['datetime']
        dt = dt.strip()[:16].replace("T", ", ")
        # NOTE(review): the "Yesterday" branch is presumably never hit for
        # this attribute; kept unchanged until that is confirmed.
        if "Yesterday" in dt:
            yesterday = (date.today() - timedelta(days=1)).strftime('%m-%d-%Y')
            stime = dt.replace('Yesterday,', '').strip()
            date_time_obj = datetime.strptime(yesterday + ', ' + stime, '%m-%d-%Y, %H:%M')
        else:
            date_time_obj = datetime.strptime(dt, '%Y-%m-%d, %H:%M')
        addDate.append(date_time_obj)

    nm = len(topic)  # number of threads found on this page

    return organizeTopics("DWForums", nm, topic, board, view, post, user, addDate, href)
def dwForums_links_parser(soup):
    """Return the thread links the crawler should visit from a listing page.

    Every ``div`` whose class contains ``structItem--thread`` contributes the
    ``href`` of its first anchor pointing below ``/threads/``. Links are
    returned as found on the page, in page order.
    """
    thread_row = re.compile('.*structItem--thread.*')
    return [
        row.select_one('a[href^="/threads/"]')['href']
        for row in soup.find_all("div", {"class": thread_row})
    ]

+ 0
- 0
Forums/Dread/__init__.py View File


BIN
Forums/Dread/captcha.png View File

Before After
Width: 168  |  Height: 168  |  Size: 46 KiB

+ 0
- 299
Forums/Dread/crawler_selenium.py View File

@ -1,299 +0,0 @@
__author__ = 'DarkWeb'
'''
Dread Forum Crawler (Selenium)
'''
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
import urllib.parse as urlparse
import os, re, time
from datetime import date
import subprocess
from bs4 import BeautifulSoup
from Forums.Initialization.prepare_parser import new_parse
from Forums.Dread.parser import dread_links_parser
from Forums.Utilities.utilities import cleanHTML
# NOTE(review): `counter` is not referenced in the part of the file visible
# here — presumably consumed by the crawling/saving helpers below; confirm.
counter = 1
# Onion address of the Dread forum (the same literal is returned by getFixedURL()).
baseURL = 'http://dreadytofatroptsdj6io7l3xptbet6onoyno2yv7jicoxknyazubrad.onion/'
# Opens Tor Browser, crawls the website
def startCrawling():
    """Crawl the Dread forum: open the site, log in, crawl, close the browser.

    Launching Tor (``opentor``) and parsing the saved pages (``new_parse``)
    are currently disabled, so this function only drives the browser. When
    ``getAccess`` reports the site as ``'down'`` nothing else is done.
    """
    # opentor()
    # forumName = getForumName()
    driver = getAccess()

    if driver != 'down':
        try:
            login(driver)
            crawlForum(driver)
        finally:
            # Release the browser even when login or crawling fails;
            # the original exception still propagates to the caller.
            closetor(driver)

    # new_parse(forumName, False)
# Opens Tor Browser
def opentor():
    """Start the Tor Browser and wait until the operator confirms it is connected.

    The executable path is the first line of ``../../path.txt`` (relative to
    the current working directory). The id of the launched process is stored
    in the module-level ``pid``.
    """
    global pid
    print("Connecting Tor...")
    # Context manager so the file handle is closed right after reading.
    with open('../../path.txt') as path_file:
        path = path_file.readline().strip()
    pro = subprocess.Popen(path)
    pid = pro.pid
    # Give the browser a moment to start before prompting the operator.
    time.sleep(7.5)
    input('Tor Connected. Press ENTER to continue\n')
    return
# Login using premade account credentials and do login captcha manually
def login(driver):
    """Wait for the operator to solve the CAPTCHA, then for the page to appear.

    Args:
        driver: Selenium WebDriver already pointed at the forum.

    No credentials are entered here: the function pauses for manual CAPTCHA
    entry and then waits (up to 100 seconds) until the element at
    ``/html/body/div/div[2]`` is visible.
    """
    # Automated captcha entry is disabled: it runs too slowly, so the captcha
    # expires before it is completed. Kept here for reference only.
    #
    # WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
    #     (By.CSS_SELECTOR, ".image")))
    # inputBoxes = driver.find_elements(by=By.TAG_NAME, value='input')
    # for index, inputBox in enumerate(inputBoxes):
    #     driver.find_element(by=By.CSS_SELECTOR, value='.image').screenshot(r'..\Dread\captcha.png')
    #     im = Image.open(r'..\Dread\captcha.png')
    #     im.show()
    #     userIn = input("Enter character: ")
    #     inputBox.send_keys(userIn)
    #     im.close()
    #     if index != 5:
    #         inputBoxes[index+1].click()
    # driver.find_element(by=By.XPATH, value="/html/body/div/div[2]/div/form/div/input").click()

    input("Press ENTER when CAPTCHA is completed\n")

    # Block until the page behind the captcha has rendered.
    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
        (By.XPATH, "/html/body/div/div[2]")))
# Returns the name of the website
def getForumName():
    """Return the name of the forum handled by this crawler."""
    return 'Dread'
# Return the link of the website
def getFixedURL():
    """Return the fixed entry URL of the forum."""
    return 'http://dreadytofatroptsdj6io7l3xptbet6onoyno2yv7jicoxknyazubrad.onion/'
# Closes Tor Browser
def closetor(driver):
    """Kill the Tor process and shut down the Selenium browser session.

    Args:
        driver: the Selenium WebDriver to shut down.
    """
    # os.system("taskkill /pid " + str(pro.pid))
    os.system("taskkill /t /f /im tor.exe")
    print('Closing Tor...')
    # quit() ends the whole session (every window plus the driver service);
    # close() only closed the current window and could leave the driver
    # process running.
    driver.quit()
    time.sleep(3)
    return
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
    """Create a Firefox WebDriver whose profile uses the local SOCKS proxy.

    Reads three lines from ../../path.txt: the Firefox binary path, the
    Firefox profile path, and the path handed to the driver ``Service``.

    Returns:
        The configured ``webdriver.Firefox`` instance.

    Raises:
        IndexError: if path.txt has fewer than three lines.
    """
    # Context manager guarantees the file is closed once the paths are read.
    with open('../../path.txt', 'r') as pathFile:
        lines = pathFile.readlines()

    ff_binary = FirefoxBinary(lines[0].strip())
    ff_prof = FirefoxProfile(lines[1].strip())

    # Applied in this order; grouped by what each setting controls.
    preferences = (
        # Keep no history or stored state between sessions.
        ("places.history.enabled", False),
        ("privacy.clearOnShutdown.offlineApps", True),
        ("privacy.clearOnShutdown.passwords", True),
        ("privacy.clearOnShutdown.siteSettings", True),
        ("privacy.sanitize.sanitizeOnShutdown", True),
        ("signon.rememberSignons", False),
        ("network.cookie.lifetimePolicy", 2),
        ("network.dns.disablePrefetch", True),
        # ("network.http.sendRefererHeader", 0),
        ("permissions.default.image", 3),
        # Download handling.
        ("browser.download.folderList", 2),
        ("browser.download.manager.showWhenStarting", False),
        ("browser.helperApps.neverAsk.saveToDisk", "text/plain"),
        # Route traffic (including DNS) through the SOCKS5 proxy on
        # 127.0.0.1:9150.
        ('network.proxy.type', 1),
        ("network.proxy.socks_version", 5),
        ('network.proxy.socks', '127.0.0.1'),
        ('network.proxy.socks_port', 9150),
        ('network.proxy.socks_remote_dns', True),
        ("javascript.enabled", True),
        ("xpinstall.signatures.required", False),
    )
    for prefName, prefValue in preferences:
        ff_prof.set_preference(prefName, prefValue)
    ff_prof.update_preferences()

    service = Service(lines[2].strip())
    driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
    return driver
def getAccess():
    """Start the browser and load the forum's landing page.

    Returns:
        The WebDriver on success, or the string 'down' when the page could
        not be loaded.
    """
    url = getFixedURL()
    driver = createFFDriver()
    try:
        driver.get(url)
        return driver
    except Exception:
        # The site is unreachable: shut the browser down instead of leaving
        # it orphaned. Cleanup is best-effort so the caller still gets the
        # documented 'down' marker.
        try:
            driver.quit()
        except Exception:
            pass
        return 'down'
# Saves the crawled html page
def savePage(page, url):
    """Clean the crawled HTML and write it to the file derived from *url*.

    Args:
        page: HTML source of the crawled page.
        url: link the page was fetched from; determines the output path.
    """
    cleanPage = cleanHTML(page)
    filePath = getFullPathName(url)
    os.makedirs(os.path.dirname(filePath), exist_ok=True)
    # Context manager flushes and closes the file deterministically.
    with open(filePath, 'wb') as pageFile:
        pageFile.write(cleanPage.encode('utf-8'))
    return
# Gets the full path of the page to be saved along with its appropriate file name
def getFullPathName(url):
    """Return the path under which the page for *url* is saved.

    The path has the form
    ``..\\Dread\\HTML_Pages\\<MMDDYYYY>\\<Description|Listing>\\<name>.html``;
    description links go to ``Description``, everything else to ``Listing``.

    Args:
        url: link of the crawled page.
    """
    fileName = getNameFromURL(url)
    # Read the date once so month, day and year cannot straddle midnight.
    today = date.today()
    dateStamp = "%02d%02d%04d" % (today.month, today.day, today.year)
    subFolder = r'Description\\' if isDescriptionLink(url) else r'Listing\\'
    fullPath = r'..\Dread\HTML_Pages\\' + dateStamp + r'\\' + subFolder + fileName + '.html'
    return fullPath
# Creates the file name from passed URL
def getNameFromURL(url):
    """Build a file name from the alphanumeric characters of *url*.

    When the URL has no alphanumeric characters, the current value of the
    module-level ``counter`` is used as the name and the counter is
    incremented.
    """
    global counter
    alnumChars = [ch for ch in url if ch.isalnum()]
    if alnumChars:
        return ''.join(alnumChars)
    fallbackName = str(counter)
    counter += 1
    return fallbackName
def getInterestedLinks():
    """Return the board URLs the crawler starts from.

    Commented-out entries are boards that are currently skipped.
    """
    return [
        # OpSec
        # 'http://dreadytofatroptsdj6io7l3xptbet6onoyno2yv7jicoxknyazubrad.onion/d/OpSec?p=40',
        # 'http://dreadytofatroptsdj6io7l3xptbet6onoyno2yv7jicoxknyazubrad.onion/d/OpSec',
        # Hacking 180
        'http://dreadytofatroptsdj6io7l3xptbet6onoyno2yv7jicoxknyazubrad.onion/d/hacking',
        # Jobs4Crypto
        'http://dreadytofatroptsdj6io7l3xptbet6onoyno2yv7jicoxknyazubrad.onion/d/Jobs4Crypto',
        # Hacktown
        'http://dreadytofatroptsdj6io7l3xptbet6onoyno2yv7jicoxknyazubrad.onion/d/HackTown',
        # Malware
        # 'http://dreadytofatroptsdj6io7l3xptbet6onoyno2yv7jicoxknyazubrad.onion/d/malware',
        # Programming
        'http://dreadytofatroptsdj6io7l3xptbet6onoyno2yv7jicoxknyazubrad.onion/d/programming',
    ]
def crawlForum(driver):
    """Crawl every board from getInterestedLinks() and save the pages.

    For each board the listing page is saved, the first topic returned by
    topicPages() is opened and saved, and the "next" pagination link is
    followed once, so at most two listing pages per board are visited.
    An error on one board is printed and the crawl moves on to the next.

    Args:
        driver: Selenium WebDriver used to load the pages.
    """
    print("Crawling the Dread forum")

    for link in getInterestedLinks():
        print('Crawling :', link)
        # Number of "next" pages followed for this board. Kept per board so
        # an error on one board cannot shorten the crawl of the next one.
        count = 0
        try:
            try:
                driver.get(link)
            except Exception:
                driver.refresh()
            html = driver.page_source
            savePage(html, link)

            has_next_page = True
            while has_next_page:
                topicLinks = topicPages(html)
                for item in topicLinks:
                    itemURL = urlparse.urljoin(baseURL, str(item))
                    try:
                        driver.get(itemURL)
                    except Exception:
                        driver.refresh()
                    savePage(driver.page_source, item)
                    driver.back()
                    # Only the first topic of each listing page is crawled.
                    break

                # Stop once one "next" page has been followed.
                if count == 1:
                    count = 0
                    break

                try:
                    temp = driver.find_element(by=By.CLASS_NAME, value="pagination")
                    link = temp.find_element(by=By.CLASS_NAME, value="next").get_attribute('href')

                    # A missing href (None) counts as "no next page", just
                    # like an empty one.
                    if not link:
                        raise NoSuchElementException
                    try:
                        driver.get(link)
                    except Exception:
                        driver.refresh()
                    html = driver.page_source
                    savePage(html, link)
                    count += 1

                except NoSuchElementException:
                    has_next_page = False

        except Exception as e:
            # Python 3 exceptions have no ``.message`` attribute; print the
            # exception itself so the handler cannot raise in turn.
            print(link, e)

    input("Crawling Dread forum done sucessfully. Press ENTER to continue\n")
# Returns 'True' if the link is Topic link
def isDescriptionLink(url):
    """Return True when *url* contains '/post/' (a topic page link)."""
    return '/post/' in url
# Returns True if the link is a listingPage link
def isListingLink(url):
    """Return True when *url* contains '/d/' (a listing page link)."""
    return '/d/' in url
# calling the parser to define the links
def topicPages(html):
    """Parse *html* and return whatever dread_links_parser extracts from it.

    crawlForum iterates over the result and treats each item as a topic link.
    """
    parsedPage = BeautifulSoup(html, "html.parser")
    topicLinks = dread_links_parser(parsedPage)
    return topicLinks
def crawler():
    """Entry point of the module: run the crawl via startCrawling()."""
    startCrawling()
    # print("Crawling and Parsing BestCardingWorld .... DONE!")

+ 0
- 462
Forums/Dread/geckodriver.log View File

@ -1,462 +0,0 @@
1655762162153 geckodriver INFO Listening on 127.0.0.1:50333
1655762166434 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50334" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofileqUeAAN"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655762166904 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50334/devtools/browser/fb880205-35da-44a4-83b8-a861ce7125f1
1655762168590 Marionette INFO Listening on port 50341
1655762169104 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655762207624 Marionette INFO Stopped listening on port 50341
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
1655762207923 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655762990320 geckodriver INFO Listening on 127.0.0.1:50884
1655762994595 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50885" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofileurjxEe"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655762995244 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50885/devtools/browser/36503f58-7ab7-4188-81b0-fa1b81f32c0a
1655762996997 Marionette INFO Listening on port 50890
JavaScript error: resource://gre/modules/ExtensionContent.jsm, line 575: TypeError: PrecompiledScript.executeInGlobal: Argument 1 is not an object.
1655762997277 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655763083091 Marionette INFO Stopped listening on port 50890
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
1655763083216 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655763089557 geckodriver INFO Listening on 127.0.0.1:50923
1655763093759 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50924" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofile5mY1qq"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655763094208 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50924/devtools/browser/68ce2df9-521f-4258-94f7-c2e2b199701a
1655763095918 Marionette INFO Listening on port 50929
1655763096408 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655763143935 Marionette INFO Stopped listening on port 50929
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
1655763144495 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655763166975 geckodriver INFO Listening on 127.0.0.1:50947
1655763171175 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50948" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofilenAGZM5"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655763171719 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50948/devtools/browser/0b9966bd-0cec-45cc-9d11-02a2580233f0
1655763173307 Marionette INFO Listening on port 50953
1655763173368 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655763208922 Marionette INFO Stopped listening on port 50953
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
JavaScript error: chrome://remote/content/marionette/cert.js, line 55: NS_ERROR_NOT_AVAILABLE: Component returned failure code: 0x80040111 (NS_ERROR_NOT_AVAILABLE) [nsICertOverrideService.setDisableAllSecurityChecksAndLetAttackersInterceptMyData]
!!! error running onStopped callback: TypeError: callback is not a function
1655763209261 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655763387409 geckodriver INFO Listening on 127.0.0.1:49752
1655763391806 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "49753" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofilevAHJSJ"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655763392827 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:49753/devtools/browser/7fe49a16-940f-4ee8-9a2f-48739a92db78
1655763394700 Marionette INFO Listening on port 49759
1655763395103 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655763430848 Marionette INFO Stopped listening on port 49759
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
JavaScript error: chrome://remote/content/marionette/cert.js, line 55: NS_ERROR_NOT_AVAILABLE: Component returned failure code: 0x80040111 (NS_ERROR_NOT_AVAILABLE) [nsICertOverrideService.setDisableAllSecurityChecksAndLetAttackersInterceptMyData]
!!! error running onStopped callback: TypeError: callback is not a function
1655763431145 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655765589286 geckodriver INFO Listening on 127.0.0.1:50102
1655765621677 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50103" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofile8ecUpb"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655765622149 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:50103/devtools/browser/e5a916fc-987c-43d4-9c12-9c9b88cca242
1655765623776 Marionette INFO Listening on port 50109
1655765623892 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655766125168 Marionette INFO Stopped listening on port 50109
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
1655766125303 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655766505099 geckodriver INFO Listening on 127.0.0.1:50156
1655766509351 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50157" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofileVqvmzh"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655766509876 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50157/devtools/browser/13084c14-64d3-48a5-8b99-4c514e961d4f
1655766511328 Marionette INFO Listening on port 50162
1655766511395 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655766552298 Marionette INFO Stopped listening on port 50162
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
JavaScript error: chrome://remote/content/marionette/cert.js, line 55: NS_ERROR_NOT_AVAILABLE: Component returned failure code: 0x80040111 (NS_ERROR_NOT_AVAILABLE) [nsICertOverrideService.setDisableAllSecurityChecksAndLetAttackersInterceptMyData]
!!! error running onStopped callback: TypeError: callback is not a function
1655766552578 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655767657126 geckodriver INFO Listening on 127.0.0.1:50257
1655767661375 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50258" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofileB9Dzeh"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655767661843 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50258/devtools/browser/867cd318-6430-4b3f-9b63-8c6bdf17636a
1655767663330 Marionette INFO Listening on port 50263
1655767663431 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655767668263 Marionette INFO Stopped listening on port 50263
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
1655767668591 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655767689480 geckodriver INFO Listening on 127.0.0.1:50276
1655767693635 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50277" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofile8ZOQe7"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655767694215 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50277/devtools/browser/a70a3f5b-19af-4a68-99c8-c46086ba9599
1655767695725 Marionette INFO Listening on port 50282
1655767695830 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655767701272 Marionette INFO Stopped listening on port 50282
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
1655767701594 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655767791360 geckodriver INFO Listening on 127.0.0.1:50296
1655767795575 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50297" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofile4GEqBr"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655767796001 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50297/devtools/browser/e2d4e57a-16ce-429d-9804-e9e391199ddf
1655767797534 Marionette INFO Listening on port 50302
1655767797661 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655767802833 Marionette INFO Stopped listening on port 50302
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
1655767803151 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655767902908 geckodriver INFO Listening on 127.0.0.1:50320
1655767907009 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50321" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofileqlJ8ZA"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655767907484 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:50321/devtools/browser/49e77669-01dd-447e-a804-36d42b3400cf
1655767908782 Marionette INFO Listening on port 50326
1655767909069 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655767916218 Marionette INFO Stopped listening on port 50326
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
1655767916523 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655767958348 geckodriver INFO Listening on 127.0.0.1:50340
1655767962557 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50341" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofilekpCdfs"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655767963032 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50341/devtools/browser/b4d1728c-d26f-4537-adc2-5c954b171b13
1655767964591 Marionette INFO Listening on port 50346
1655767964633 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655767973210 Marionette INFO Stopped listening on port 50346
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
1655767973501 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655767998084 geckodriver INFO Listening on 127.0.0.1:50358
1655768002314 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50359" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofileujYNBj"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655768002757 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50359/devtools/browser/04112c45-204d-43f1-9615-34782fd06632
1655768004165 Marionette INFO Listening on port 50364
1655768004375 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655768009787 Marionette INFO Stopped listening on port 50364
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
!!! error running onStopped callback: TypeError: callback is not a function
###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
1655768010100 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655768083966 geckodriver INFO Listening on 127.0.0.1:50386
1655768088234 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50387" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofile39Pl1f"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655768088664 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50387/devtools/browser/3d6cda42-cb60-4c67-8d76-1d186a51887f
1655768090263 Marionette INFO Listening on port 50392
1655768090299 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655768091655 Marionette INFO Stopped listening on port 50392
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
###!!! [Parent][MessageChannel] Error: (msgtype=0x390076,name=PContent::Msg_DestroyBrowsingContextGroup) Closed channel: cannot send/recv
###!!! [Parent][MessageChannel] Error: (msgtype=0x390076,name=PContent::Msg_DestroyBrowsingContextGroup) Closed channel: cannot send/recv
###!!! [Parent][MessageChannel] Error: (msgtype=0x390076,name=PContent::Msg_DestroyBrowsingContextGroup) Closed channel: cannot send/recv
1655768092659 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655768101201 geckodriver INFO Listening on 127.0.0.1:50404
1655768105373 mozrunner::runner INFO Running command: "C:\\Users\\CALSysLab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50405" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofile2YRGSz"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: Init"
console.log: "TorConnect: observed profile-after-change"
console.log: "TorConnect: observing topic 'TorBootstrapStatus'"
console.log: "TorConnect: observing topic 'TorBootstrapError'"
console.log: "TorConnect: observing topic 'TorProcessExited'"
console.log: "TorConnect: observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: observing topic 'torsettings:ready'"
console.log: "TorSettings: observed profile-after-change"
1655768105853 Marionette INFO Marionette enabled
console.log: "TorConnect: will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50405/devtools/browser/9c4a4d71-25ff-4980-a54d-8545d6200790
1655768107431 Marionette INFO Listening on port 50410
1655768107530 RemoteAgent WARN TLS certificate errors will be ignored for this session
1655768119155 Marionette INFO Stopped listening on port 50410
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
1655768119438 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1655768229472 geckodriver INFO Listening on 127.0.0.1:50431
1655768233458 webdriver::server WARN Rejected request with missing Host header

+ 0
- 357
Forums/Dread/parser.py View File

@ -1,357 +0,0 @@
__author__ = 'DarkWeb'
# Here, we are importing the auxiliary functions to clean or convert data
import datetime
from Forums.Utilities.utilities import *
from datetime import date
from datetime import timedelta
import re
import traceback
# Here, we are importing BeautifulSoup to search through the HTML tree
from bs4 import BeautifulSoup
# This is the method to parse the Description Pages (one page to each topic in the Listing Pages)
def _dread_timestamp_to_datetime(title):
    """Build a datetime from the text of a Dread timestamp ``title`` attribute.

    The first whitespace-separated token supplies the day (its first run of
    digits), the third token the year, and the last token the hour and minute
    (its first two runs of digits). The month comes from find_month(title).
    """
    month = find_month(title)
    tokens = title.split()
    day = int(re.search(r'\d+', tokens[0]).group())
    year = int(tokens[2])
    digits = re.findall(r'\d+', tokens[-1])
    return datetime(year, month, day, hour=int(digits[0]), minute=int(digits[1]))


def _dread_points_text(tag):
    """Return the vote text of *tag* without "points" and ":", or "-1" if *tag* is None."""
    if tag is None:
        return "-1"
    return tag.text.replace("points", "").replace(":", "").strip()


# This is the method to parse the Description Pages (one page to each topic in the Listing Pages)
def dread_description_parser(soup):
    """Parse one Dread topic page into parallel per-post lists.

    The opening post is read from the first ``div.item`` inside ``div.content``;
    replies are read from the ``div.comment`` elements under ``div.postComments``.

    :param soup: BeautifulSoup tree of the description page.
    :return: the tuple ``(topic, post, user, addDate, feedback, status,
        reputation, sign, interest)``, or None when the page has no
        ``div.content`` container or no title link. If parsing a post fails,
        the traceback is printed and whatever was collected so far is returned.
    """
    # Fields to be parsed
    topic = "-1"     # topic name
    user = []        # all users of each post
    addDate = []     # all dates of each post
    feedback = []    # all feedbacks of each vendor (not available on Dread)
    status = []      # all user's authority in each post (Dread exposes this as "flair")
    reputation = []  # points of each post
    sign = []        # all user's signature in each post (not available on Dread)
    post = []        # all messages of each post
    interest = []    # all user's interest in each post (not available on Dread)

    # Finding the topic (should be just one coming from the Listing Page)
    container = soup.find('div', {"class": "content"})
    if container is None:
        # Same outcome as a page without a title link: nothing to parse.
        return None
    li = container.find("a", {"class": "title"})
    if li is None:
        return None
    topic = li.text
    topic = topic.replace(u'\xa0', ' ')
    topic = topic.replace(",", "")
    topic = topic.replace("\n", "")
    topic = cleanString(topic.strip())

    try:
        # The initial post is stored separately from the comments.
        init_post = container.find('div', {"class": "item"})
        author_box = init_post.find('div', {"class": "author"})

        # author name, with the flair text removed when a flair is present
        author = author_box.select_one('a[href^="/u/"]').text
        flair = author_box.find("span", {"class": "flair"})
        if flair is not None:
            author = author.replace(flair.text.strip(), '')
        user.append(cleanString(author.strip()))

        # status
        flair = init_post.find("span", {"class": "flair"})
        flair = flair.text.strip() if flair is not None else "-1"
        status.append(cleanString(flair))

        # no blurb
        interest.append(-1)

        # points for post
        karma = _dread_points_text(init_post.find("div", {"class": "voteCount"}))
        reputation.append(cleanString(karma))

        # date
        spans = author_box.find('span', recursive=False)
        addDate.append(_dread_timestamp_to_datetime(spans['title']))

        # content
        inner = init_post.find("div", {"class": "postContent"})
        post.append(cleanString(inner.text.strip()))

        # no signature
        sign.append(-1)

        # no feedback
        feedback.append(-1)

        comments = soup.find('div', {"class": "postComments"})
        if comments is None:
            # Topic without replies: only the initial post is returned.
            return (topic, post, user, addDate, feedback, status, reputation, sign, interest)
        comments = comments.find_all('div', "comment")

        # For each message (post), get all the fields we are interested to:
        for ipost in comments:
            cc = ipost.find('div', {"class": "commentContent"})

            # Finding the author, with the flair text removed when present
            author = cc.find('a', {"class": "username"}).text
            flair = cc.find("span", {"class": "flair"})
            if flair is not None:
                flair = flair.text.strip()
                author = author.replace(flair, '')
            else:
                # Dread has no membergroup/postgroup; flair is the only status source.
                flair = "-1"
            user.append(cleanString(author.strip()))
            status.append(cleanString(flair))

            # Dread does not have blurb
            interest.append(-1)

            # Dread has no per-user reputation; each post carries its own points
            karma = _dread_points_text(cc.find('div', {"class": "votes"}))
            reputation.append(cleanString(karma))

            # Post date
            postarea = ipost.find('div', {"class": "timestamp"}).find('span', recursive=False)
            addDate.append(_dread_timestamp_to_datetime(postarea['title']))

            # Finding the post
            inner = ipost.find('div', {"class": "commentBody"})
            post.append(cleanString(inner.text.strip()))

            # No signature for Dread
            sign.append(-1)

            # As no information about users's feedback was found, just assign "-1" to the variable
            feedback.append("-1")

    except Exception:
        # Best effort: report the failure and return what has been collected.
        traceback.print_exc()

    # Populate the final variable (this should be a list with all fields scraped)
    row = (topic, post, user, addDate, feedback, status, reputation, sign, interest)

    # Sending the results
    return row
# This is the method to parse the Listing Pages (one page with many posts)
def dread_listing_parser(soup):
    """Parse a Dread listing page (one board page with many topics).

    The board name is read from ``a.banner-top`` and one entry is collected per
    direct ``div.item`` child of ``div.postBoard``.

    :param soup: BeautifulSoup tree of the listing page.
    :return: the value of ``organizeTopics("Dread", nm, topic, board, view,
        post, user, addDate, href)``.
    """
    board = "-1"  # board name (the previous level of the topic in the Forum categorization tree.
                  # For instance: Security/Malware/Tools to hack Facebook. The board here should be Malware)
    nm = 0        # number of topics found
    topic = []    # all topics
    user = []     # all users of each topic
    post = []     # number of posts of each topic
    view = []     # number of views of each topic
    addDate = []  # when the topic was created
    href = []     # all cleaned urls (used to merge Listing and Description pages)

    # Finding the board (should be just one)
    board = soup.find('a', {"class": "banner-top"}).text
    board = cleanString(board.strip())

    # Finding the repeated tag that corresponds to the listing of topics
    itopics = soup.find("div", {"class": "postBoard"}).find_all("div", {"class": "item"}, recursive=False)

    for itopic in itopics:
        # Adding the topic to the topic list, without the flair text when the title has one
        topic_title = itopic.find("a", {"class": "title"})
        title_flair = topic_title.find('span', {"class": "flair"})
        topics = topic_title.text
        if title_flair is not None:
            topics = topics.replace(title_flair.text.strip(), '')
        topics = topics.replace(u'\xa0', ' ')
        topics = topics.replace(",", "")
        topics = topics.replace("\n", "")
        topic.append(cleanString(topics.strip()))

        # Adding the url to the list of urls
        href.append(cleanLink(topic_title['href']))

        # Finding the author of the topic, again without the flair text
        ps = itopic.find('div', {"class": "author"})
        author = ps.select_one('a[href^="/u/"]').text
        flair = ps.find("span", {"class": "flair"})
        if flair is not None:
            author = author.replace(flair.text.strip(), '')
        user.append(cleanString(author.strip()))

        # Finding the number of replies
        meta = itopic.find("div", {"class": "postMain"})
        posts = meta.find("a").text
        posts = posts.replace("comments", '').strip()
        post.append(cleanString(posts))

        # Finding the number of Views - not shown in Dread
        view.append("-1")

        # Creation date, read from the title attribute of the author box's first direct span
        spans = ps.find('span', recursive=False)
        dt = spans['title']
        month = find_month(dt)
        split_text = dt.split()
        day = int(re.search(r'\d+', split_text[0]).group())
        year = int(split_text[2])
        hm = re.findall(r'\d+', split_text[-1])
        date_time_obj = datetime(year, month, day, hour=int(hm[0]), minute=int(hm[1]))
        addDate.append(date_time_obj)

    # Counting how many topics we have found
    nm = len(topic)

    return organizeTopics("Dread", nm, topic, board, view, post, user, addDate, href)
def dread_links_parser(soup):
    """Return the topic hrefs of a Dread listing page, in page order.

    One href is taken from the ``a.title`` link of every direct ``div.item``
    child of ``div.postBoard``; these are the links the crawler visits.
    """
    board = soup.find("div", {"class": "postBoard"})
    entries = board.find_all("div", {"class": "item"}, recursive=False)
    return [entry.find("a", {"class": "title"})['href'] for entry in entries]
def find_month(s):
    """Return the number (1-12) of the first English month name contained in *s*.

    Full month names are tried in calendar order using a case-sensitive
    substring test, so when *s* mentions several months the earliest one in
    the calendar wins. Returns None when no full month name occurs in *s*.
    """
    month_names = (
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December',
    )
    for number, name in enumerate(month_names, start=1):
        if name in s:
            return number
    return None

BIN
Forums/Helium/captcha.png View File

Before After
Width: 170  |  Height: 36  |  Size: 5.4 KiB

+ 0
- 327
Forums/Helium/crawler_selenium.py View File

@ -1,327 +0,0 @@
__author__ = 'DarkWeb'
'''
Helium Forum Crawler (Selenium)
'''
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from PIL import Image
import urllib.parse as urlparse
import os, time
from datetime import date
import subprocess
from bs4 import BeautifulSoup
from Forums.Initialization.prepare_parser import new_parse
from Forums.Helium.parser import helium_links_parser
from Forums.Utilities.utilities import cleanHTML
# Running number that getNameFromURL() uses as the file name when a URL
# contains no alphanumeric characters; it is incremented after each such use.
counter = 1
# Root address of the Helium forum; crawlForum() joins topic links against it.
baseURL = 'http://fahue6hb7odzns36vfoi2dqfvqvjq4btt7vo52a67jivmyz6a6h3vzqd.onion/'
# Opens Tor Browser, crawls the website
def startCrawling():
    """Open the Helium forum, log in and crawl it, then shut Tor down.

    Nothing is crawled when getAccess() reports the site as 'down'.
    Launching Tor (opentor) and parsing the saved pages (new_parse) are
    currently disabled here.
    """
    # opentor()
    # forumName = getForumName()
    driver = getAccess()
    if driver != 'down':
        try:
            login(driver)
            crawlForum(driver)
        finally:
            # Close the browser and Tor even when login or crawling raises.
            closetor(driver)
    # new_parse(forumName, False)
# Opens Tor Browser
def opentor():
    """Launch the program named on the first line of ../../path.txt and wait for the operator.

    The process id of the launched program is stored in the module-level
    ``pid``. The function then sleeps 7.5 seconds and blocks until the
    operator presses ENTER.
    """
    global pid
    print("Connecting Tor...")
    # Read the executable path and close the file right away.
    with open('../../path.txt') as path_file:
        path = path_file.readline().strip()
    pro = subprocess.Popen(path)
    pid = pro.pid
    time.sleep(7.5)
    input('Tor Connected. Press ENTER to continue\n')
    return
# Login using premade account credentials and do login captcha manually
def login(driver):
    """Fill in the Helium login form and wait while the operator solves the CAPTCHA.

    :param driver: Selenium WebDriver already pointed at the login page.
    """
    submit_button_xpath = "/html/body/div[2]/div/div[1]/div/div/div[2]/form/div[5]/div/button"
    landing_xpath = '/html/body/div[2]/div/p'

    # The login page counts as loaded once its submit button is visible.
    WebDriverWait(driver, 100).until(
        EC.visibility_of_element_located((By.XPATH, submit_button_xpath)))

    # NOTE(review): the account credentials are hard-coded in this function.
    username_field = driver.find_element(by=By.XPATH, value='//*[@id="username"]')
    username_field.send_keys('holyre')
    password_field = driver.find_element(by=By.XPATH, value='//*[@id="password"]')
    password_field.send_keys('PlatinumBorn2')

    # A disabled, semi-automated CAPTCHA flow used to sit here: screenshot
    # //*[@id="captcha_img"] to ..\Helium\captcha.png, show it with PIL, ask for
    # the solution in the terminal, type it into //*[@id="captcha"] and click
    # the submit button. The CAPTCHA is now solved by hand in the browser.
    input("Press ENTER when CAPTCHA is completed\n")

    # Wait for the page shown after login (this XPath may need to change
    # based on a different seed url).
    WebDriverWait(driver, 50).until(
        EC.visibility_of_element_located((By.XPATH, landing_xpath)))
# Returns the name of the website
def getForumName():
    """Return the name of the forum handled by this crawler."""
    return 'Helium'
# Return the link of the website
def getFixedURL():
    """Return the address of the Helium login page, the crawler's entry point."""
    return 'http://fahue6hb7odzns36vfoi2dqfvqvjq4btt7vo52a67jivmyz6a6h3vzqd.onion/login'
# Closes Tor Browser
def closetor(driver):
    """Kill tor.exe, close the browser window of *driver*, then pause 3 seconds.

    :param driver: Selenium WebDriver whose current window is closed.
    """
    # os.system("taskkill /pid " + str(pro.pid))
    # Force-terminate every tor.exe process together with its child processes.
    os.system("taskkill /t /f /im tor.exe")
    print('Closing Tor...')
    driver.close()
    time.sleep(3)
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
    """Create a Firefox WebDriver that routes its traffic through the local Tor SOCKS proxy.

    ../../path.txt supplies, one per line: the Firefox binary, the Firefox
    profile directory and the geckodriver executable.

    :return: the configured ``webdriver.Firefox`` instance.
    """
    # Read the three paths and close the file right away.
    with open('../../path.txt', 'r') as path_file:
        lines = path_file.readlines()

    ff_binary = FirefoxBinary(lines[0].strip())
    ff_prof = FirefoxProfile(lines[1].strip())

    # Applied in this order; the SOCKS entries point at 127.0.0.1:9150.
    preferences = (
        ("places.history.enabled", False),
        ("privacy.clearOnShutdown.offlineApps", True),
        ("privacy.clearOnShutdown.passwords", True),
        ("privacy.clearOnShutdown.siteSettings", True),
        ("privacy.sanitize.sanitizeOnShutdown", True),
        ("signon.rememberSignons", False),
        ("network.cookie.lifetimePolicy", 2),
        ("network.dns.disablePrefetch", True),
        ("network.http.sendRefererHeader", 0),
        # ("permissions.default.image", 2),
        ("browser.download.folderList", 2),
        ("browser.download.manager.showWhenStarting", False),
        ("browser.helperApps.neverAsk.saveToDisk", "text/plain"),
        ('network.proxy.type', 1),
        ("network.proxy.socks_version", 5),
        ('network.proxy.socks', '127.0.0.1'),
        ('network.proxy.socks_port', 9150),
        ('network.proxy.socks_remote_dns', True),
        ("javascript.enabled", True),
    )
    for pref_name, pref_value in preferences:
        ff_prof.set_preference(pref_name, pref_value)
    ff_prof.update_preferences()

    service = Service(lines[2].strip())
    driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
    return driver
def getAccess():
    """Start a browser and load the forum's login page.

    :return: the WebDriver on success, or the string 'down' when loading the
        page raises; in that case the browser window is closed first.
    """
    url = getFixedURL()
    driver = createFFDriver()
    try:
        driver.get(url)
        return driver
    except Exception:
        # The caller only receives 'down', so nobody else could close this window.
        try:
            driver.close()
        except Exception:
            # Best effort: the session may already be unusable.
            pass
        return 'down'
# Saves the crawled html page
def savePage(page, url):
    """Clean *page* with cleanHTML and write it, UTF-8 encoded, to the file derived from *url*.

    Missing parent directories of the target file are created.

    :param page: HTML source of the crawled page.
    :param url: address of the page; getFullPathName(url) decides the file location.
    """
    cleanPage = cleanHTML(page)
    filePath = getFullPathName(url)
    os.makedirs(os.path.dirname(filePath), exist_ok=True)
    # The context manager closes the file even if the write fails.
    with open(filePath, 'wb') as out_file:
        out_file.write(cleanPage.encode('utf-8'))
    return
# Gets the full path of the page to be saved along with its appropriate file name
def getFullPathName(url):
    """Return the relative file path under which the page at *url* is saved.

    The path is built from the Helium HTML_Pages folder, today's date as
    MMDDYYYY, a ``Description`` or ``Listing`` folder (chosen by
    isDescriptionLink) and the file name from getNameFromURL plus ``.html``.
    """
    fileName = getNameFromURL(url)
    # Read the date once so month, day and year always belong to the same day.
    today = date.today()
    stamp = "%02d%02d%04d" % (today.month, today.day, today.year)
    folder = r'Description\\' if isDescriptionLink(url) else r'Listing\\'
    fullPath = r'..\Helium\HTML_Pages\\' + stamp + r'\\' + folder + fileName + '.html'
    return fullPath
# Creates the file name from passed URL
def getNameFromURL(url):
    """Return a file name made of the alphanumeric characters of *url*.

    When *url* has no alphanumeric character, the current value of the
    module-level ``counter`` is returned as a string and the counter is
    advanced by one.
    """
    global counter
    kept = [ch for ch in url if ch.isalnum()]
    if kept:
        return ''.join(kept)
    fallback = str(counter)
    counter = counter + 1
    return fallback
def getInterestedLinks():
    """Return the listing URLs of the Helium boards that are crawled, in crawl order."""
    board_root = 'http://fahue6hb7odzns36vfoi2dqfvqvjq4btt7vo52a67jivmyz6a6h3vzqd.onion/board/'
    board_ids = (
        6,   # General Discussion
        8,   # Anonymity and Security
        9,   # Programming
        10,  # Carding Discussions
        11,  # Hacked Database (free)
        17,  # Hacking tools, exploits and POC
        12,  # Hacked Database
        13,  # Hacking and other Services
        22,  # Selling/Buying Malware, Exploits etc
        18,  # General Tutorials
        19,  # Hacking Tutorials
    )
    return [board_root + str(board_id) for board_id in board_ids]
def crawlForum(driver):
    """Visit every board from getInterestedLinks() and save its listing and topic pages.

    For each board the listing page is saved, then the topic links returned by
    topicPages() are opened and saved, then the last link of the pagination bar
    is followed. Two test limits are still active: only the first topic of each
    listing page is saved, and only one extra listing page per board is followed.
    An error on one board is printed and the crawl moves on to the next board.

    :param driver: logged-in Selenium WebDriver.
    """
    print("Crawling the Helium forum")

    linksToCrawl = getInterestedLinks()
    # visited = set(linksToCrawl)
    # initialTime = time.time()

    count = 0
    for link in linksToCrawl:
        print('Crawling :', link)
        try:
            try:
                driver.get(link)
            except Exception:
                driver.refresh()
            html = driver.page_source
            savePage(html, link)

            has_next_page = True
            while has_next_page:
                topic_links = topicPages(html)
                for item in topic_links:
                    itemURL = urlparse.urljoin(baseURL, str(item))
                    try:
                        driver.get(itemURL)
                    except Exception:
                        driver.refresh()
                    savePage(driver.page_source, item)
                    driver.back()
                    # comment out
                    break

                # comment out
                if count == 1:
                    count = 0
                    break

                try:
                    bar = driver.find_element(by=By.XPATH, value=
                                              '/html/body/div[2]/div/div[3]/ul')
                    li = bar.find_elements(By.TAG_NAME, 'li')[-1]
                    link = li.find_element(By.TAG_NAME, 'a').get_attribute('href')
                    if link == "":
                        raise NoSuchElementException
                    try:
                        driver.get(link)
                    except Exception:
                        driver.refresh()
                    html = driver.page_source
                    savePage(html, link)
                    count += 1
                except NoSuchElementException:
                    has_next_page = False

        except Exception as e:
            # Python 3 exceptions have no .message attribute; print the exception itself.
            print(link, e)

    # finalTime = time.time()
    # print finalTime - initialTime

    input("Crawling Helium forum done successfully. Press ENTER to continue\n")
# Returns 'True' if the link is Topic link
def isDescriptionLink(url):
    """Return True when *url* contains 'topic', which marks a topic (description) page."""
    return 'topic' in url
# Returns True if the link is a listingPage link
def isListingLink(url):
    """Return True when *url* contains 'board', which marks a listing page."""
    return 'board' in url
# calling the parser to define the links
def topicPages(html):
    """Parse *html* with BeautifulSoup's "html.parser" and return helium_links_parser's result for it."""
    return helium_links_parser(BeautifulSoup(html, "html.parser"))
def crawler():
    """Entry point of the Helium crawler; delegates to startCrawling()."""
    startCrawling()
    # print("Crawling and Parsing BestCardingWorld .... DONE!")

+ 0
- 248
Forums/Helium/parser.py View File

@ -1,248 +0,0 @@
__author__ = 'DarkWeb'
# Here, we are importing the auxiliary functions to clean or convert data
from Forums.Utilities.utilities import *
# Here, we are importing BeautifulSoup to search through the HTML tree
from bs4 import BeautifulSoup
# This is the method to parse the Description Pages (one page to each topic in the Listing Pages)
def helium_description_parser(soup):
    """Parse one Helium topic (description) page.

    Parameters
    ----------
    soup : BeautifulSoup
        Parsed HTML of a single topic page.

    Returns
    -------
    tuple
        ``(topic, post, user, addDate, feedback, status, reputation, sign,
        interest)``. ``topic`` is a cleaned string; every other element is a
        list holding one entry per post found on the page. A field that is
        not available is stored as the string ``"-1"``.

    Raises
    ------
    AttributeError
        If the title, panel heading, panel title or content body element
        is missing, because those lookups are not guarded.
    """
    # Fields to be parsed
    user = []        # all users of each post
    addDate = []     # all dates of each post
    feedback = []    # all feedbacks of each vendor (always "-1" here)
    status = []      # all user's authority in each post such as (adm, member, dangerous)
    reputation = []  # all users' karma in each post (usually found as a number)
    sign = []        # all user's signature in each post
    post = []        # all messages of each post
    interest = []    # all user's interest in each post

    # Finding the topic (should be just one coming from the Listing Page)
    title_tag = soup.find("h4", {"class": "text-truncated"})
    topic = title_tag.text
    for noise in ("Topic:", "Post Reply", ",", "\n"):
        topic = topic.replace(noise, "")
    topic = cleanString(topic.strip())

    # Finding the repeated tag that corresponds to the listing of posts
    posts = soup.find_all('div', {"id": "a9"})

    # For each message (post), get all the fields we are interested in
    for ipost in posts:

        # Finding the author (user) of the post
        heading = ipost.find('div', {"class": "panel-heading"})
        title = heading.find('div', {"class": "panel-title"}).text
        author = title.replace("User:", "").strip()
        user.append(cleanString(author))  # Remember to clean the problematic characters

        # Finding the status of the author: combine the two possible tags.
        # Helium does not have membergroup and postgroup
        membergroup = heading.find('li', {"class": "membergroup"})
        postgroup = heading.find('li', {"class": "postgroup"})
        if membergroup is not None:
            membergroup = membergroup.text.strip()
            if postgroup is not None:
                membergroup = membergroup + " - " + postgroup.text.strip()
        elif postgroup is not None:
            membergroup = postgroup.text.strip()
        else:
            membergroup = "-1"
        status.append(cleanString(membergroup))

        # Finding the interest of the author
        # Helium does not have blurb
        blurb = heading.find('li', {"class": "blurb"})
        blurb = blurb.text.strip() if blurb is not None else "-1"
        interest.append(cleanString(blurb))

        # Finding the reputation of the user
        # Helium does not have karma
        karma = heading.find('li', {"class": "karma"})
        if karma is not None:
            karma = karma.text
            karma = karma.replace("Community Rating: ", "")
            karma = karma.replace("Karma: ", "")
            karma = karma.strip()
        else:
            karma = "-1"
        reputation.append(cleanString(karma))

        # Tag that holds the post content
        postarea = ipost.find('div', {"class": "content_body"})

        # Finding the date of the post
        # Helium does not have date
        addDate.append("-1")

        # Finding the post: join the text of every paragraph
        paragraphs = postarea.find_all('p')
        p = ""
        for paragraph in paragraphs:
            p += paragraph.text.strip() + " "

        # Drop the quoted text from the message. str.replace returns a new
        # string, so the result has to be assigned back.
        quote = postarea.find('div', {"class": "standard_quote"})
        if quote is not None:
            q = quote.text.strip()
            p = p.replace(q, "")
        post.append(cleanString(p.strip()))

        # Finding the user's signature
        # Helium does not have signature
        signature = ipost.find('div', {"class": "post_wrapper"})
        if signature is not None:
            signature = signature.text.strip()
        else:
            signature = "-1"
        sign.append(cleanString(signature))

        # As no information about users' feedback was found, just assign "-1"
        feedback.append("-1")

    # Populate the final variable (a tuple with all fields scraped)
    row = (topic, post, user, addDate, feedback, status, reputation, sign, interest)

    # Sending the results
    return row
# This is the method to parse the Listing Pages (one page with many posts)
def helium_listing_parser(soup):
    """Parse one Helium board (listing) page.

    Collects, for every topic row of the page, its title, link, author,
    reply count and view count, plus the board name taken from the
    breadcrumb list, and hands everything to organizeTopics under the
    forum name "Helium". The creation date is not scraped and is always
    recorded as "-1".
    """
    topic, user, post, view, addDate, href = [], [], [], [], [], []

    # Board name: second breadcrumb text joined with the title of the third
    crumbs = soup.find('div', {"class": "col-md-12"}).findAll('li')
    board = crumbs[1].text + u"->" + crumbs[2].get('title')
    board = cleanString(board.replace("\n", "").strip())

    # Topic cells and the matching replies/views cells of the same table
    itopics = soup.find('table', {"class": "table"}).find('tbody').findAll('td', {"class": "col-md-8"})
    repliesViews = soup.find('table', {"class": "table"}).find('tbody').findAll('td', {"class": "col-md-2"})

    # Number of topics found on this page
    nm = len(itopics)

    for index, itopic in enumerate(itopics):
        anchor = itopic.find('a')

        # Topic title (commas removed)
        title = anchor.get('title').replace(",", "")
        topic.append(cleanString(title.strip()))

        # Cleaned url, later used to merge Listing and Description pages
        href.append(cleanLink(anchor.get('href')))

        # Author of the topic
        user.append(cleanString(itopic.find('strong').text.strip()))

        # First token holds the replies, second token holds the views
        rv = repliesViews[index].find('p').text.split()
        post.append(cleanString(rv[0].replace("Replies", "").strip()))
        view.append(cleanString(rv[1].replace("Views", "").strip()))

        # No information about when the topic was added
        addDate.append("-1")

    return organizeTopics("Helium", nm, topic, board, view, post, user, addDate, href)
def helium_links_parser(soup):
    """Return the href of the first linked anchor in every topic cell.

    These are the links the crawler should visit next.
    """
    cells = soup.find('table', {"class": "table"}).find('tbody').findAll('td', {"class": "col-md-8"})
    return [cell.find('a', href=True)['href'] for cell in cells]

BIN
Forums/Initialization/desc_transformer.pickle View File


+ 0
- 3
Forums/Initialization/forumsList.txt View File

@ -1,4 +1 @@
CryptBB
Dread
DWForums
Helium

+ 6
- 18
Forums/Initialization/forums_mining.py View File

@ -8,10 +8,6 @@ import os
from datetime import *
from Forums.BestCardingWorld.crawler_selenium import crawler as crawlerBestCardingWorld
from Forums.CryptBB.crawler_selenium import crawler as crawlerCryptBB
from Forums.DWForums.crawler_selenium import crawler as crawlerDWForums
from Forums.Dread.crawler_selenium import crawler as crawlerDread
from Forums.Helium.crawler_selenium import crawler as crawlerHelium
# from Forums.Nulled.crawler_selenium import crawler as crawlerNulled
import time
@ -90,20 +86,12 @@ if __name__ == '__main__':
print("Creating listing and description directories ...")
createDirectory(forum)
time.sleep(5)
# input("Directories created successfully. Press ENTER to continue\n")
# if forum == "BestCardingWorld":
# crawlerBestCardingWorld()
# elif forum == "CryptBB":
# crawlerCryptBB()
# elif forum == "DWForums":
# crawlerDWForums()
# elif forum == "Dread":
# crawlerDread()
# elif forum == "Helium":
# crawlerHelium()
# elif forum == "Nulled":
# crawlerNulled()
input("Directories created successfully. Press ENTER to continue\n")
if forum == "BestCardingWorld":
crawlerBestCardingWorld()
elif forum == "CryptBB":
crawlerCryptBB()
print("Scraping process completed successfully!")


+ 0
- 20
Forums/Initialization/prepare_parser.py View File

@ -7,10 +7,6 @@ import shutil
from Forums.DB_Connection.db_connection import *
from Forums.BestCardingWorld.parser import *
from Forums.CryptBB.parser import *
from Forums.DWForums.parser import *
from Forums.Dread.parser import *
from Forums.Helium.parser import *
# from Forums.Nulled.parser import *
from Forums.Classifier.classify_product import predict
#from DarkWebMining_Sample.Forums.Classifier.classify_product import predict_semi
@ -152,14 +148,6 @@ def new_parse(forum, createLog):
rmm = bestcardingworld_description_parser(soup)
elif forum == "CryptBB":
rmm = cryptBB_description_parser(soup)
elif forum == "DWForums":
rmm = dwForums_description_parser(soup)
elif forum == "Dread":
rmm = dread_description_parser(soup)
elif forum == "Helium":
rmm = helium_description_parser(soup)
# elif forum == "Nulled":
# rmm = nulled_description_parser(soup)
# key = u"Top:" + rmm[0].upper().strip() + u" User:" + rmm[2][0].upper().strip()
key = u"Url:" + os.path.basename(line2).replace(".html", "")
@ -207,14 +195,6 @@ def new_parse(forum, createLog):
rw = bestcardingworld_listing_parser(soup)
elif forum == "CryptBB":
rw = cryptBB_listing_parser(soup)
elif forum == "DWForums":
rw = dwForums_listing_parser(soup)
elif forum == "Dread":
rw = dread_listing_parser(soup)
elif forum == "Helium":
rw = helium_listing_parser(soup)
# elif forum == "Nulled":
# rw = nulled_listing_parser(soup)
except:


+ 7
- 8
MarketPlaces/DarkFox/crawler_selenium.py View File

@ -35,12 +35,14 @@ def startCrawling():
# driver = getAccess()
# if driver != 'down':
# captcha(driver)
# crawlForum(driver)
# new_parse(mktName, False)
# try:
# captcha(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
new_parse(mktName, False)
# closetor(driver)
# Opens Tor Browser
@ -130,14 +132,11 @@ def createFFDriver():
def getAccess():
    """Open the fixed URL in a freshly created Firefox driver.

    Returns:
        The driver when the page load succeeds, or the string 'down' when
        it fails (the driver window is closed in that case).
    """
    url = getFixedURL()
    driver = createFFDriver()
    try:
        driver.get(url)
    except Exception:
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt / SystemExit are not reported as 'down'.
        driver.close()
        return 'down'
    return driver


+ 0
- 192
MarketPlaces/DarkFox/crawler_seleniumtest.py View File

@ -1,192 +0,0 @@
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.firefox.service import Service
import os
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from PIL import Image
import codecs
import time
from datetime import date
import urllib.parse as urlparse
import os
from bs4 import BeautifulSoup
from MarketPlaces.DarkFox.parser import darkfox_links_parser
# Read the machine-specific paths; the context manager closes the file
# as soon as the lines have been read.
with open('../../path.txt', 'r') as file:
    lines = file.readlines()

# torexe = os.popen(lines[0].strip()) # path for tor.exe
binary = FirefoxBinary(lines[0].strip())  # full path for firefox.exe
# options = Options()
profile = FirefoxProfile(lines[1].strip())  # full path for profile.default

# Send the browser traffic through the local SOCKS proxy on port 9150
profile.set_preference('network.proxy.type', 1)
profile.set_preference('network.proxy.socks', '127.0.0.1')
profile.set_preference('network.proxy.socks_port', 9150)
profile.set_preference("network.proxy.socks_remote_dns", True)
profile.update_preferences()

service = Service(lines[2].strip())  # full path for geckodriver.exe
driver = webdriver.Firefox(firefox_binary=binary, firefox_profile=profile,
                           service=service)
# Manual captcha solver
def captcha(driver):
    """Let the operator solve the captcha shown by the site.

    Waits for the captcha form, saves a screenshot of the captcha element,
    displays it, reads the solution from the terminal, submits it and then
    waits for the listing page heading to become visible.

    Args:
        driver: the Selenium driver currently showing the captcha page.
    """
    # wait for captcha page show up
    WebDriverWait(driver, 100).until(EC.visibility_of_element_located((By.XPATH, "/html/body/div/div/form/button[1]")))

    # save captcha to local; resolve the path once so that the file that is
    # displayed is exactly the file that was just written
    captcha_path = os.path.abspath("captcha.png")
    driver.find_element(by=By.XPATH, value="/html/body/div/div/form/div[1]/div[1]").screenshot(captcha_path)

    # open the saved screenshot and show it in the default image viewer
    im = Image.open(captcha_path)
    im.show()

    # locate the input box for the solution
    inputBox = driver.find_element(by=By.XPATH, value="/html/body/div/div/form/div[1]/div[2]/input")

    # ask user input captcha solution in terminal
    userIn = input("Enter solution: ")

    # send user solution into the input space
    inputBox.send_keys(userIn)

    # click the verify(submit) button
    driver.find_element(by=By.XPATH, value="/html/body/div/div/form/button[1]").click()

    # wait for listing page show up (This Xpath may need to change based on different seed url)
    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
        (By.XPATH, "/html/body/main/div/div/div[2]/div[1]/div[1]/form/div[1]/h1")))
# Saves the crawled html page
def savePage(page, url):
    """Write the crawled page to disk.

    Args:
        page: the page content as bytes (the file is opened in binary mode).
        url: the URL the page came from; getFullPathName maps it to the
            destination file.
    """
    filePath = getFullPathName(url)
    os.makedirs(os.path.dirname(filePath), exist_ok=True)
    # Use a context manager so the handle is flushed and closed right away
    # instead of relying on garbage collection.
    with open(filePath, 'wb') as out:
        out.write(page)
# Gets the full path of the page to be saved along with its appropriate file name
def getFullPathName(url):
    """Build the full path of the file in which the page at *url* is saved.

    The path is made of the fixed HTML_Pages folder, a folder named after
    today's date (MMDDYYYY), a 'Description' or 'Listing' folder depending
    on isDescriptionLink(url), and the file name derived from the URL.

    Args:
        url: the URL of the page being saved.

    Returns:
        The destination path as a string.
    """
    fileName = getNameFromURL(url)

    # Read the date once so month, day and year always belong to the same
    # day, even when the call happens around midnight.
    today = date.today()
    dateFolder = "%02d%02d%04d" % (today.month, today.day, today.year)

    pagesRoot = r'C:\Users\CALSysLab\Documents\threatIntelligence-main\DarkWebMining_Working\MarketPlaces\DarkFox\HTML_Pages\\'
    if isDescriptionLink(url):
        pageKind = r'Description\\'
    else:
        pageKind = r'Listing\\'

    fullPath = pagesRoot + dateFolder + r'\\' + pageKind + fileName + '.html'
    return fullPath
# Creates the name of the file based on URL
def getNameFromURL(url):
    """Create a file name from *url* by keeping only its alphanumeric characters.

    When the URL contains no alphanumeric character at all, a running
    number taken from the module-level ``counter`` is used instead and the
    counter is incremented.

    Args:
        url: the URL to convert.

    Returns:
        The file name (without extension) as a string.
    """
    global counter
    name = ''.join(ch for ch in url if ch.isalnum())
    if name == '':
        # The module may not have initialised `counter`; start at 1 in that
        # case instead of failing with NameError.
        # NOTE(review): 1 is an assumed starting value -- confirm.
        try:
            current = counter
        except NameError:
            current = 1
        name = str(current)
        counter = current + 1
    return name
def getInterestedLinks():
    """Return the category listing URLs that the crawl starts from."""
    return [
        # Guides and Tutorials
        'http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion/category/30739153-1fcd-45cd-b919-072b439c6e06',
        # Digital Products
        'http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion/category/0e384d5f-26ef-4561-b5a3-ff76a88ab781',
        # Software and Malware
        'http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion/category/6b71210f-f1f9-4aa3-8f89-bd9ee28f7afc',
        # Services
        'http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion/category/b9dc5846-5024-421e-92e6-09ba96a03280',
        # Miscellaneous
        'http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion/category/fd1c989b-1a74-4dc0-92b0-67d8c1c487cb',
        # Hosting and Security
        'http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion/category/5233fd6a-72e6-466d-b108-5cc61091cd14',
    ]
def isDescriptionLink(url):
    """Tell whether *url* refers to a product (description) page.

    A link is treated as a description link when the substring
    'product' occurs anywhere inside it.
    """
    return 'product' in url
def isListingLink(url):
    """Tell whether *url* refers to a category (listing) page.

    A link is treated as a listing link when the substring
    'category' occurs anywhere inside it.
    """
    return 'category' in url
def productPages(html):
    """Parse *html* and return whatever darkfox_links_parser extracts from it."""
    parsed = BeautifulSoup(html, "html.parser")
    links = darkfox_links_parser(parsed)
    return links
def isSignOut(url):
    """Tell whether *url* looks like a sign-out link.

    The check is case-insensitive and matches when either 'signout' or
    'logout' occurs anywhere in the URL.
    """
    #absURL = urlparse.urljoin(url.base_url, url.url)
    lowered = url.lower()
    return 'signout' in lowered or 'logout' in lowered
# dark fox seed url
baseurl = 'http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion'
driver.get(baseurl)
captcha(driver)

# visited = set()
# visited.add(br.geturl())
linksToCrawl = getInterestedLinks()
initialTime = time.time()

# Visit every category link and save its listing page.
for link in linksToCrawl:
    print('Crawling :', link)
    try:
        driver.get(link)
        # str.encode takes (encoding, errors); the URL must not be passed
        # as the error-handler name.
        html = driver.page_source.encode('utf-8')
        savePage(html, link)

        # Product crawling and pagination are currently disabled; the
        # intended logic is kept here for reference:
        #
        # has_next_page = True
        # while has_next_page:
        #     j = 0
        #     list = productPages(html)
        #     for item in list:
        #         if j == 1:
        #             break
        #         itemURL = str(item)
        #         driver.get(itemURL)
        #         savePage(driver.page_source.encode('utf-8'), item)
        #         driver.back()
        #         j += 1
        #     try:
        #         link = driver.find_element(by=By.XPATH, value=
        #             '/html/body/main/div/div[2]/div/div[2]/div/div/div/nav/a[2]').get_attribute('href')
        #         driver.get(link)
        #         html = driver.page_source.encode('utf-8')
        #         savePage(html, link)
        #     except NoSuchElementException:
        #         has_next_page = False
    except Exception as e:
        # Python 3 exceptions have no `.message` attribute; printing the
        # exception itself shows its message.
        print(link, e)

# finalTime = time.time()
# print finalTime - initialTime
input("Crawling DarkFox marketplace done sucessfully. Press ENTER to continue\n")

+ 0
- 6
MarketPlaces/Initialization/marketsList.txt View File

@ -1,7 +1 @@
Ares
Quest
Royal
Kingdom
Tor2door
WeTheNorth
Bohemia

+ 6
- 25
MarketPlaces/Initialization/markets_mining.py View File

@ -7,14 +7,7 @@ Starting point of the Darkweb Mining Platform
import os
from datetime import *
from MarketPlaces.DarkFox.crawler_selenium import crawler as crawlerDarkFox
from MarketPlaces.Ares.crawler_selenium import crawler as crawlerAres
from MarketPlaces.Quest.crawler_selenium import crawler as crawlerQuest
from MarketPlaces.Royal.crawler_selenium import crawler as crawlerRoyal
from MarketPlaces.Kingdom.crawler_selenium import crawler as crawlerKingdom
from MarketPlaces.Tor2door.crawler_selenium import crawler as crawlerTor2door
from MarketPlaces.WeTheNorth.crawler_selenium import crawler as crawlerWeTheNorth
from MarketPlaces.Bohemia.crawler_selenium import crawler as crawlerBohemia
# from MarketPlaces.Kerberos.crawler_selenium import crawler as crawlerKerberos
import time
@ -67,23 +60,11 @@ if __name__ == '__main__':
print("Creating listing and description directories ...")
createDirectory(mkt)
time.sleep(5)
# input("Directories created successfully. Press ENTER to continue\n")
# if mkt == "DarkFox":
# crawlerDarkFox()
# elif mkt == "Ares":
# crawlerAres()
# elif mkt == "Quest":
# crawlerQuest()
# elif mkt == "Royal":
# crawlerRoyal()
# elif mkt == 'Kingdom':
# crawlerKingdom()
# elif mkt == 'Tor2door':
# crawlerTor2door()
# elif mkt == 'WeTheNorth':
# crawlerWeTheNorth()
# elif mkt == 'Bohemia':
# crawlerBohemia()
input("Directories created successfully. Press ENTER to continue\n")
if mkt == "DarkFox":
crawlerDarkFox()
elif mkt == 'Tor2door':
crawlerTor2door()
print("Scraping process completed successfully!")

+ 0
- 30
MarketPlaces/Initialization/prepare_parser.py View File

@ -6,13 +6,7 @@ import codecs
import shutil
from MarketPlaces.DB_Connection.db_connection import *
from MarketPlaces.DarkFox.parser import *
from MarketPlaces.Ares.parser import *
from MarketPlaces.Quest.parser import *
from MarketPlaces.Royal.parser import *
from MarketPlaces.Kingdom.parser import *
from MarketPlaces.Tor2door.parser import *
from MarketPlaces.WeTheNorth.parser import *
from MarketPlaces.Bohemia.parser import *
from MarketPlaces.Classifier.classify_product import predict
@ -175,20 +169,8 @@ def new_parse(marketPlace, createLog):
if marketPlace == "DarkFox":
rmm = darkfox_description_parser(soup)
elif marketPlace == "Ares":
rmm = ares_description_parser(soup)
elif marketPlace == "Quest":
rmm = quest_description_parser(soup)
elif marketPlace == "Royal":
rmm = royal_description_parser(soup)
elif marketPlace == 'Kingdom':
rmm = kingdom_description_parser(soup)
elif marketPlace == "Tor2door":
rmm = tor2door_description_parser(soup)
elif marketPlace == "WeTheNorth":
rmm = wethenorth_description_parser(soup)
elif marketPlace == "Bohemia":
rmm = bohemia_description_parser(soup)
# key = u"Pr:" + rmm[0].upper()[:desc_lim1] + u" Vendor:" + rmm[13].upper()[:desc_lim2]
key = u"Url:" + os.path.basename(line2).replace(".html", "")
@ -234,20 +216,8 @@ def new_parse(marketPlace, createLog):
if marketPlace == "DarkFox":
rw = darkfox_listing_parser(soup)
elif marketPlace == "Ares":
rw = ares_listing_parser(soup)
elif marketPlace == "Quest":
rw = quest_listing_parser(soup)
elif marketPlace == "Royal":
rw = royal_listing_parser(soup)
elif marketPlace == "Kingdom":
rw = kingdom_listing_parser(soup)
elif marketPlace == "Tor2door":
rw = tor2door_listing_parser(soup)
elif marketPlace == "WeTheNorth":
rw = wethenorth_listing_parser(soup)
elif marketPlace == "Bohemia":
rw = bohemia_listing_parser(soup)
else:
parseError = True


+ 6
- 6
MarketPlaces/Tor2door/crawler_selenium.py View File

@ -34,8 +34,11 @@ def startCrawling():
driver = getAccess()
if driver != 'down':
login(driver)
crawlForum(driver)
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
# new_parse(marketName, False)
@ -164,14 +167,11 @@ def createFFDriver():
def getAccess():
    """Open the fixed URL in a freshly created Firefox driver.

    Returns:
        The driver when the page load succeeds, or the string 'down' when
        it fails (the driver window is closed in that case).
    """
    url = getFixedURL()
    driver = createFFDriver()
    try:
        driver.get(url)
    except Exception:
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt / SystemExit are not reported as 'down'.
        driver.close()
        return 'down'
    return driver


Loading…
Cancel
Save