
resolved all merge conflicts

main
Khoi, 1 year ago
commit e066d3776a
21 changed files with 1743 additions and 1706 deletions
  1. +39 -55  Forums/Altenens/crawler_selenium.py
  2. +14 -7   Forums/Altenens/parser.py
  3. +23 -49  Forums/Cardingleaks/crawler_selenium.py
  4. +25 -45  Forums/CryptBB/crawler_selenium.py
  5. +24 -49  Forums/HiddenAnswers/crawler_selenium.py
  6. +1 -1    Forums/Initialization/forumsList.txt
  7. +5 -2    Forums/Initialization/forums_mining.py
  8. +247 -0  Forums/Initialization/geckodriver.log
  9. +14 -12  Forums/Initialization/prepare_parser.py
  10. +40 -67  Forums/Libre/crawler_selenium.py
  11. +28 -53  Forums/OnniForums/crawler_selenium.py
  12. +33 -49  Forums/Procrax/crawler_selenium.py
  13. +15 -14  MarketPlaces/AnonymousMarketplace/crawler_selenium.py
  14. +1 -0    MarketPlaces/AnonymousMarketplace/parser.py
  15. +911 -863  MarketPlaces/Initialization/geckodriver.log
  16. +15 -9   MarketPlaces/Initialization/prepare_parser.py
  17. +16 -16  MarketPlaces/M00nkeyMarket/crawler_selenium.py
  18. +198 -239  MarketPlaces/M00nkeyMarket/parser.py
  19. +2 -2    MarketPlaces/Tor2door/crawler_selenium.py
  20. +12 -12  MarketPlaces/TorBay/crawler_selenium.py
  21. +80 -162  MarketPlaces/TorBay/parser.py

+39 -55  Forums/Altenens/crawler_selenium.py

@ -1,8 +1,7 @@
__author__ = 'Helium'
'''
Altenens Forum Crawler (Selenium);
Untested due to CAPTCHAS and blocking the network
Altenens Forum Crawler (Selenium)
'''
from selenium import webdriver
@ -31,18 +30,18 @@ baseURL = 'https://altenens.is/'
# Opens Tor Browser, crawls the website
def startCrawling():
opentor()
# opentor()
forumName = getForumName()
driver = getAccess()
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
# driver = getAccess()
#
# if driver != 'down':
# try:
# login(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
#
new_parse(forumName, baseURL, False)
@ -73,12 +72,12 @@ def login(driver):
#Password here
passwordBox.send_keys('johnnyTest@18')# sends string to passwordBox
input("Press ENTER when you complete the CAPTCHA and press login\n")
input("Press ENTER when CAPTCHA is completed\n")
# wait for the listing page to show up (this XPath may need to change based on the seed URL)
# wait up to 50 sec until id = tab_content is found, then continue
# WebDriverWait(driver, 50).until(EC.visibility_of_element_located(
# (By.XPATH, '/html/body/div[1]/div[4]/div/div/div[3]/div/div/div[4]/div/div/div[1]/div/div[1]')))
WebDriverWait(driver, 50).until(EC.visibility_of_element_located(
(By.XPATH, '/html/body/div[1]/div[1]/div/div/div/div[1]/a[1]')))
# Returns the name of the website
@ -200,81 +199,69 @@ def getInterestedLinks():
return links
# newest version of crawling
def crawlForum(driver):
print("Crawling the Altenens forum")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
i = 0
count = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)# open
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
#loop through the topics
while has_next_page:
list = topicPages(html)# for multiple pages
for item in list:
#variable to check if there is a next page for the topic
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
topics = topicPages(html)
for topic in topics:
has_next_topic_page = True
counter = 1
page = topic
# check if there is a next page for the topics
while has_next_topic_page:
# try to access the next page of the topic
itemURL = urlparse.urljoin(baseURL, str(item))
itemURL = urlparse.urljoin(baseURL, str(page))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
savePage(driver.page_source, topic + f"page{counter}") # very important
# if there is a next page then go and save....
# specific
try:
item = driver.find_element(By.LINK_TEXT, value='Next').get_attribute('href')
# comment out
if counter == 2:
break
if item == "":
try:
page = driver.find_element(By.LINK_TEXT, value='Next').get_attribute('href')
if page == "":
raise NoSuchElementException
has_next_topic_page = False
else:
counter += 1
counter += 1
except NoSuchElementException:
has_next_topic_page = False
#end of loop
for i in range(counter):
driver.back()
# comment out
break
# comment out
if count == 1:
count = 0
break
try:# change depending on web page, #next page
try:
link = driver.find_element(by=By.LINK_TEXT, value='Next').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@ -284,9 +271,6 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling Altenens forum done successfully. Press ENTER to continue\n")


+14 -7  Forums/Altenens/parser.py

@ -27,7 +27,8 @@ def altenens_description_parser(soup):
topic = soup.find("h1", {"class": "p-title-value"}).text
topic = cleanString(topic.strip())
iposts = soup.find('div', {"class": "block-body js-replyNewMessageContainer"}).find_all('article')
body = soup.find('div', {"class": "block-container lbContainer"})
iposts = body.find_all('article', {"class": "message message--post js-post js-inlineModContainer"})
for ipost in iposts:
@ -54,12 +55,16 @@ def altenens_description_parser(soup):
sign.append(cleanString(signature))
inner = ipost.find('div', {"class": "bbWrapper"}).find(text=True, recursive=False)
post.append(cleanString(inner.strip()))
if inner is not None:
inner = inner.strip()
else:
inner = "-1"
post.append(cleanString(inner))
feedback.append("-1")
dt = ipost.find('time', {"class": "u-dt"})
date_time_obj = datetime.strptime(dt, '%m-%d-%Y, %I:%M %p')
dt = ipost.find('time', {"class": "u-dt"}).get('datetime')
date_time_obj = datetime.strptime(dt, '%Y-%m-%dT%H:%M:%S%z')
addDate.append(date_time_obj)
# Populate the final variable (this should be a list with all fields scraped)
@ -101,11 +106,11 @@ def altenens_listing_parser(soup):
link = itopic.find('a').get('href')
href.append(link)
user = itopic.find('div', {"class": "structItem-parts"}).find('a').text
user = itopic.find('ul', {"class": "structItem-parts"}).find('a').text
author.append(cleanString(user.strip()))
dt = itopic.find('li', {"class": "structItem-startDate"}).get('datetime')
date_time_obj = datetime.strptime(dt, '%m-%d-%Y, %I:%M %p')
dt = itopic.find('time', {"class": "u-dt"}).get('datetime')
date_time_obj = datetime.strptime(dt, '%Y-%m-%dT%H:%M:%S%z')
addDate.append(date_time_obj)
itopics = soup.find_all('div', {"class": "structItem-cell structItem-cell--meta"})
@ -113,10 +118,12 @@ def altenens_listing_parser(soup):
for itopic in itopics:
nposts = itopic.find('dl', {"class": "pairs pairs--justified"}).text
nposts = nposts.replace('Replies', '')
nposts = nposts.replace('K', '000')
posts.append(cleanString(nposts))
nviews = itopic.find('dl', {"class": "pairs pairs--justified structItem-minor"}).text
nviews = nviews.replace('Views', '')
nviews = nviews.replace('K', '000')
views.append(cleanString(nviews))
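
Two small conversions drive the parser fixes above: dates are now read from the <time> element's machine-readable datetime attribute (ISO 8601 with offset) rather than its display text, and abbreviated counts are expanded. A quick illustration with made-up values:

from datetime import datetime

dt = "2023-07-14T18:31:06+0000"  # e.g. the datetime attribute of <time class="u-dt">
date_time_obj = datetime.strptime(dt, '%Y-%m-%dT%H:%M:%S%z')

nposts = "Replies 1K".replace('Replies', '').replace('K', '000').strip()
# -> "1000"; note the simple substitution would turn "1.2K" into "1.2000"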


+23 -49  Forums/Cardingleaks/crawler_selenium.py

@ -2,7 +2,7 @@ __author__ = 'DarkWeb'
'''
Cardingleaks Forum Crawler (Selenium)
FIXED
Crawler updated and fixed
'''
from selenium import webdriver
@ -207,67 +207,53 @@ def getInterestedLinks():
def crawlForum(driver):
print("Crawling the Cardingleaks forum")
print("Crawling the Cardinglinks forum")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
i = 0
count = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
list = topicPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
driver.back()
#variable to check if there is a next page for the topic
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
topics = topicPages(html)
for topic in topics:
has_next_topic_page = True
counter = 1
page = topic
# check if there is a next page for the topics
while has_next_topic_page:
# try to access the next page of the topic
itemURL = urlparse.urljoin(baseURL, str(item))
itemURL = urlparse.urljoin(baseURL, str(page))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
savePage(driver.page_source, topic + f"page{counter}") # very important
# if there is a next page then go and save....
# Spec
try:
# temp = driver.find_element(By.XPATH, '/html/body/div[2]/div[4]/div/div[5]/div[2]/div/div[1]/div[1]/div/nav/div[1]') # /html/body/div/div[2]/div/div[2]/div/
item = driver.find_element(by=By.LINK_TEXT, value='Next').get_attribute('href') #/html/body/div/div[2]/div/div[2]/div
# comment out
if counter == 2:
break
if item == "":
try:
page = driver.find_element(by=By.LINK_TEXT, value='Next').get_attribute('href')
if page == "":
raise NoSuchElementException
else:
counter += 1
counter += 1
except NoSuchElementException:
has_next_topic_page = False
# end of loop
for i in range(counter):
driver.back()
@ -276,21 +262,12 @@ def crawlForum(driver):
# comment out
if count == 1:
count = 0
break
try:
# temp = driver.find_element(by=By.XPATH, value = '/html/body/div[2]/div[4]/div/div[5]/div[2]/div/div/div[1]/div/nav/div[1]')
link = driver.find_element(by=By.LINK_TEXT, value='Next').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@ -300,10 +277,7 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling Cardingleaks forum done successfully. Press ENTER to continue\n")
input("Crawling Cardingleaksforum done successfully. Press ENTER to continue\n")
# Returns 'True' if the link is Topic link, may need to change for every website


+25 -45  Forums/CryptBB/crawler_selenium.py

@ -238,65 +238,55 @@ def getInterestedLinks():
def crawlForum(driver):
print("Crawling the CryptBB forum")
print("Crawling the CryptBB forum")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
i = 0
count = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
list = topicPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
driver.back()
#variable to check if there is a next page for the topic
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
topics = topicPages(html)
for topic in topics:
has_next_topic_page = True
counter = 1
page = topic
# check if there is a next page for the topics
while has_next_topic_page:
# try to access the next page of the topic
itemURL = urlparse.urljoin(baseURL, str(item))
itemURL = urlparse.urljoin(baseURL, str(page))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
savePage(driver.page_source, topic + f"page{counter}") # very important
# comment out
if counter == 2:
break
# if there is a next page then go and save....
# next page in the topic?
try:
temp = driver.find_element(By.XPATH, '/html/body/div/div[2]/div/div[2]/div') # /html/body/div/div[2]/div/div[2]/div/
item = temp.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute('href') #/html/body/div/div[2]/div/div[2]/div
temp = driver.find_element(By.XPATH, '/html/body/div/div[2]/div/div[2]/div')
page = temp.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute('href')
if item == "":
if page == "":
raise NoSuchElementException
else:
counter += 1
counter += 1
except NoSuchElementException:
has_next_topic_page = False
# end of loop
for i in range(counter):
driver.back()
@ -305,21 +295,14 @@ def crawlForum(driver):
# comment out
if count == 1:
count = 0
break
try:
temp = driver.find_element(by=By.XPATH, value = '/html/body/div/div[2]/div/div[2]/div')
temp = driver.find_element(by=By.XPATH, value='/html/body/div/div[2]/div/div[2]/div')
link = temp.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@ -329,10 +312,7 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling CryptBB forum done successfully. Press ENTER to continue\n")
input("Crawling CrypttBB done successfully. Press ENTER to continue\n")
# Returns 'True' if the link is Topic link, may need to change for every website


+24 -49  Forums/HiddenAnswers/crawler_selenium.py

@ -179,86 +179,65 @@ def crawlForum(driver):
print("Crawling the HiddenAnswers forum")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
i = 0
count = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
list = topicPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
driver.back()
'''
#variable to check if there is a next page for the topic
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
topics = topicPages(html)
for topic in topics:
has_next_topic_page = True
counter = 1
page = topic
# check if there is a next page for the topics
while has_next_topic_page:
# try to access the next page of the topic
itemURL = urlparse.urljoin(baseURL, str(item))
itemURL = urlparse.urljoin(baseURL, str(page))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
savePage(driver.page_source, topic + f"page{counter}") # very important
# if there is a next page then go and save....
# next page in the topic?
try:
temp = driver.find_element(By.XPATH, '/html/body/div/div[2]/div/div[2]/div') # /html/body/div/div[2]/div/div[2]/div/
item = temp.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute('href') #/html/body/div/div[2]/div/div[2]/div
# comment out
if counter == 2:
break
if item == "":
try:
page = "" # no next page so far may have some later on
if page == "":
raise NoSuchElementException
has_next_topic_page = False
else:
counter += 1
counter += 1
except NoSuchElementException:
has_next_topic_page = False
# end of loop
for i in range(counter):
driver.back()
'''
# comment out
break
# comment out
if count == 1:
count = 0
break
try:
link = driver.find_element(by=By.XPATH, value = '/html/body/div[2]/div[2]/div/div[3]/div[3]/ul/li[7]/a').get_attribute('href')
link = driver.find_element(by=By.XPATH, value='/html/body/div[2]/div[2]/div/div[3]/div[3]/ul/li[7]/a').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@ -268,11 +247,7 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling HiddenAnswers forum done sucessfully. Press ENTER to continue\n")
input("Crawling HiddenAnswers done successfully. Press ENTER to continue\n")
# Returns 'True' if the link is Topic link


+1 -1  Forums/Initialization/forumsList.txt

@ -1 +1 @@
Altenens
Procrax

+5 -2  Forums/Initialization/forums_mining.py

@ -14,6 +14,7 @@ from Forums.Procrax.crawler_selenium import crawler as crawlerProcraxForum
from Forums.HiddenAnswers.crawler_selenium import crawler as crawlerHiddenAnswers
from Forums.Cardingleaks.crawler_selenium import crawler as crawlerCardingleaks
from Forums.Altenens.crawler_selenium import crawler as crawlerAltenens
from Forums.Libre.crawler_selenium import crawler as crawlerLibre
import configparser
import time
@ -113,12 +114,14 @@ if __name__ == '__main__':
crawlerAbyssForum()
elif forum == "HiddenAnswers":
crawlerHiddenAnswers()
elif forum == "Altenens":
crawlerAltenens()
elif forum == 'Procrax':
crawlerProcraxForum()
elif forum == 'Cardingleaks':
crawlerCardingleaks()
elif forum == 'Altenens':
crawlerAltenens()
elif forum == 'Libre':
crawlerLibre()
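
The forum-to-crawler routing is a growing if/elif chain; the same mapping could be expressed as a dictionary dispatch. A hypothetical alternative (not what the file does), using the crawler functions imported above:

CRAWLERS = {
    "HiddenAnswers": crawlerHiddenAnswers,
    "Altenens": crawlerAltenens,
    "Procrax": crawlerProcraxForum,
    "Cardingleaks": crawlerCardingleaks,
    "Libre": crawlerLibre,
}

crawler = CRAWLERS.get(forum)
if crawler is not None:
    crawler()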


+247 -0  Forums/Initialization/geckodriver.log

@ -10951,3 +10951,250 @@ unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1689363209615 geckodriver INFO Listening on 127.0.0.1:60532
1689363216981 mozrunner::runner INFO Running command: "C:\\\\Users\\\\Helium\\\\Desktop\\\\Tor Browser\\\\Browser\\\\firefox.exe" "--marionette" "--remote-debugging-port" "60533" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofile278pEs"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
console.log: "TorConnect: Observed profile-after-change"
console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change"
1689363219049 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:60533/devtools/browser/8c990d4b-44eb-425d-b226-b8d4c1cffc2d
1689363224682 Marionette INFO Listening on port 60540
1689363225068 RemoteAgent WARN TLS certificate errors will be ignored for this session
JavaScript error: , line 0: NotFoundError: No such JSWindowActor 'MarionetteEvents'
JavaScript error: , line 0: NotFoundError: No such JSWindowActor 'MarionetteEvents'
1689363820376 Marionette INFO Stopped listening on port 60540
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofile278pEs\thumbnails) because it does not exist
[Parent 5080, IPC I/O Parent] WARNING: file /var/tmp/build/firefox-b6010b1466c9/ipc/chromium/src/base/process_util_win.cc:167
1689363820593 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:60789/devtools/browser/8539d316-2b33-4477-9e35-2f9e6eab09b6
1689363569998 Marionette INFO Listening on port 60796
1689363570244 RemoteAgent WARN TLS certificate errors will be ignored for this session
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/, line 2: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/member.php?action=login, line 2: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/member.php?action=login, line 5: ReferenceError: lang is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/member.php?action=login, line 9: ReferenceError: use_xmlhttprequest is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=86, line 3: ReferenceError: lang is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/inline_edit.js?ver=1808, line 6: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628, line 6: ReferenceError: lang is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/report.js?ver=1804, line 4: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/thread.js?ver=1809, line 4: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628, line 19: ReferenceError: use_xmlhttprequest is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628, line 25: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628&page=2, line 6: ReferenceError: lang is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/report.js?ver=1804, line 4: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/thread.js?ver=1809, line 4: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628&page=2, line 19: ReferenceError: use_xmlhttprequest is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628&page=2, line 25: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628, line 6: ReferenceError: lang is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/report.js?ver=1804, line 4: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/thread.js?ver=1809, line 4: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628, line 19: ReferenceError: use_xmlhttprequest is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628, line 25: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=86, line 3: ReferenceError: lang is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/inline_edit.js?ver=1808, line 6: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=86&page=2, line 3: ReferenceError: lang is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/inline_edit.js?ver=1808, line 6: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=16778, line 6: ReferenceError: lang is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/report.js?ver=1804, line 4: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/thread.js?ver=1809, line 4: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=16778, line 19: ReferenceError: use_xmlhttprequest is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=16778, line 25: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=86&page=2, line 3: ReferenceError: lang is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/inline_edit.js?ver=1808, line 6: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=86, line 3: ReferenceError: lang is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/inline_edit.js?ver=1808, line 6: ReferenceError: $ is not defined
1689363752505 Marionette INFO Stopped listening on port 60796
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
JavaScript error: chrome://remote/content/marionette/cert.js, line 55: NS_ERROR_NOT_AVAILABLE: Component returned failure code: 0x80040111 (NS_ERROR_NOT_AVAILABLE) [nsICertOverrideService.setDisableAllSecurityChecksAndLetAttackersInterceptMyData]
!!! error running onStopped callback: TypeError: callback is not a function
JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofilecgBCTA\thumbnails) because it does not exist
###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
Crash Annotation GraphicsCriticalError: |[C0][GFX1-]: Receive IPC close with reason=AbnormalShutdown (t=1346.28)
###!!! [Child][MessageChannel] Error: (msgtype=0x3900E5,name=PContent::Msg_GraphicsError) Channel closing: too late to send/recv, messages will be lost
[GFX1-]: Receive IPC close with reason=AbnormalShutdown
1689363753315 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1689364130030 geckodriver INFO Listening on 127.0.0.1:61129
1689364135033 mozrunner::runner INFO Running command: "C:\\\\Users\\\\Helium\\\\Desktop\\\\Tor Browser\\\\Browser\\\\firefox.exe" "--marionette" "--remote-debugging-port" "61130" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofileZXcPSi"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
console.log: "TorConnect: Observed profile-after-change"
console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change"
1689364136375 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:61130/devtools/browser/d0a00e7f-efab-4092-ba43-3afb5ec55bcc
1689364140122 Marionette INFO Listening on port 61138
1689364140225 RemoteAgent WARN TLS certificate errors will be ignored for this session
1689364164357 Marionette INFO Stopped listening on port 61138
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofileZXcPSi\thumbnails) because it does not exist
[Parent 5336, IPC I/O Parent] WARNING: file /var/tmp/build/firefox-b6010b1466c9/ipc/chromium/src/base/process_util_win.cc:167
[Parent 5336, IPC I/O Parent] WARNING: pipe error: 232: file /var/tmp/build/firefox-b6010b1466c9/ipc/chromium/src/chrome/common/ipc_channel_win.cc:544
1689364165253 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1689364952139 geckodriver INFO Listening on 127.0.0.1:61327
1689364958550 mozrunner::runner INFO Running command: "C:\\\\Users\\\\Helium\\\\Desktop\\\\Tor Browser\\\\Browser\\\\firefox.exe" "--marionette" "--remote-debugging-port" "61328" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofileeX31Bg"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
console.log: "TorConnect: Observed profile-after-change"
console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change"
1689364960322 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:61328/devtools/browser/d98ca77f-1ca8-49c2-b3d0-7c98e39d55e8
1689364964835 Marionette INFO Listening on port 61336
1689364965449 RemoteAgent WARN TLS certificate errors will be ignored for this session
1689365065931 Marionette INFO Stopped listening on port 61336
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofileeX31Bg\thumbnails) because it does not exist
###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
1689365066887 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1689365596202 geckodriver INFO Listening on 127.0.0.1:61665
1689365603047 mozrunner::runner INFO Running command: "C:\\\\Users\\\\Helium\\\\Desktop\\\\Tor Browser\\\\Browser\\\\firefox.exe" "--marionette" "--remote-debugging-port" "61666" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofilegVxGn8"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
console.log: "TorConnect: Observed profile-after-change"
console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change"
1689365604946 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:61666/devtools/browser/3f945d28-11cd-436c-832e-2085f8bb57e1
1689365609901 Marionette INFO Listening on port 61676
1689365610315 RemoteAgent WARN TLS certificate errors will be ignored for this session
1689365827541 Marionette INFO Stopped listening on port 61676
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
JavaScript error: chrome://remote/content/marionette/cert.js, line 55: NS_ERROR_NOT_AVAILABLE: Component returned failure code: 0x80040111 (NS_ERROR_NOT_AVAILABLE) [nsICertOverrideService.setDisableAllSecurityChecksAndLetAttackersInterceptMyData]
!!! error running onStopped callback: TypeError: callback is not a function
JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
JavaScript error: resource://gre/modules/PageThumbs.jsm, line 709: AbortError: IOUtils.profileBeforeChange getter: IOUtils: profileBeforeChange phase has already finished
[Parent 7204, IPC I/O Parent] WARNING: file /var/tmp/build/firefox-b6010b1466c9/ipc/chromium/src/base/process_util_win.cc:167
1689365828066 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1689366358424 geckodriver INFO Listening on 127.0.0.1:62059
1689366363521 mozrunner::runner INFO Running command: "C:\\\\Users\\\\Helium\\\\Desktop\\\\Tor Browser\\\\Browser\\\\firefox.exe" "--marionette" "--remote-debugging-port" "62060" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofileSRNF4S"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
console.log: "TorConnect: Observed profile-after-change"
console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change"
1689366364862 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:62060/devtools/browser/38410e90-6408-4c6e-a78a-4d8c6dabe5f5
1689366368448 Marionette INFO Listening on port 62067
###!!! [Child][MessageChannel] Error: (msgtype=0x390097,name=PContent::Msg_InitBackground) Channel closing: too late to send/recv, messages will be lost
###!!! [Child][MessageChannel] Error: (msgtype=0x390097,name=PContent::Msg_InitBackground) Channel closing: too late to send/recv, messages will be lost
1689366368939 RemoteAgent WARN TLS certificate errors will be ignored for this session
1689366462907 Marionette INFO Stopped listening on port 62067
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
JavaScript error: chrome://remote/content/marionette/cert.js, line 55: NS_ERROR_NOT_AVAILABLE: Component returned failure code: 0x80040111 (NS_ERROR_NOT_AVAILABLE) [nsICertOverrideService.setDisableAllSecurityChecksAndLetAttackersInterceptMyData]
!!! error running onStopped callback: TypeError: callback is not a function
JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
JavaScript error: resource://gre/modules/PageThumbs.jsm, line 709: AbortError: IOUtils.profileBeforeChange getter: IOUtils: profileBeforeChange phase has already finished
1689366464131 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138

+14 -12  Forums/Initialization/prepare_parser.py

@ -8,6 +8,7 @@ from Forums.DB_Connection.db_connection import *
from Forums.BestCardingWorld.parser import *
from Forums.CryptBB.parser import *
from Forums.OnniForums.parser import *
from Forums.Altenens.parser import *
from Forums.Classifier.classify_product import predict
# from DarkWebMining_Sample.Forums.Classifier.classify_product import predict_semi
@ -151,27 +152,27 @@ def new_parse(forum, url, createLog):
rmm = cryptBB_description_parser(soup)
elif forum == "OnniForums":
rmm = onniForums_description_parser(soup)
elif forum == "Altenens":
rmm = altenens_description_parser(soup)
# key = u"Top:" + rmm[0].upper().strip() + u" User:" + rmm[2][0].upper().strip()
key = u"Url:" + os.path.basename(line2).replace(".html", "")
# check if page or page exists at the end of a string followed by a series of numbers
#if yes add to other if no add to first page dictionary
# save descriptions into records in memory
check = re.compile(r'(?<=Page|page)[0-9]*')
# check if "page1" exists at the end of a string
# if yes add to first page directory if no add to other
check = re.compile(r'page1$')
if check.search(key):
# print(key, 'is an other page\n')
other[key] = {'rmm': rmm, 'filename': os.path.basename(line2)}
else:
# print(key, 'is a first page\n')
detPage[key] = {'rmm': rmm, 'files': [os.path.basename(line2)]}
else:
# print(key, 'is an other page\n')
other[key] = {'rmm': rmm, 'filename': os.path.basename(line2)}
except:
nError += 1
print("There was a problem to parse the file " + line2 + " in the Description section!")
traceback.print_exc()
if createLog:
logFile.write(str(nError) + ". There was a problem to parse the file " + line2 + " in the Description section.\n")
@ -195,7 +196,6 @@ def new_parse(forum, url, createLog):
other.pop(k)
# Parsing the Listing Pages and put the tag's content into a list
for index, line1 in enumerate(lines):
@ -231,6 +231,8 @@ def new_parse(forum, url, createLog):
rw = cryptBB_listing_parser(soup)
elif forum == "OnniForums":
rw = onniForums_listing_parser(soup)
elif forum == "Altenens":
rw = altenens_listing_parser(soup)
except:
@ -255,8 +257,8 @@ def new_parse(forum, url, createLog):
# print(rec)
# key = u"Top:" + rec[1].upper().strip() + u" User:" + rec[5].upper().strip()
key = u"Url:" + cleanLink(rec[6])
print(key)
key = u"Url:" + cleanLink(rec[6]) + "page1"
# print(key)
if key in detPage:
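
The "page1" suffix is the join key between the two passes: description files saved as '<topic>page<n>.html' are split into first pages and continuations, and each listing record gets the same suffix appended so it can find its topic's first page. A compressed sketch of the keying, with a hypothetical filename:

import os
import re

first_page = re.compile(r'page1$')

key = u"Url:" + os.path.basename("some-topicpage1.html").replace(".html", "")
if first_page.search(key):
    pass  # first page of a topic -> detPage[key]
else:
    pass  # continuation page -> other[key]

# listing side: appending the same suffix makes the keys line up
listing_key = u"Url:" + "some-topic" + "page1"
assert listing_key == key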


+40 -67  Forums/Libre/crawler_selenium.py

@ -62,16 +62,14 @@ def login(driver):
input('Press enter when CAPTCHA is completed, and you\'re at the login page')
#entering username and password into input boxes
usernameBox = driver.find_element(by=By.NAME, value='login')
usernameBox = driver.find_element(by=By.NAME, value='username')
#Username here
usernameBox.send_keys('ct1234')#sends string to the username box
passwordBox = driver.find_element(by=By.NAME, value='password')
#Password here
passwordBox.send_keys('r5o0wqmw')# sends string to passwordBox
login = driver.find_element(by=By.CLASS_NAME, value='block-container')
login_link = login.find_element(by=By.TAG_NAME, value='button')
login_link.click()
input("Press the login button and solve the CAPTCHA then press enter\n")
# input('input')
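
The login flow above now fills both fields but leaves the button press and CAPTCHA to the operator, and the username field is located by name='username' instead of name='login'. A sketch of that manual-login pattern (credentials elided, manual_login is an illustrative name):

from selenium.webdriver.common.by import By

def manual_login(driver, username, password):
    driver.find_element(By.NAME, 'username').send_keys(username)
    driver.find_element(By.NAME, 'password').send_keys(password)
    # the operator clicks login and solves the CAPTCHA in the browser
    input("Press the login button and solve the CAPTCHA, then press ENTER\n")
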
@ -209,87 +207,65 @@ def crawlForum(driver):
print("Crawling the Libre forum")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
i = 0
count = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
list = topicPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
driver.back()
#variable to check if there is a next page for the topic
# has_next_topic_page = True
# counter = 1
# # check if there is a next page for the topics
# while has_next_topic_page:
# # try to access the next page of the topic
# itemURL = urlparse.urljoin(baseURL, str(item))
# try:
# driver.get(itemURL)
# except:
# driver.refresh()
# savePage(driver.page_source, item)
#
# # if there is a next page then go and save....
# # Spec
# try:
# # temp = driver.find_element(By.XPATH, '/html/body/div[2]/div[4]/div/div[5]/div[2]/div/div[1]/div[1]/div/nav/div[1]') # /html/body/div/div[2]/div/div[2]/div/
# item = driver.find_element(by=By.LINK_TEXT, value='>').get_attribute('href') #/html/body/div/div[2]/div/div[2]/div
#
# if item == "":
# raise NoSuchElementException
# else:
# counter += 1
#
# except NoSuchElementException:
# has_next_topic_page = False
#
# # end of loop
# for i in range(counter):
# driver.back()
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
topics = topicPages(html)
for topic in topics:
has_next_topic_page = True
counter = 1
page = topic
while has_next_topic_page:
itemURL = urlparse.urljoin(baseURL, str(page))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, topic + f"page{counter}") # very important
# comment out
if counter == 2:
break
try:
page = "" # no next page so far may have some later on
if page == "":
raise NoSuchElementException
counter += 1
except NoSuchElementException:
has_next_topic_page = False
for i in range(counter):
driver.back()
# comment out
break
# comment out
if count == 1:
count = 0
break
try:
# temp = driver.find_element(by=By.XPATH, value = '/html/body/div[2]/div[4]/div/div[5]/div[2]/div/div/div[1]/div/nav/div[1]')
link = driver.find_element(by=By.LINK_TEXT, value='>').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@ -299,10 +275,7 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling Libre forum done successfully. Press ENTER to continue\n")
input("Crawling Libre done successfully. Press ENTER to continue\n")
# Returns 'True' if the link is Topic link, may need to change for every website


+28 -53  Forums/OnniForums/crawler_selenium.py

@ -214,92 +214,71 @@ def getInterestedLinks():
def crawlForum(driver):
print("Crawling the OnniForums forum")
print("Crawling the OnniForums")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
i = 0
count = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
list = topicPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
#next page for topic
# variable to check if there is a next page for the topic
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
topics = topicPages(html)
for topic in topics:
has_next_topic_page = True
counter = 1
page = topic
# check if there is a next page for the topics
while has_next_topic_page:
# try to access the next page of the topic
itemURL = urlparse.urljoin(baseURL, str(item))
itemURL = urlparse.urljoin(baseURL, str(page))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
savePage(driver.page_source, topic + f"page{counter}") # very important
# comment out
if counter == 2:
break
# if there is a next page then go and save....
# next page in the topic?
try:
temp = driver.find_element(By.XPATH,
'/html/body/div/div[2]/div/div[3]/div') # /html/body/div/div[2]/div/div[2]/div/
item = temp.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute(
'href') # /html/body/div/div[2]/div/div[2]/div
temp = driver.find_element(By.XPATH,'/html/body/div/div[2]/div/div[3]/div') # /html/body/div/div[2]/div/div[2]/div/
page = temp.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute('href') # /html/body/div/div[2]/div/div[2]/div
if item == "":
if page == "":
raise NoSuchElementException
has_next_topic_page = False
else:
counter += 1
counter += 1
except NoSuchElementException:
has_next_topic_page = False
# end of loop
for i in range(counter):
driver.back()
# comment out, one topic per page
# comment out
break
# comment out, go through all pages
# comment out
if count == 1:
count = 0
break
try:
temp = driver.find_element(by=By.XPATH, value=
'/html/body/div/div[2]/div/div[3]/div') # /html/body/div/div[2]/div/div[3]/div
temp = driver.find_element(by=By.XPATH, value='/html/body/div/div[2]/div/div[3]/div') # /html/body/div/div[2]/div/div[3]/div
link = temp.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@ -309,11 +288,7 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling OnniForums forum done sucessfully. Press ENTER to continue\n")
input("Crawling OnniForums done successfully. Press ENTER to continue\n")
# Returns 'True' if the link is Topic link


+33 -49  Forums/Procrax/crawler_selenium.py

@ -202,83 +202,70 @@ def getInterestedLinks():
def crawlForum(driver):
print("Crawling the Procrax forum")
print("Crawling the Procrax")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
i = 0
count = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)# open
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
#loop through the topics
while has_next_page:
list = topicPages(html)# for multiple pages
for item in list:
#variable to check if there is a next page for the topic
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
topics = topicPages(html)
for topic in topics:
has_next_topic_page = True
counter = 1
page = topic
# check if there is a next page for the topics
while has_next_topic_page:
# try to access the next page of the topic
itemURL = urlparse.urljoin(baseURL, str(item))
itemURL = urlparse.urljoin(baseURL, str(page))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
savePage(driver.page_source, topic + f"page{counter}") # very important
# comment out
if counter == 2:
break
# if there is a next page then go and save....
# specific
try:
# temp = driver.find_element(By.XPATH, value='/html/body/div[1]/div[3]/div[2]/div[3]/div/div')
item = driver.find_element(By.LINK_TEXT, value='Next').get_attribute('href')
page = driver.find_element(By.LINK_TEXT, value='Next').get_attribute('href')
if item == "":
if page == "":
raise NoSuchElementException
has_next_topic_page = False
else:
counter += 1
counter += 1
except NoSuchElementException:
has_next_topic_page = False
#end of loop
for i in range(counter):
driver.back()
# # comment out
# break
#
# # comment out
# if count == 1:
# count = 0
# break
try:# change depending on web page, #general
# /html/body/div[1]/div[3]/div[2]/div[3]/div/div/div/div[1]/div/nav/div[1]
# temp = driver.find_element(By.XPATH, value='/html/body/div[1]/div[3]/div[2]/div[3]/div/div/div/div[1]/div/nav/div[1]')
# comment out
break
# comment out
if count == 1:
break
try:
link = driver.find_element(by=By.LINK_TEXT, value='Next').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@ -288,10 +275,7 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling Procrax forum done successfully. Press ENTER to continue\n")
input("Crawling Procrax done successfully. Press ENTER to continue\n")
# Returns 'True' if the link is Topic link, may need to change for every website


+15 -14  MarketPlaces/AnonymousMarketplace/crawler_selenium.py

@ -32,19 +32,19 @@ baseURL = 'http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling():
# opentor()
opentor()
mktName = getMKTName()
# driver = getAccess()
driver = getAccess()
# if driver != 'down':
# try:
# login(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
new_parse(mktName, baseURL, False)
# new_parse(mktName, baseURL, False)
# Opens Tor Browser
@ -188,9 +188,9 @@ def getInterestedLinks():
# carding
links.append('http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion/product-category/carding/')
# # hacked paypal
# links.append('http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion/product-category/hacked-paypal-accounts/')
links.append('http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion/product-category/hacked-paypal-accounts/')
# # hacking services
# links.append('http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion/product-category/hacking-services/')
links.append('http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion/product-category/hacking-services/')
return links
@ -238,6 +238,7 @@ def crawlForum(driver):
# count = 0
# break
# left in, in case the site changes
try:
link = ""
if link == "":
@ -267,7 +268,7 @@ def crawlForum(driver):
#@param: url of any url crawled
#return: true if is a description page, false if not
def isDescriptionLink(url):
if 'product/' in url:
if '/product/' in url:
return True
return False
@ -276,7 +277,7 @@ def isDescriptionLink(url):
#@param: url of any url crawled
#return: true if is a Listing page, false if not
def isListingLink(url):
if 'product-' in url:
if 'category' in url:
return True
return False
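The tightened predicates encode the site's URL layout: listing pages live under /product-category/... while description pages live under /product/..., so anchoring on the '/product/' path segment and on 'category' keeps the two classes disjoint. A quick check with placeholder URLs (example.onion stands in for the real host):

def isDescriptionLink(url):
    return '/product/' in url

def isListingLink(url):
    return 'category' in url

listing = 'http://example.onion/product-category/carding/'
product = 'http://example.onion/product/some-listing/'

assert isListingLink(listing) and not isDescriptionLink(listing)
assert isDescriptionLink(product) and not isListingLink(product)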


+ 1
- 0
MarketPlaces/AnonymousMarketplace/parser.py View File

@ -171,6 +171,7 @@ def anonymous_links_parser(soup):
for a in listing:
bae = a.find('a', {"class": "woocommerce-LoopProduct-link woocommerce-loop-product__link"}, href=True)
link = bae['href']
href.append(link)
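That single added line is the whole fix: the loop located each product anchor but never stored it, so anonymous_links_parser always returned an empty list and the crawler had no description pages to visit. A self-contained check of the corrected behavior, collapsed here to a direct find_all (the HTML snippet is illustrative; the class name is the one in the diff):

from bs4 import BeautifulSoup

def anonymous_links_parser(soup):
    href = []
    anchors = soup.find_all('a', {"class": "woocommerce-LoopProduct-link woocommerce-loop-product__link"}, href=True)
    for a in anchors:
        href.append(a['href'])  # the line this commit adds
    return href

html = '<a class="woocommerce-LoopProduct-link woocommerce-loop-product__link" href="/product/x/">x</a>'
print(anonymous_links_parser(BeautifulSoup(html, 'html.parser')))  # ['/product/x/']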

+ 911
- 863
MarketPlaces/Initialization/geckodriver.log
File diff suppressed because it is too large
View File


+ 15
- 9
MarketPlaces/Initialization/prepare_parser.py View File

@ -10,6 +10,8 @@ from MarketPlaces.Tor2door.parser import *
from MarketPlaces.Apocalypse.parser import *
from MarketPlaces.ThiefWorld.parser import *
from MarketPlaces.AnonymousMarketplace.parser import *
from MarketPlaces.TorBay.parser import *
from MarketPlaces.M00nkeyMarket.parser import *
from MarketPlaces.Classifier.classify_product import predict
@ -148,15 +150,18 @@ def new_parse(marketPlace, url, createLog):
rmm = thiefWorld_description_parser(soup)
elif marketPlace =="AnonymousMarketplace":
rmm = anonymousMarketplace_description_parser(soup)
print(rmm)
elif marketPlace == "TorBay":
rmm = torbay_description_parser(soup)
elif marketPlace == "M00nkeyMarket":
rmm = m00nkey_description_parser(soup)
# key = u"Pr:" + rmm[0].upper()[:desc_lim1] + u" Vendor:" + rmm[13].upper()[:desc_lim2]
key = u"Url:" + os.path.basename(line2).replace(".html", "")
# save file address with description record in memory
detPage[key] = {'rmm': rmm, 'filename': os.path.basename(line2)}
except :
except:
nError += 1
print("There was a problem to parse the file " + line2 + " in the Description section!")
@ -188,7 +193,7 @@ def new_parse(marketPlace, url, createLog):
readError = True
if not readError:
print("Hello!")
parseError = False
try:
@ -201,12 +206,15 @@ def new_parse(marketPlace, url, createLog):
elif marketPlace == "ThiefWorld":
rw = thiefWorld_listing_parser(soup)
elif marketPlace == "AnonymousMarketplace":
rw = anonymousMarketplace_listing_parser(soup)
rw = anonymousMarketplace_listing_parser(soup)
elif marketPlace == "TorBay":
rw = torbay_listing_parser(soup)
elif marketPlace == "M00nkeyMarket":
rw = m00nkey_listing_parser(soup)
else:
parseError = True
except Exception as e:
raise e
except:
nError += 1
print("There was a problem to parse the file " + line1 + " in the listing section!")
@ -225,7 +233,6 @@ def new_parse(marketPlace, url, createLog):
for rec in rw:
rec = rec.split(',')
print(rec)
# if len(detPage) > 0: #It was created here just because Zeroday Market does not have Description Pages
# key = rec[23]
@ -233,7 +240,6 @@ def new_parse(marketPlace, url, createLog):
# key = u"Pr:" + rec[1].upper()[:list_lim1] + u" Vendor:" + rec[18].upper()[:list_lim2]
key = u"Url:" + cleanLink(rec[20])
print(key)
# if the associated description page is parsed
if key in detPage:


+ 16
- 16
MarketPlaces/M00nkeyMarket/crawler_selenium.py View File

@ -33,19 +33,19 @@ baseURL = 'http://moonkey4f2mkcp6hpackeea356puiry27h3dz3hzbt3adbmsk4gs7wyd.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor
# acts as the main method for the crawler; a call at the end of this file invokes it
def startCrawling():
opentor()
# mktName = getMKTName()
driver = getAccess()
# opentor()
mktName = getMKTName()
# driver = getAccess()
#
# if driver != 'down':
# try:
# login(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
# new_parse(forumName, baseURL, False)
new_parse(mktName, baseURL, False)
# Opens Tor Browser
@ -246,12 +246,12 @@ def crawlForum(driver):
driver.back()
# comment out
break
# break
# comment out
# if count == 1:
# count = 0
# break
if count == 1:
count = 0
break
try:
link = driver.find_element(by=By.LINK_TEXT, value='Next ›').get_attribute('href')
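This is the same pair of 'comment out' toggles the other crawlers carry: one break stops after the first listing page, the count guard after two. Hoisting the cap into a parameter makes the test/full-crawl switch explicit instead of depending on which lines happen to be commented; listing_pages and max_pages are hypothetical:

def listing_pages(first_url, fetch, next_url, max_pages=2):
    """Yield (url, html) pairs, following 'Next ›' until exhausted or capped."""
    url, count = first_url, 0
    while url:
        html = fetch(url)
        yield url, html
        count += 1
        if max_pages is not None and count >= max_pages:  # None = full crawl
            break
        url = next_url(html)  # href of the 'Next ›' link, or None when absent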


+ 198
- 239
MarketPlaces/M00nkeyMarket/parser.py View File

@ -1,4 +1,4 @@
__author__ = 'DarkWeb'
__author__ = 'Helium'
# Here, we are importing the auxiliary functions to clean or convert data
from MarketPlaces.Utilities.utilities import *
@ -11,133 +11,132 @@ from bs4 import BeautifulSoup
#stores info it needs in different lists, these lists are returned after being organized
#@param: soup object looking at html page of description page
#return: 'row' that contains a variety of lists that each hold info on the description page
def darkfox_description_parser(soup):
def m00nkey_description_parser(soup):
# Fields to be parsed
name = "-1" # 0 Product_Name
describe = "-1" # 1 Product_Description
lastSeen = "-1" # 2 Product_LastViewDate
rules = "-1" # 3 NOT USED ...
CVE = "-1" # 4 Product_CVE_Classification (Common Vulnerabilities and Exposures)
MS = "-1" # 5 Product_MS_Classification (Microsoft Security)
review = "-1" # 6 Product_Number_Of_Reviews
category = "-1" # 7 Product_Category
shipFrom = "-1" # 8 Product_ShippedFrom
shipTo = "-1" # 9 Product_ShippedTo
left = "-1" # 10 Product_QuantityLeft
escrow = "-1" # 11 Vendor_Warranty
terms = "-1" # 12 Vendor_TermsAndConditions
vendor = "-1" # 13 Vendor_Name
sold = "-1" # 14 Product_QuantitySold
addDate = "-1" # 15 Product_AddedDate
available = "-1" # 16 NOT USED ...
endDate = "-1" # 17 NOT USED ...
BTC = "-1" # 18 Product_BTC_SellingPrice
USD = "-1" # 19 Product_USD_SellingPrice
rating = "-1" # 20 Vendor_Rating
success = "-1" # 21 Vendor_Successful_Transactions
EURO = "-1" # 22 Product_EURO_SellingPrice
# Finding Product Name
name = soup.find('h1').text
name = name.replace('\n', ' ')
name = name.replace(",", "")
name = name.strip()
# Finding Vendor
vendor = soup.find('h3').find('a').text.strip()
# Finding Vendor Rating
rating = soup.find('span', {'class': "tag is-dark"}).text.strip()
# Finding Successful Transactions
success = soup.find('h3').text
success = success.replace("Vendor: ", "")
success = success.replace(vendor, "")
success = success.replace("(", "")
success = success.replace(")", "")
success = success.strip()
bae = soup.find('div', {'class': "box"}).find_all('ul')
# Finding Prices
USD = bae[1].find('strong').text.strip()
li = bae[2].find_all('li')
# Finding Escrow
escrow = li[0].find('span', {'class': "tag is-dark"}).text.strip()
# Finding the Product Category
category = li[1].find('span', {'class': "tag is-dark"}).text.strip()
# Finding the Product Quantity Available
left = li[3].find('span', {'class': "tag is-dark"}).text.strip()
# Finding Number Sold
sold = li[4].find('span', {'class': "tag is-dark"}).text.strip()
li = bae[3].find_all('li')
# Finding Shipment Information (Origin)
if "Ships from:" in li[-2].text:
shipFrom = li[-2].text
shipFrom = shipFrom.replace("Ships from: ", "")
# shipFrom = shipFrom.replace(",", "")
shipFrom = shipFrom.strip()
# Finding Shipment Information (Destination)
shipTo = li[-1].find('div', {'title': "List of countries is scrollable"}).text
shipTo = shipTo.replace("Ships to: ", "")
shipTo = shipTo.strip()
if "certain countries" in shipTo:
countries = ""
tags = li[-1].find_all('span', {'class': "tag"})
for tag in tags:
country = tag.text.strip()
countries += country + ", "
shipTo = countries.strip(", ")
# Finding the Product description
describe = soup.find('div', {'class': "pre-line"}).text
describe = describe.replace("\n", " ")
describe = describe.strip()
'''# Finding the Number of Product Reviews
tag = soup.findAll(text=re.compile('Reviews'))
for index in tag:
reviews = index
par = reviews.find('(')
if par >=0:
reviews = reviews.replace("Reviews (","")
reviews = reviews.replace(")","")
reviews = reviews.split(",")
review = str(abs(int(reviews[0])) + abs(int(reviews[1])))
else :
review = "-1"'''
# Searching for CVE and MS categories
cve = soup.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
if cve:
CVE = " "
for idx in cve:
CVE += (idx)
CVE += " "
CVE = CVE.replace(',', ' ')
CVE = CVE.replace('\n', '')
ms = soup.findAll(text=re.compile('MS\d{2}-\d{3}'))
if ms:
MS = " "
for im in ms:
MS += (im)
MS += " "
MS = MS.replace(',', ' ')
MS = MS.replace('\n', '')
vendor = "-1" # 0 *Vendor_Name
success = "-1" # 1 Vendor_Successful_Transactions
rating_vendor = "-1" # 2 Vendor_Rating
name = "-1" # 3 *Product_Name
describe = "-1" # 4 Product_Description
CVE = "-1" # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) dont worry about that much
MS = "-1" # 6 Product_MS_Classification (Microsoft Security) dont worry about that much
category = "-1" # 7 Product_Category
views = "-1" # 8 Product_Number_Of_Views
reviews = "-1" # 9 Product_Number_Of_Reviews
rating_item = "-1" # 10 Product_Rating
addDate = "-1" # 11 Product_AddedDate
BTC = "-1" # 12 Product_BTC_SellingPrice
USD = "-1" # 13 Product_USD_SellingPrice
EURO = "-1" # 14 Product_EURO_SellingPrice
sold = "-1" # 15 Product_QuantitySold
left = "-1" # 16 Product_QuantityLeft
shipFrom = "-1" # 17 Product_ShippedFrom
shipTo = "-1" # 18 Product_ShippedTo
#vendor name
temp = soup.find('div', {'class': 'box rounded mb-0'}).find('a').text
vendor = (cleanString(temp.strip()))
#successful transaction
temp = soup.findAll('div', {'class','text-center text-truncate column-flex ml-1 mr-1'}) #card sidebar-menu mb-4 card sidebar-menu mb-4
temp2 = temp[1].findAll('span', {'class', 'float-right font-weight-bold'})
temp = temp2[1].text
success = (cleanString(temp.strip()))
#vendor rating 5
temp = soup.findAll('div', {'class', 'text-center text-truncate column-flex ml-1 mr-1'}) # card sidebar-menu mb-4 card sidebar-menu mb-4
temp2 = temp[1].findAll('span', {'class', 'float-right font-weight-bold'})
temp = temp2[5].text
rating_vendor = (cleanString(temp.strip()))
# product name
temp = soup.find('h3', {'class', 'h3 rounded card-title'}).find('span').text
name = (cleanString(temp.strip()))
# product description
describe = soup.find('div', {'class': "box rounded flex-fill"}).find('pre').text
if "\n" in describe:
describe = describe.replace("\n", " ")
describe = describe.replace("\r", " ")
describe = cleanString(describe.strip())
CVE = "-1" # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) dont worry about that much
MS = "-1" # 6 Product_MS_Classification (Microsoft Security) dont worry about that much
# product category
try:
temp = soup.findAll('table', {'class', 'table table-hover'})
temp2 = temp[1].find('tr').findAll('td')
temp = temp2[1].text
category = cleanString(temp.strip())
except:
temp = soup.find('table', {'class', 'table table-hover'})
temp2 = temp.find('tbody').find('tr').findAll('td')
temp = temp2[1].text
category = cleanString(temp.strip())
# product number of views
try:
temp = soup.find('div', {'class', 'box rounded mb-0'})
temp2 = temp.findAll('i')
temp = temp2[2].text
views = cleanString((temp.strip()))
except:
print('Product number of views')
# views = "-1"
reviews = "-1" # 9 Product_Number_Of_Reviews
rating_item = "-1" # 10 Product_Rating
addDate = "-1" # 11 Product_AddedDate
#BTC selling price box box-rounded mt-2
temp = soup.find('div', {'class', 'box box-rounded mt-2'})
temp2 = temp.findAll('i', {'class', 'float-right color-prices'})
temp = temp2[1].text
BTC = cleanString((temp.strip()))
# USD selling price
temp = soup.find('div', {'class', 'box box-rounded mt-2'})
temp2 = temp.findAll('center')
temp = temp2[1].find('i').text
if "$" in temp:
temp = temp.replace("$", "")
USD = cleanString((temp.strip()))
EURO = "-1" # 14 Product_EURO_SellingPrice
# product sold
temp = soup.find('div', {'class', 'box rounded mb-0'}) # card sidebar-menu mb-4 card sidebar-menu mb-4
temp2 = temp.find('i')
temp = temp2.text
sold = (cleanString(temp.strip()))
# sold = "-1"
# product quantity left  ### ERROR
try:
temp = soup.findAll('table', {'class', 'table table-hover'})
temp2 = temp[1].findAll('tr')
temp3 = temp2[1].findAll('td')
temp = temp3[1].text
left = cleanString(temp.strip())
except:
temp = soup.find('table', {'class', 'table table-hover'})
temp2 = temp.findAll('tr')
temp3 = temp2[1].findAll('td')
temp = temp3[1].text
left = cleanString(temp.strip())
shipFrom = "-1" # 17 Product_ShippedFrom
shipTo = "-1" # 18 Product_ShippedTo
# Populating the final variable (this should be a list with all fields scraped)
row = (name, describe, lastSeen, rules, CVE, MS, review, category, shipFrom, shipTo, left, escrow, terms, vendor,
sold, addDate, available, endDate, BTC, USD, rating, success, EURO)
row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
BTC, USD, EURO, sold, left, shipFrom, shipTo)
# Sending the results
return row
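m00nkey_description_parser leans on one recurring move: try the selector for one page layout and, on any exception, fall back to an alternate layout (the category and quantity-left fields both do this). A helper can compress that to a line per field; first_of is hypothetical, and cleanString is the utility imported at the top of this file:

def first_of(*lookups, default="-1"):
    """Return the first lookup() that yields text, cleaned; else the default."""
    for lookup in lookups:
        try:
            text = lookup()
            if text:
                return cleanString(text.strip())
        except Exception:
            continue
    return default

# e.g. the category field, mirroring the two layouts handled above:
# category = first_of(
#     lambda: soup.findAll('table', {'class': 'table table-hover'})[1].find('tr').findAll('td')[1].text,
#     lambda: soup.find('table', {'class': 'table table-hover'}).find('tbody').find('tr').findAll('td')[1].text)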
@ -147,131 +146,91 @@ def darkfox_description_parser(soup):
# stores the info it needs in different lists; these lists are returned after being organized
#@param: soup object looking at html page of listing page
#return: 'row' that contains a variety of lists that each hold info on the listing page
def darkfox_listing_parser(soup):
def m00nkey_listing_parser(soup):
# Fields to be parsed
nm = 0 # Total_Products (Should be Integer)
mktName = "DarkFox" # 0 Marketplace_Name
name = [] # 1 Product_Name
CVE = [] # 2 Product_CVE_Classification (Common Vulnerabilities and Exposures)
MS = [] # 3 Product_MS_Classification (Microsoft Security)
category = [] # 4 Product_Category
describe = [] # 5 Product_Description
escrow = [] # 6 Vendor_Warranty
views = [] # 7 Product_Number_Of_Views
reviews = [] # 8 Product_Number_Of_Reviews
addDate = [] # 9 Product_AddDate
lastSeen = [] # 10 Product_LastViewDate
BTC = [] # 11 Product_BTC_SellingPrice
USD = [] # 12 Product_USD_SellingPrice
EURO = [] # 13 Product_EURO_SellingPrice
sold = [] # 14 Product_QuantitySold
qLeft =[] # 15 Product_QuantityLeft
shipFrom = [] # 16 Product_ShippedFrom
shipTo = [] # 17 Product_ShippedTo
vendor = [] # 18 Vendor
rating = [] # 19 Vendor_Rating
success = [] # 20 Vendor_Successful_Transactions
href = [] # 23 Product_Links (Urls)
listing = soup.findAll('div', {"class": "card"})
nm = 0 # *Total_Products (Should be Integer)
mktName = "M00nkeyMarket" # 0 *Marketplace_Name
vendor = [] # 1 *Vendor y
rating_vendor = [] # 2 Vendor_Rating
success = [] # 3 Vendor_Successful_Transactions
name = [] # 4 *Product_Name y
CVE = [] # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures), don't worry about this
MS = [] # 6 Product_MS_Classification (Microsoft Security), don't worry about this
category = [] # 7 Product_Category y
describe = [] # 8 Product_Description
views = [] # 9 Product_Number_Of_Views
reviews = [] # 10 Product_Number_Of_Reviews
rating_item = [] # 11 Product_Rating
addDate = [] # 12 Product_AddDate
BTC = [] # 13 Product_BTC_SellingPrice
USD = [] # 14 Product_USD_SellingPrice y
EURO = [] # 15 Product_EURO_SellingPrice
sold = [] # 16 Product_QuantitySold
qLeft = [] # 17 Product_QuantityLeft
shipFrom = [] # 18 Product_ShippedFrom
shipTo = [] # 19 Product_ShippedTo
href = [] # 20 Product_Links
listing = soup.findAll('div', {"class": "card mt-1"})
# Populating the Number of Products
nm = len(listing)
for a in listing:
bae = a.findAll('a', href=True)
# Adding the url to the list of urls
link = bae[0].get('href')
link = cleanLink(link)
href.append(link)
# vendor
try:
temp = a.find(class_='col-5 justify-content-between mx-auto').find('a').text  # search by class, not tag name
vendor.append(cleanString(temp.strip()))
except:
print('vendor')
#vendor rating
#successful transactions
try:
temp = a.find(class_='col-5 justify-content-between mx-auto').find('div').text  # search by class, not tag name
success.append(cleanString(temp.strip()))
except:
print('successful transactions')
# product name
try:
temp = a.find(class_='card-title rounded text-truncate').find('a').text  # search by class, not tag name
name.append(cleanString(temp.strip()))
except:
print('product name')
CVE.append('-1')
MS.append('-1')
rating_vendor.append("-1")
try:
temp = a.findAll(class_='btn btn-block btn-primary')  # search by class, not tag name
except:
print("Error in product category")
# Finding the Product
product = bae[1].find('p').text
product = product.replace('\n', ' ')
product = product.replace(",", "")
product = product.replace("...", "")
product = product.strip()
name.append(product)
bae = a.find('div', {'class': "media-content"}).find('div').find_all('div')
if len(bae) >= 5:
# Finding Prices
price = bae[0].text
ud = price.replace(" USD", " ")
# u = ud.replace("$","")
u = ud.replace(",", "")
u = u.strip()
USD.append(u)
# bc = (prc[1]).strip(' BTC')
# BTC.append(bc)
# Finding the Vendor
vendor_name = bae[1].find('a').text
vendor_name = vendor_name.replace(",", "")
vendor_name = vendor_name.strip()
vendor.append(vendor_name)
# Finding the Category
cat = bae[2].find('small').text
cat = cat.replace("Category: ", "")
cat = cat.replace(",", "")
cat = cat.strip()
category.append(cat)
# Finding Number Sold and Quantity Left
num = bae[3].text
num = num.replace("Sold: ", "")
num = num.strip()
sold.append(num)
quant = bae[4].find('small').text
quant = quant.replace("In stock: ", "")
quant = quant.strip()
qLeft.append(quant)
# Finding Successful Transactions
freq = bae[1].text
freq = freq.replace(vendor_name, "")
freq = re.sub(r'Vendor Level \d+', "", freq)
freq = freq.replace("(", "")
freq = freq.replace(")", "")
freq = freq.strip()
success.append(freq)
# Searching for CVE and MS categories
cve = a.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
if not cve:
cveValue="-1"
else:
cee = " "
for idx in cve:
cee += (idx)
cee += " "
cee = cee.replace(',', ' ')
cee = cee.replace('\n', '')
cveValue=cee
CVE.append(cveValue)
ms = a.findAll(text=re.compile('MS\d{2}-\d{3}'))
if not ms:
MSValue="-1"
else:
me = " "
for im in ms:
me += (im)
me += " "
me = me.replace(',', ' ')
me = me.replace('\n', '')
MSValue=me
MS.append(MSValue)
# Populate the final variable (this should be a list with all fields scraped)
return organizeProducts(mktName, nm, name, CVE, MS, category, describe, escrow, views, reviews, addDate, lastSeen,
BTC, USD, EURO, qLeft, shipFrom, shipTo, vendor, rating, success, sold, href)
return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
#called by the crawler to get description links on a listing page
#@param: beautifulsoup object that is using the correct html page (listing page)
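Every listing parser in this commit accumulates twenty-odd parallel lists and hands them to organizeProducts, so a single missed append (exactly the AnonymousMarketplace href bug fixed above) silently misaligns that field for every product on the page. A cheap guard before the return catches that class of bug early; check_alignment is hypothetical:

def check_alignment(nm, **fields):
    """Raise if any per-product list does not have exactly nm entries."""
    for field, values in fields.items():
        if len(values) != nm:
            raise ValueError(f"{field}: expected {nm} rows, got {len(values)}")

# e.g. just before organizeProducts(...):
# check_alignment(nm, vendor=vendor, name=name, category=category, USD=USD, href=href)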


+ 2
- 2
MarketPlaces/Tor2door/crawler_selenium.py View File

@ -30,7 +30,7 @@ baseURL = 'http://yzrrne3pveltulbavydr2kiashvlnysdwclwmklo6cyjuqpxi7ku4xqd.onion
# Opens Tor Browser, crawls the website
def startCrawling():
opentor()
# marketName = getMarketName()
# marketName = getMKTName()
driver = getAccess()
if driver != 'down':
@ -105,7 +105,7 @@ def login(driver):
# Returns the name of the website
def getMarketName():
def getMKTName():
name = 'Tor2door'
return name


+ 12
- 12
MarketPlaces/TorBay/crawler_selenium.py View File

@ -32,19 +32,19 @@ baseURL = 'http://torbay3253zck4ym5cbowwvrbfjjzruzthrx3np5y6owvifrnhy5ybid.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor
# acts as the main method for the crawler; a call at the end of this file invokes it
def startCrawling():
opentor()
# opentor()
mktName = getMKTName()
driver = getAccess()
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
# new_parse(forumName, baseURL, False)
# driver = getAccess()
#
# if driver != 'down':
# try:
# login(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
#
new_parse(mktName, baseURL, False)
# Opens Tor Browser


+ 80
- 162
MarketPlaces/TorBay/parser.py View File

@ -35,88 +35,51 @@ def torbay_description_parser(soup):
shipTo = "-1" # 18 Product_ShippedTo
# Finding Product Name
name = soup.find('div', {'class': 'product-information'}).find('h1').text.strip()
# Finding Vendor
vendor = soup.find('div', {"class": "profile-info"}).find('a').text.strip()
# Finding Vendor Rating
rating_vendor.append(-1)
# Finding Successful Transactions
success.append(-1)
bae = soup.find('div', {'class': "box"}).find_all('ul')
try:
product_name = soup.find('div', {'class': 'product-information'}).find('h1').text
name = cleanString(product_name.strip())
except:
try:
product_name = soup.find('div', {'class': 'profile-info'}).find('h2').text
name = cleanString(product_name.strip())
except:
# print(e)
print("product name")
# Finding Vendor (fix)
try:
vendor_name = soup.find('div', {"class": "profile-info"}).find('h2').text
vendor = cleanString(vendor_name.strip())
except:
print("description vendor name failed\n")
# Finding Prices
USD = soup.find('div', {'class': "total-price"}).find('span').text.strip()
try:
USD = soup.find('div', {'class': "total-price"}).find('span').text.strip()
except:
print("description price failed\n")
# Finding the Product Category
category = soup.find('div', {'class': "profile-info"}).find('p').find('a').text.strip()
# Finding the Product Quantity Available
left.append(-1)
# Finding Number Sold
sold.append(-1)
li = bae[3].find_all('li')
# Finding Shipment Information (Origin)
if "Ships from:" in li[-2].text:
shipFrom = li[-2].text
shipFrom = shipFrom.replace("Ships from: ", "")
# shipFrom = shipFrom.replace(",", "")
shipFrom = shipFrom.strip()
# Finding Shipment Information (Destination)
shipTo = li[-1].find('div', {'title': "List of countries is scrollable"}).text
shipTo = shipTo.replace("Ships to: ", "")
shipTo = shipTo.strip()
if "certain countries" in shipTo:
countries = ""
tags = li[-1].find_all('span', {'class': "tag"})
for tag in tags:
country = tag.text.strip()
countries += country + ", "
shipTo = countries.strip(", ")
try:
cat = soup.find('div', {'class': "profile-info"}).find('p').text
category = cleanString(cat.strip())
except:
print("description product category failed")
# Finding the Product description
describe = soup.find('div', {'class': "pre-line"}).text
describe = describe.replace("\n", " ")
describe = describe.strip()
'''# Finding the Number of Product Reviews
tag = soup.findAll(text=re.compile('Reviews'))
for index in tag:
reviews = index
par = reviews.find('(')
if par >=0:
reviews = reviews.replace("Reviews (","")
reviews = reviews.replace(")","")
reviews = reviews.split(",")
review = str(abs(int(reviews[0])) + abs(int(reviews[1])))
else :
review = "-1"'''
# Searching for CVE and MS categories
cve = soup.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
if cve:
CVE = " "
for idx in cve:
CVE += (idx)
CVE += " "
CVE = CVE.replace(',', ' ')
CVE = CVE.replace('\n', '')
ms = soup.findAll(text=re.compile('MS\d{2}-\d{3}'))
if ms:
MS = " "
for im in ms:
MS += (im)
MS += " "
MS = MS.replace(',', ' ')
MS = MS.replace('\n', '')
try:
describe = soup.find('div', {'class': "info"}).find('p').text
if "\n" in describe:
describe = describe.replace("\n", " ")
describe = describe.replace("\r", " ")
describe = cleanString(describe.strip())
except:
# print("product desc")
try:
describe = soup.find('div', {'class': 'info'}).text
describe = cleanString(describe.strip())
except:
print("Product description")
# Populating the final variable (this should be a list with all fields scraped)
row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
@ -162,93 +125,48 @@ def torbay_listing_parser(soup):
nm = len(listing)
for a in listing:
bae = a.findAll('a', href=True)
# Adding the url to the list of urls
link = bae[0].get('href')
link = cleanLink(link)
href.append(link)
# Finding the Product
product = bae[1].find('p').text
product = product.replace('\n', ' ')
product = product.replace(",", "")
product = product.replace("...", "")
product = product.strip()
name.append(product)
bae = a.find('div', {'class': "media-content"}).find('div').find_all('div')
if len(bae) >= 5:
# Finding Prices
price = bae[0].text
ud = price.replace(" USD", " ")
# u = ud.replace("$","")
u = ud.replace(",", "")
u = u.strip()
USD.append(u)
# bc = (prc[1]).strip(' BTC')
# BTC.append(bc)
# Finding the Vendor
vendor_name = bae[1].find('a').text
vendor_name = vendor_name.replace(",", "")
vendor_name = vendor_name.strip()
vendor.append(vendor_name)
# Finding the Category
cat = bae[2].find('small').text
cat = cat.replace("Category: ", "")
cat = cat.replace(",", "")
cat = cat.strip()
category.append(cat)
# Finding Number Sold and Quantity Left
num = bae[3].text
num = num.replace("Sold: ", "")
num = num.strip()
sold.append(num)
quant = bae[4].find('small').text
quant = quant.replace("In stock: ", "")
quant = quant.strip()
qLeft.append(quant)
# Finding Successful Transactions
freq = bae[1].text
freq = freq.replace(vendor_name, "")
freq = re.sub(r'Vendor Level \d+', "", freq)
freq = freq.replace("(", "")
freq = freq.replace(")", "")
freq = freq.strip()
success.append(freq)
# Searching for CVE and MS categories
cve = a.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
if not cve:
cveValue="-1"
else:
cee = " "
for idx in cve:
cee += (idx)
cee += " "
cee = cee.replace(',', ' ')
cee = cee.replace('\n', '')
cveValue=cee
CVE.append(cveValue)
ms = a.findAll(text=re.compile('MS\d{2}-\d{3}'))
if not ms:
MSValue="-1"
else:
me = " "
for im in ms:
me += (im)
me += " "
me = me.replace(',', ' ')
me = me.replace('\n', '')
MSValue=me
MS.append(MSValue)
try:
product_name = a.find('p', {'class': 'name'}).text
name.append(cleanString(product_name.strip()))
except:
print("product name")
try:
prod = a.find('p', {'class': 'price'}).text # price
USD.append(cleanString(prod.strip()))
except:
print("USD")
try:
ven = a.find('div', {'class': 'pc-footer'}).find('div').find('a').text # pc-footer
vendor.append(cleanString(ven.strip()))
# print(ven)
except:
print("vendor")
try:
h = a.find('p', {'class': 'name'}).find('a').get('href')
href.append(h)
except:
print("in href")
CVE.append("-1")
MS.append("-1")
rating_vendor.append("-1")
success.append("-1")
describe.append("-1")
views.append("-1")
reviews.append("-1")
rating_item.append("-1")
addDate.append("-1")
BTC.append("-1")
EURO.append("-1")
sold.append("-1")
qLeft.append("-1")
shipFrom.append("-1")
shipTo.append("-1")
category.append("Hacking")
# Populate the final variable (this should be a list with all fields scraped)
return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
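torbay_listing_parser repeats the same try/except/print block for every field it can actually read and pads the rest with "-1". A small wrapper keeps the default and the failure label next to each lookup; safe_text is not in the repo, and cleanString is the imported utility:

def safe_text(lookup, label, default="-1"):
    """Run lookup(); on any failure, print the label and return the default."""
    try:
        return cleanString(lookup().strip())
    except Exception:
        print(label)
        return default

# inside the loop over listing:
# name.append(safe_text(lambda: a.find('p', {'class': 'name'}).text, "product name"))
# USD.append(safe_text(lambda: a.find('p', {'class': 'price'}).text, "USD"))
# vendor.append(safe_text(lambda: a.find('div', {'class': 'pc-footer'}).find('div').find('a').text, "vendor"))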

