Browse Source

need to test DarkMatter scraper, tentatively finished

main
Helium 1 year ago
parent
commit
ccf256762d
11 changed files with 506 additions and 3840 deletions
  1. +1
    -1
      .idea/DW_Pipeline_Test.iml
  2. +1
    -1
      .idea/misc.xml
  3. +3
    -0
      Forums/Cardingleaks/crawler_selenium.py
  4. +13
    -3687
      Forums/Initialization/geckodriver.log
  5. +29
    -25
      MarketPlaces/DarkMatter/crawler_selenium.py
  6. +166
    -112
      MarketPlaces/DarkMatter/parser.py
  7. +272
    -0
      MarketPlaces/Initialization/geckodriver.log
  8. +1
    -1
      MarketPlaces/Initialization/marketsList.txt
  9. +10
    -2
      MarketPlaces/Initialization/prepare_parser.py
  10. +10
    -10
      MarketPlaces/M00nkeyMarket/crawler_selenium.py
  11. +0
    -1
      MarketPlaces/TorBay/parser.py

+ 1
- 1
.idea/DW_Pipeline_Test.iml View File

@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="jdk" jdkName="C:\Users\Helium\anaconda3" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyNamespacePackagesService">


+ 1
- 1
.idea/misc.xml View File

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="C:\Users\John Wick\anaconda3" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="C:\Users\Helium\anaconda3" project-jdk-type="Python SDK" />
</project>

+ 3
- 0
Forums/Cardingleaks/crawler_selenium.py View File

@ -3,6 +3,9 @@ __author__ = 'DarkWeb'
'''
Cardingleaks Forum Crawler (Selenium)
Crawler updated and fixed
The site has this thing sometime whereyou'll have to look at a new post everyday. makes sure
you login first before crawling.
'''
from selenium import webdriver


+ 13
- 3687
Forums/Initialization/geckodriver.log
File diff suppressed because it is too large
View File


+ 29
- 25
MarketPlaces/DarkMatter/crawler_selenium.py View File

@ -2,8 +2,7 @@ __author__ = 'Helium'
'''
DarkMatter Marketplace Crawler (Selenium)
website has connection issues
not working still trying to debug
Crawler works, but it slow since there is a speed check for clicking
'''
from selenium import webdriver
@ -33,19 +32,19 @@ baseURL = 'http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling():
opentor()
# mktName = getMKTName()
driver = getAccess()
# opentor()
mktName = getMKTName()
# driver = getAccess()
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
# if driver != 'down':
# try:
# login(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
# new_parse(forumName, baseURL, False)
new_parse(mktName, baseURL, False)
# Opens Tor Browser
@ -142,7 +141,7 @@ def getAccess():
# then allows for manual solving of captcha in the terminal
#@param: current selenium web driver
def login(driver):
input("Press ENTER when CAPTCHA is completed\n")
input("Press ENTER when CAPTCHA is completed and page is loaded\n")
# wait for page to show up (This Xpath may need to change based on different seed url)
# Saves the crawled html page, makes the directory path for html pages if not made
@ -186,8 +185,10 @@ def getNameFromURL(url):
def getInterestedLinks():
links = []
# digital
links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=73')
# digital fraud software
links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=76')
# legit
links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=78')
# # hack guides
# links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=94')
# # services
@ -226,23 +227,26 @@ def crawlForum(driver):
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
try:
time.sleep(1.5) # to keep from detecting click speed
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
time.sleep(1.5)
driver.back()
# to keep from detecting click speed
# comment out
break
# comment out
if count == 1:
break
# # comment out
# break
#
# # comment out
# if count == 1:
# break
try:
nav = driver.find_element(by=By.XPATH, value='/html/body/table[1]/tbody/tr/td/form/div/div[2]/table[2]')
a = nav.find_element(by=By.LINK_TEXT, value=">")
link = a.get_attribute('href')
# nav = driver.find_element(by=By.XPATH, value='/html/body/table[1]/tbody/tr/td/form/div/div[2]/table[2]')
# a = nav.find_element(by=By.LINK_TEXT, value=">")
link = driver.find_element(by=By.LINK_TEXT, value=">").get_attribute('href')
if link == "":
raise NoSuchElementException
count += 1


+ 166
- 112
MarketPlaces/DarkMatter/parser.py View File

@ -35,14 +35,24 @@ def darkmatter_description_parser(soup):
shipFrom = "-1" # 17 Product_ShippedFrom
shipTo = "-1" # 18 Product_ShippedTo
vendor = "-1" # 0 *Vendor_Name
success = "-1" # 1 Vendor_Successful_Transactions
rating_vendor = "-1" # 2 Vendor_Rating
# 0 *Vendor_Name
try:
temp = soup.find('table', {'class', 'vtable'})
temp = temp.findAll('tr')
temp2 = temp[3].find('a').text
name = cleanString(temp2.strip())
except:
try:
temp = soup.find('table', {'class', 'vtable'})
temp = temp.findAll('tr')
temp2 = temp[4].find('a').text
name = cleanString(temp2.strip())
except:
print("vendor")
# product name
try:
name = soup.find('head').find('title').text
name = soup.find('div', {'class', 'title-h2'}).text
name = cleanString(name.strip())
except:
print("name")
@ -55,22 +65,54 @@ def darkmatter_description_parser(soup):
except:
print("description")
#product category
try:
temp = soup.find('table', {'class', 'vtable'})
temp = temp.findAll('tr')
temp2 = temp[4].find('th').text
temp2 = cleanString(temp2)
if (temp2 == "Category"):
temp2 = temp[4].find('a').text
category = cleanString(temp2.strip())
except:
try:
temp = soup.find('table', {'class', 'vtable'})
temp = temp.findAll('tr')
temp2 = temp[5].find('th').text
temp2 = cleanString(temp2.strip)
if (temp2 == "Category"):
temp2 = temp[5].find('a').text
category = cleanString(temp2.strip())
except:
print('category')
# usd
try:
temp = soup.find('table', {'class', 'vtable'})
temp = temp.findAll('tr')
temp2 = temp[1].find('td').text
temp2 = temp2.replace(' USD', '')
USD = cleanString(temp2)
except:
print('USD')
# 15 Product_QuantitySold
try:
temp = soup.find('table', {'class', 'vtable'})
temp = temp.findAll('tr')
temp2 = temp[5].find('th').text
temp2 = cleanString(temp2)
temp3 = temp[6].find('th').text
temp3 = cleanString(temp3)
if (temp2 == "Sold"):
temp2 = temp[5].find('td').text
sold = cleanString(temp2.strip())
elif (temp3 == "Sold"):
temp2 = temp[6].find('td').text
sold = cleanString(temp2.strip())
except:
print('sold')
CVE = "-1" # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
MS = "-1" # 6 Product_MS_Classification (Microsoft Security)
category = "-1" # 7 Product_Category
views = "-1" # 8 Product_Number_Of_Views
reviews = "-1" # 9 Product_Number_Of_Reviews
rating_item = "-1" # 10 Product_Rating
addDate = "-1" # 11 Product_AddedDate
BTC = "-1" # 12 Product_BTC_SellingPrice
USD = "-1" # 13 Product_USD_SellingPrice
EURO = "-1" # 14 Product_EURO_SellingPrice
sold = "-1" # 15 Product_QuantitySold
left = "-1" # 16 Product_QuantityLeft
shipFrom = "-1" # 17 Product_ShippedFrom
shipTo = "-1" # 18 Product_ShippedTo
# Populating the final variable (this should be a list with all fields scraped)
row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
@ -98,6 +140,7 @@ def darkmatter_listing_parser(soup):
views = [] # 7 Product_Number_Of_Views
reviews = [] # 8 Product_Number_Of_Reviews
addDate = [] # 9 Product_AddDate
rating_item = [] # 11 Product_Rating
lastSeen = [] # 10 Product_LastViewDate
BTC = [] # 11 Product_BTC_SellingPrice
USD = [] # 12 Product_USD_SellingPrice
@ -111,103 +154,114 @@ def darkmatter_listing_parser(soup):
success = [] # 20 Vendor_Successful_Transactions
href = [] # 23 Product_Links (Urls)
listing = soup.findAll('div', {"class": "card"})
names = soup.find('div', {"class": "content"}).findAll('td', {"class": "lefted", "colspan": "3"})
left = soup.find('div', {"class": "content"}).findAll('table', {"class": "vtable"})
right = soup.find('div', {"class": "content"}).findAll('td', {"class": "vtop centered"})
# vtop centered
count = 0
# Populating the Number of Products
nm = len(listing)
for a in listing:
bae = a.findAll('a', href=True)
# Adding the url to the list of urls
link = bae[0].get('href')
link = cleanLink(link)
href.append(link)
# Finding the Product
product = bae[1].find('p').text
product = product.replace('\n', ' ')
product = product.replace(",", "")
product = product.replace("...", "")
product = product.strip()
name.append(product)
bae = a.find('div', {'class': "media-content"}).find('div').find_all('div')
if len(bae) >= 5:
# Finding Prices
price = bae[0].text
ud = price.replace(" USD", " ")
# u = ud.replace("$","")
u = ud.replace(",", "")
u = u.strip()
USD.append(u)
# bc = (prc[1]).strip(' BTC')
# BTC.append(bc)
# Finding the Vendor
vendor_name = bae[1].find('a').text
vendor_name = vendor_name.replace(",", "")
vendor_name = vendor_name.strip()
vendor.append(vendor_name)
# Finding the Category
cat = bae[2].find('small').text
cat = cat.replace("Category: ", "")
cat = cat.replace(",", "")
cat = cat.strip()
category.append(cat)
# Finding Number Sold and Quantity Left
num = bae[3].text
num = num.replace("Sold: ", "")
num = num.strip()
sold.append(num)
quant = bae[4].find('small').text
quant = quant.replace("In stock: ", "")
quant = quant.strip()
qLeft.append(quant)
# Finding Successful Transactions
freq = bae[1].text
freq = freq.replace(vendor_name, "")
freq = re.sub(r'Vendor Level \d+', "", freq)
freq = freq.replace("(", "")
freq = freq.replace(")", "")
freq = freq.strip()
success.append(freq)
# Searching for CVE and MS categories
cve = a.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
if not cve:
cveValue="-1"
else:
cee = " "
for idx in cve:
cee += (idx)
cee += " "
cee = cee.replace(',', ' ')
cee = cee.replace('\n', '')
cveValue=cee
CVE.append(cveValue)
ms = a.findAll(text=re.compile('MS\d{2}-\d{3}'))
if not ms:
MSValue="-1"
else:
me = " "
for im in ms:
me += (im)
me += " "
me = me.replace(',', ' ')
me = me.replace('\n', '')
MSValue=me
MS.append(MSValue)
nm = len(names)
for a in names:
# product name
try:
temp = a.find('a').text
if ("pcs x " in temp):
index = temp.index("pcs x ")
result = temp[index + len("pcs x "):]
name.append(cleanString(result))
elif("pks x " in temp):
index = temp.index("pks x ")
result = temp[index + len("pks x "):]
name.append(cleanString(temp))
except Exception as e:
print("product name", e)
CVE.append("-1")
MS.append("-1")
temp2 = left[count].findAll('tr')
length_2 = len(temp2) - 1
# category
try:
temp = temp2[1].find('td').text
category.append(cleanString(temp.strip()))
except:
print('category')
describe.append("-1")
escrow.append("-1")
views.append("-1")
reviews.append("-1")
addDate.append("-1")
lastSeen.append("-1")
BTC.append("-1")
# usd
try:
temp3 = right[count*2].find('span').text
temp = temp3.replace(' USD', '')
USD.append(cleanString(temp))
except:
print('USD')
EURO.append("-1")
# 14 Product_QuantitySold
try:
temp3 = temp2[length_2].find('th').text
temp3 = cleanString(temp3)
if (temp3 == "Sold:"):
temp = temp2[length_2].find('td').text
sold.append(cleanString(temp.strip()))
else:
sold.append("-1")
except Exception as e:
sold.append("-1")
print('sold', e)
qLeft.append("-1")
shipFrom.append("-1")
# ship to
try:
temp3 = temp2[length_2].find('th').text
temp3 = cleanString(temp3)
if (temp3 == "Ship To:"):
temp = temp2[length_2].find('td').text
shipTo.append(cleanString(temp.strip()))
else:
shipTo.append("-1")
except Exception as e:
shipTo.append("-1")
print('shopto')
# vendor
try:
temp = temp2[0].find('a').text
vendor.append(cleanString(temp.strip()))
except:
print('vendor')
rating.append("-1")
success.append("-1")
try:
temp = a.find('a').get('href')
href.append(temp)
except:
print('href')
count += 1
rating_item.append("-1")
# Populate the final variable (this should be a list with all fields scraped)
return organizeProducts(mktName, nm, name, CVE, MS, category, describe, escrow, views, reviews, addDate, lastSeen,
BTC, USD, EURO, qLeft, shipFrom, shipTo, vendor, rating, success, sold, href)
return organizeProducts(mktName, nm, vendor, rating, success, name, CVE, MS, category, describe, views,
reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
#called by the crawler to get description links on a listing page


+ 272
- 0
MarketPlaces/Initialization/geckodriver.log View File

@ -16206,3 +16206,275 @@ unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1689741508522 geckodriver INFO Listening on 127.0.0.1:50134
1689741513743 mozrunner::runner INFO Running command: "C:\\Users\\Helium\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50135" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofileD0BVKR"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
console.log: "TorConnect: Observed profile-after-change"
console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change"
1689741515487 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:50135/devtools/browser/1a9e5574-ca23-42c7-b584-c2a0b50c206b
1689741518138 Marionette INFO Listening on port 50157
1689741518473 RemoteAgent WARN TLS certificate errors will be ignored for this session
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofileD0BVKR\thumbnails) because it does not exist
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
SourceActor threw an exception: [Exception... "Failed to open input source 'blob:moz-extension://3064b8c5-bffd-4bf8-b2f1-210b12185538/0fa8124b-e9b3-48d5-af68-c2e26676efa1'" nsresult: "0x805303f4 (<unknown>)" location: "JS frame :: resource://devtools/shared/DevToolsUtils.js :: mainThreadFetch/< :: line 670" data: yes]
Stack: mainThreadFetch/<@resource://devtools/shared/DevToolsUtils.js:670:15
mainThreadFetch@resource://devtools/shared/DevToolsUtils.js:516:10
_fetchURLContents@resource://devtools/server/actors/utils/sources-manager.js:442:22
urlContents@resource://devtools/server/actors/utils/sources-manager.js:406:17
_resurrectSource@resource://devtools/server/actors/thread.js:2142:35
addAllSources@resource://devtools/server/actors/thread.js:1509:14
watch@resource://devtools/server/actors/resources/sources.js:52:17
watchResources@resource://devtools/server/actors/resources/index.js:239:19
_watchTargetResources@resource://devtools/server/actors/targets/target-actor-mixin.js:156:24
addWatcherDataEntry@resource://devtools/server/actors/targets/target-actor-mixin.js:47:20
_addWatcherDataEntry@resource://devtools/server/connectors/js-window-actor/DevToolsFrameChild.jsm:483:24
receiveMessage@resource://devtools/server/connectors/js-window-actor/DevToolsFrameChild.jsm:425:21
Line: 670, column: 0
console.error: ({})
SourceActor threw an exception: [Exception... "Failed to open input source 'blob:moz-extension://3064b8c5-bffd-4bf8-b2f1-210b12185538/b797550c-9d44-408e-b07f-f441e33a0c73'" nsresult: "0x805303f4 (<unknown>)" location: "JS frame :: resource://devtools/shared/DevToolsUtils.js :: mainThreadFetch/< :: line 670" data: yes]
Stack: mainThreadFetch/<@resource://devtools/shared/DevToolsUtils.js:670:15
mainThreadFetch@resource://devtools/shared/DevToolsUtils.js:516:10
_fetchURLContents@resource://devtools/server/actors/utils/sources-manager.js:442:22
urlContents@resource://devtools/server/actors/utils/sources-manager.js:406:17
_resurrectSource@resource://devtools/server/actors/thread.js:2142:35
addAllSources@resource://devtools/server/actors/thread.js:1509:14
watch@resource://devtools/server/actors/resources/sources.js:52:17
watchResources@resource://devtools/server/actors/resources/index.js:239:19
_watchTargetResources@resource://devtools/server/actors/targets/target-actor-mixin.js:156:24
addWatcherDataEntry@resource://devtools/server/actors/targets/target-actor-mixin.js:47:20
_addWatcherDataEntry@resource://devtools/server/connectors/js-window-actor/DevToolsFrameChild.jsm:483:24
receiveMessage@resource://devtools/server/connectors/js-window-actor/DevToolsFrameChild.jsm:425:21
Line: 670, column: 0
console.error: ({})
JavaScript error: resource://devtools/shared/DevToolsUtils.js, line 670: Failed to open input source 'blob:moz-extension://3064b8c5-bffd-4bf8-b2f1-210b12185538/0fa8124b-e9b3-48d5-af68-c2e26676efa1'
JavaScript error: resource://devtools/shared/DevToolsUtils.js, line 670: Failed to open input source 'blob:moz-extension://3064b8c5-bffd-4bf8-b2f1-210b12185538/b797550c-9d44-408e-b07f-f441e33a0c73'
1689741971217 Marionette INFO Stopped listening on port 50151689741971295 mozrunner::runner INFO Running command: "C:\\Users\\Helium\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50856" "--remote-allow-hosts" "!!! error running onStopped callback: TypeError: callback is not a function
JavaScript error: resouconsole.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
console.log: "TorConnect: Observed profile-after-change"
console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change"
1689741972816 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
DevTools listening on ws://localhost:50856/devtools/browser/de05d8f2-77ba-4341-82b2-7f8e417b0b91
1689741976318 Marionette INFO Listening on port 50871
1689741976478 RemoteAgent WARN TLS certificate errors will be ignored for this session
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
1689742170707 Marionette INFO Stopped listening on port 50871
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
JavaScript error: chrome://remote/content/marionette/cert.js, line 55: NS_ERROR_NOT_AVAILABLE: Component returned failure code: 0x80040111 (NS_ERROR_NOT_AVAILABLE) [nsICertOverrideService.setDisableAllSecurityChecksAndLetAttackersInterceptMyData]
!!! error running onStopped callback: TypeError: callback is not a function
JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofilevzgcah\thumbnails) because it does not exist
JavaScript error: chrome://remote/content/marionette/driver.js, line 2326: NS_ERROR_ILLEGAL_VALUE: Component returned failure code: 0x80070057 (NS_ERROR_ILLEGAL_VALUE) [nsIObserverService.removeObserver]
###!!! [Parent][MessageChannel] Error: (msgtype=0x390076,name=PContent::Msg_DestroyBrowsingContextGroup) Closed channel: cannot send/recv
###!!! [Child][MessageChannel] Error: (msgtype=0x23002E,name=PBrowser::Msg___delete__) Channel closing: too late to send/recv, messages will be lost
1689742171623 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1689742233303 geckodriver INFO Listening on 127.0.0.1:51281
1689742241232 mozrunner::runner INFO Running command: "C:\\Users\\Helium\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "51282" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofileXTza3J"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
console.log: "TorConnect: Observed profile-after-change"
console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change"
1689742243415 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:51282/devtools/browser/441a67d0-c939-4fe7-8559-dfc69517c130
1689742247608 Marionette INFO Listening on port 51297
1689742247838 RemoteAgent WARN TLS certificate errors will be ignored for this session
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
1689742377914 Marionette INFO Stopped listening on port 51297
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
JavaScript error: chrome://remote/content/marionette/cert.js, line 55: NS_ERROR_NOT_AVAILABLE: Component returned failure code: 0x80040111 (NS_ERROR_NOT_AVAILABLE) [nsICertOverrideService.setDisableAllSecurityChecksAndLetAttackersInterceptMyData]
!!! error running onStopped callback: TypeError: callback is not a function
JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofileXTza3J\thumbnails) because it does not exist
1689742380151 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1689742746360 geckodriver INFO Listening on 127.0.0.1:51409
1689742749749 mozrunner::runner INFO Running command: "C:\\Users\\Helium\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "51410" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofileByrNIs"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
console.log: "TorConnect: Observed profile-after-change"
console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change"
1689742750949 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:51410/devtools/browser/0ca6717a-cfb2-410b-8fe0-c1507895ffc9
1689742753821 Marionette INFO Listening on port 51415
1689742754141 RemoteAgent WARN TLS certificate errors will be ignored for this session
###!!! [Child][MessageChannel] Error: (msgtype=0x390097,name=PContent::Msg_InitBackground) Channel closing: too late to send/recv, messages will be lost
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
1689742927467 Marionette INFO Stopped listening on port 51415
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
JavaScript error: chrome://remote/content/marionette/cert.js, line 55: NS_ERROR_NOT_AVAILABLE: Component returned failure code: 0x80040111 (NS_ERROR_NOT_AVAILABLE) [nsICertOverrideService.setDisableAllSecurityChecksAndLetAttackersInterceptMyData]
!!! error running onStopped callback: TypeError: callback is not a function
JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofileByrNIs\thumbnails) because it does not exist
[Parent 3136, IPC I/O Parent] WARNING: file /var/tmp/build/firefox-b6010b1466c9/ipc/chromium/src/base/process_util_win.cc:167
1689742927812 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1689742979276 geckodriver INFO Listening on 127.0.0.1:51475
1689742984351 mozrunner::runner INFO Running command: "C:\\Users\\Helium\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "51476" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofile2diYu9"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
console.log: "TorConnect: Observed profile-after-change"
console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change"
1689742986020 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:51476/devtools/browser/d241f84c-7ffc-4339-a340-9a860079a7eb
1689742990539 Marionette INFO Listening on port 51481
1689742990887 RemoteAgent WARN TLS certificate errors will be ignored for this session
1689743106321 Marionette INFO Stopped listening on port 51481
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
JavaScript error: chrome://remote/content/marionette/cert.js, line 55: NS_ERROR_NOT_AVAILABLE: Component returned failure code: 0x80040111 (NS_ERROR_NOT_AVAILABLE) [nsICertOverrideService.setDisableAllSecurityChecksAndLetAttackersInterceptMyData]
!!! error running onStopped callback: TypeError: callback is not a function
JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofile2diYu9\thumbnails) because it does not exist
[Parent 488, IPC I/O Parent] WARNING: file /var/tmp/build/firefox-b6010b1466c9/ipc/chromium/src/base/process_util_win.cc:167
1689743106660 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1689743149686 geckodriver INFO Listening on 127.0.0.1:51524
1689743155945 mozrunner::runner INFO Running command: "C:\\Users\\Helium\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "51525" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofile1AJGKJ"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
console.log: "TorConnect: Observed profile-after-change"
console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change"
1689743157937 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:51525/devtools/browser/05633e0e-f823-4b70-9a91-cc629bf18413
1689743162385 Marionette INFO Listening on port 51530
1689743162597 RemoteAgent WARN TLS certificate errors will be ignored for this session
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
1689743593126 Marionette INFO Stopped listening on port 51530
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofile1AJGKJ\thumbnails) because it does not exist
1689743593785 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1689762141539 geckodriver INFO Listening on 127.0.0.1:52591
1689762144536 mozrunner::runner INFO Running command: "C:\\Users\\Helium\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "52592" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofiles0dVJD"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
console.log: "TorConnect: Observed profile-after-change"
console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change"
1689762145269 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:52592/devtools/browser/e63b4f12-286c-4b51-877d-cc0f8089c508
1689762146817 Marionette INFO Listening on port 52599
1689762146951 RemoteAgent WARN TLS certificate errors will be ignored for this session
1689762662818 Marionette INFO Stopped listening on port 52599
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofiles0dVJD\thumbnails) because it does not exist
[Parent 136, IPC I/O Parent] WARNING: file /var/tmp/build/firefox-b6010b1466c9/ipc/chromium/src/base/process_util_win.cc:167
1689762663117 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138

+ 1
- 1
MarketPlaces/Initialization/marketsList.txt View File

@ -1 +1 @@
ViceCity
DarkMatter

+ 10
- 2
MarketPlaces/Initialization/prepare_parser.py View File

@ -13,6 +13,7 @@ from MarketPlaces.AnonymousMarketplace.parser import *
from MarketPlaces.ViceCity.parser import *
from MarketPlaces.TorBay.parser import *
from MarketPlaces.M00nkeyMarket.parser import *
from MarketPlaces.DarkMatter.parser import *
from MarketPlaces.Classifier.classify_product import predict
@ -157,6 +158,8 @@ def new_parse(marketPlace, url, createLog):
rmm = torbay_description_parser(soup)
elif marketPlace == "M00nkeyMarket":
rmm = m00nkey_description_parser(soup)
elif marketPlace == "DarkMatter":
rmm = darkmatter_description_parser(soup)
# key = u"Pr:" + rmm[0].upper()[:desc_lim1] + u" Vendor:" + rmm[13].upper()[:desc_lim2]
key = u"Url:" + os.path.basename(line2).replace(".html", "")
@ -216,10 +219,13 @@ def new_parse(marketPlace, url, createLog):
rw = torbay_listing_parser(soup)
elif marketPlace == "M00nkeyMarket":
rw = m00nkey_listing_parser(soup)
elif marketPlace == "DarkMatter":
rw = darkmatter_listing_parser(soup)
else:
parseError = True
except:
except Exception as e:# change back
print(e)
nError += 1
print("There was a problem to parse the file " + line1 + " in the listing section!")
@ -285,7 +291,9 @@ def new_parse(marketPlace, url, createLog):
try:
shutil.move(source, destination)
num_persisted_moved += 1
except:
except Exception as e:
# except:
print(e)
print("There was a problem to move the file " + detPage[key]['filename'] + " in the Description section!")
nError += 1


+ 10
- 10
MarketPlaces/M00nkeyMarket/crawler_selenium.py View File

@ -33,16 +33,16 @@ MARKET_NAME = 'M00nkeyMarket'
# Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling():
opentor()
driver = getAccess()
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
# opentor()
# driver = getAccess()
#
# if driver != 'down':
# try:
# login(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
new_parse(MARKET_NAME, BASE_URL, False)


+ 0
- 1
MarketPlaces/TorBay/parser.py View File

@ -171,7 +171,6 @@ def torbay_listing_parser(soup):
# Populate the final variable (this should be a list with all fields scraped)
return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
#called by the crawler to get description links on a listing page
#@param: beautifulsoup object that is using the correct html page (listing page)
#return: list of description links from a listing page


Loading…
Cancel
Save