Browse Source

Working on Parser. Finished description parser.

main
Joshua 1 year ago
parent
commit
594e52949b
3 changed files with 157 additions and 56 deletions
  1. +15
    -15
      MarketPlaces/HiddenMarket/crawler_selenium.py
  2. +71
    -41
      MarketPlaces/HiddenMarket/parser.py
  3. +71
    -0
      MarketPlaces/Initialization/geckodriver.log

+ 15
- 15
MarketPlaces/HiddenMarket/crawler_selenium.py View File

@ -29,19 +29,19 @@ baseURL = 'http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion
# Opens Tor Browser, crawls the website
def startCrawling():
opentor()
# opentor()
marketName = getMKTName()
driver = getAccess()
# driver = getAccess()
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
# if driver != 'down':
# try:
# login(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
# new_parse(marketName, baseURL, False)
new_parse(marketName, baseURL, False)
# Opens Tor Browser
@ -211,11 +211,11 @@ def getInterestedLinks():
links = []
# # Civil Software
# links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/civil_softwares')
links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/civil_softwares')
# # Tutorials - Carding
# links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/carding')
# # Digital - Hacks
links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/hacks')
# links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/hacks')
# Digital - Exploit Kit
# links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/exploit_kit')
# # 0Day
@ -275,11 +275,11 @@ def crawlForum(driver):
driver.back()
# comment out
break
# break
# comment out
# if count == 1:
# break
if count == 2:
break
try:
pageCount += 1


+ 71
- 41
MarketPlaces/HiddenMarket/parser.py View File

@ -31,20 +31,19 @@ def hiddenmarket_description_parser(soup):
shipFrom = "-1" # 17 Product_ShippedFrom
shipTo = "-1" # 18 Product_ShippedTo
bae = soup.find('div', {'class': "col-9"})
bae = soup.find('div', {'class': "main"})
# Finding Product Name
name = bae.find('h2').text
name = bae.find('div', {'class': "heading"}).text
name = name.replace('\n', ' ')
name = name.replace(",", "")
name = name.strip()
mb = bae.findAll('div', {"class": "mb-1"})
mb = bae.find('div', {'class': "information"}).findAll('tr')
# Finding Vendor
vendor = mb[0].text
vendor = mb[1].find('a').text
vendor = vendor.replace(",", "")
vendor = vendor.replace("Sold by:", "")
vendor = vendor.strip()
# # Finding Vendor Rating
@ -52,41 +51,50 @@ def hiddenmarket_description_parser(soup):
# half_star = bae[2].find('i', {'class': "fas fa-star-half-alt"})
# rating = len(full_stars) + (0.5 if half_star is not None else 0)
# Finding Quantity Sold and Left
temp = mb[4].text.split(',')
sold = temp[0].replace("sold", "")
sold = sold.strip()
left = temp[1].replace("in stock", "")
# Finding Quantity Left
temp = mb[-3].text
left = temp.replace("Quantity in stock:", "")
left = left.strip()
# Finding USD
USD = bae.find('div', {"class": "h3 text-secondary"}).text
USD = USD.replace("$", "")
USD = mb[0].text
USD = USD.replace("Price:", "")
USD = USD.replace("USD", "")
USD = USD.strip()
# Finding BTC
temp = bae.find('div', {"class": "small"}).text.split("BTC")
# temp = bae.find('div', {"class": "small"}).text.split("BTC")
BTC = temp[0].strip()
# BTC = temp[0].strip()
# shipping_info = bae[4].text
# if "Digital" not in shipping_info:
# shipping_info = shipping_info.split(" ")
#
# # Finding Shipment Information (Origin)
# shipFrom = shipping_info[0].strip()
#
# # Finding Shipment Information (Destination)
# shipTo = shipping_info[1].strip()
# Finding Shipment Information (Origin)
shipFrom = mb[2].text
shipFrom = shipFrom.replace("Seller location:", "")
shipFrom = shipFrom.strip()
# Finding Shipment Information (Destination)
shipTo = mb[3].text
shipTo = shipTo.replace("Ships to (seller):", "")
shipTo = shipTo.strip()
# Finding the Product description
describe = bae.find('div', {"class": "card border-top-0"}).text
describe = bae.find('div', {"class": "twotabs"}).find('div', {'class': "tab1"}).text
describe = describe.replace("\n", " ")
describe = describe.replace("\r", " ")
describe = describe.replace("-", " ")
describe = describe.strip()
# Finding the Product Category
category = mb[-4].text
category = category.replace("Category:", "")
category = category.strip()
# Finding the number of reviews
reviews = bae.find_all('div', {'class': "heading"})
reviews = reviews[-2].text
reviews = reviews.replace("Comments (", "")
reviews = reviews.replace(")", "")
# Searching for CVE and MS categories
cve = soup.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
if cve:
@ -139,54 +147,76 @@ def hiddenmarket_listing_parser(soup):
shipTo = [] # 19 Product_ShippedTo
href = [] # 20 Product_Links
listing = soup.findAll('div', {"class": "card product-card mb-3"})
listing = soup.findAll('div', {"class": "info"})
# Populating the Number of Products
nm = len(listing)
# Finding Category
cat = soup.find("div", {"class": "col-9"})
cat = cat.find("h2").text
cat = cat.replace("Category: ", "")
cat = soup.find("div", {'class': "heading"}).text
cat = cat.replace(",", "")
cat = cat.strip()
for card in listing:
category.append(cat)
bae = card.findAll('a')
# Adding the url to the list of urls
link = bae[0].get('href')
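# Adding the url to the list of urls TODO: fix this (the href looked up below is discarded, so the sibling element itself is appended instead of the URL)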
link = card.next_sibling
link.find('a').get('href')
href.append(link)
# Finding Product Name
product = bae[1].text
product = card.next_sibling.find('div', {'class': "title"}).find('a').text
product = product.replace('\n', ' ')
product = product.replace(",", "")
product = product.strip()
name.append(product)
# Finding Vendor
vendor_name = bae[2].text
vendor_name = card.text
vendor_name = vendor_name.replace(",", "")
vendor_name = vendor_name.strip()
vendor.append(vendor_name)
# Finding USD
usd = card.find('div', {"class": "mb-1"}).text
usd = usd.replace("$", "")
usd = card.next_sibling.find('div', {"class": "buttons"}).find('div', {'class': "price"}).text
usd = usd.replace("USD", "")
usd = usd.strip()
USD.append(usd)
tb = card.next_sibling.find("span", {"class": "stats"}).find_all('td')
# Finding Reviews
num = card.find("span", {"class": "rate-count"}).text
num = num.replace("(", "")
num = num.replace("review)", "")
num = num.replace("reviews)", "")
num = tb[-1].text
num = num.strip()
reviews.append(num)
# Finding Views
view = tb[0].text.strip()
views.append(view)
# Finding Num of Sales
sale = tb[1].text.strip()
sold.append(sale)
# Finding shipping info
shipping = card.next_sibling.find('div', {'class': "shipping"}).text.split('>')
# Ship from
origin = shipping[0].strip()
shipFrom.append(origin)
# Ship to
destination = shipping[1].strip()
shipTo.append(destination)
# Finding description
description = card.next_sibling.find('div', {'class': "description"}).text
description = description.replace("\n", " ")
description = description.replace("\r", " ")
description = description.replace("-", " ")
description = description.strip()
describe.append(description)
# Searching for CVE and MS categories
cve = card.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
if not cve:


+ 71
- 0
MarketPlaces/Initialization/geckodriver.log View File

@ -16982,3 +16982,74 @@ unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1690471312083 geckodriver INFO Listening on 127.0.0.1:51018
1690471316041 mozrunner::runner INFO Running command: "C:\\Users\\John Wick\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "51019" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\JOHNWI~1\\AppData\\Local\\Temp\\rust_mozprofileOvVZJJ"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
console.log: "TorConnect: Observed profile-after-change"
console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change"
1690471316828 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:51019/devtools/browser/3db2bc14-2f5c-482e-9367-8dbec91f64d6
1690471318449 Marionette INFO Listening on port 51024
1690471318718 RemoteAgent WARN TLS certificate errors will be ignored for this session
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\John Wick\AppData\Local\Temp\rust_mozprofileOvVZJJ\thumbnails) because it does not exist
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
1690471691471 Marionette INFO Stopped listening on port 51024
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
1690471691624 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1690471761742 geckodriver INFO Listening on 127.0.0.1:51209
1690471765842 mozrunner::runner INFO Running command: "C:\\Users\\John Wick\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "51210" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\JOHNWI~1\\AppData\\Local\\Temp\\rust_mozprofileKIHjMA"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
console.log: "TorConnect: Observed profile-after-change"
console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change"
1690471766480 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:51210/devtools/browser/463ff815-81da-4d95-b1c2-fd70ce5d9152
1690471768298 Marionette INFO Listening on port 51215
1690471768535 RemoteAgent WARN TLS certificate errors will be ignored for this session
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
1690472208303 Marionette INFO Stopped listening on port 51215
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
1690472208478 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138

Loading…
Cancel
Save