
completed BlackPyramid and CityMarket

Branch: main
Author: westernmeadow, 1 year ago
Commit: 2ec850869c
7 changed files with 101 additions and 69 deletions:

1. MarketPlaces/BlackPyramid/crawler_selenium.py (+16, -15)
2. MarketPlaces/BlackPyramid/parser.py (+21, -15)
3. MarketPlaces/CityMarket/crawler_selenium.py (+20, -14)
4. MarketPlaces/CityMarket/parser.py (+13, -23)
5. MarketPlaces/Initialization/markets_mining.py (+15, -0)
6. MarketPlaces/Initialization/prepare_parser.py (+15, -0)
7. MarketPlaces/Utilities/utilities.py (+1, -2)

MarketPlaces/BlackPyramid/crawler_selenium.py (+16, -15)

@@ -45,7 +45,7 @@ def startCrawling():
         print(driver.current_url, e)
     closetor(driver)

-    new_parse(marketName, baseURL, False)
+    new_parse(marketName, baseURL, True)

 # Login
@@ -207,11 +207,16 @@ def goToPage(driver, page):
 def getInterestedLinks():
+    links = []
+
     # h11 -> Hacking Tools
+    links.append('h11')
     # g3 -> Guides, Hacking
-    # se3 -> Services, Hacking
-    # f6 -> Fraud software
-    links = ['h11','g3','se3','f6']
+    links.append('g3')
+    # se3 -> Services
+    links.append('se11')
+    # f6 -> Fraud
+    links.append('f11')

     return links
@@ -226,6 +231,7 @@ def crawlForum(driver):
     for listing in pages:
         print('Crawling :', listing)
         try:
+            driver.get(baseURL)
             goToPage(driver, listing)

             has_next_page = True
@@ -251,12 +257,12 @@ def crawlForum(driver):
                     # can't use the back button in dark pyramid
                     # driver.back()

-                # comment out
-                break
-
-                # comment out
-                if count == 1:
-                    break
+                # # comment out
+                # break
+                #
+                # # comment out
+                # if count == 1:
+                #     break

                 # go to next page of market
                 try:
@@ -322,8 +328,3 @@ def productPages(html):
 def crawler():
     startCrawling()
     # print("Crawling and Parsing BestCardingWorld .... DONE!")
-
-
-if __name__ == "__main__":
-    #crawler()
-    new_parse("BlackPyramid", baseURL, False)
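
Note: the `driver.get(baseURL)` added at the top of the listing loop works together with the "# can't use the back button in dark pyramid" comment in the same function: navigation state cannot be rewound on this market, so every category crawl restarts from the landing page. A minimal sketch of that pattern (the helpers are passed in as stand-ins for this file's goToPage, savePage, and productPages):

def crawl_categories(driver, base_url, listings, go_to_page, save_page, product_pages):
    # Sketch of the crawlForum loop above. The market breaks driver.back(),
    # so each category is crawled fresh from the landing page via driver.get().
    for listing in listings:
        driver.get(base_url)              # fresh navigation state per category
        go_to_page(driver, listing)       # stand-in for this file's goToPage
        html = driver.page_source
        save_page(driver, html, listing)  # stand-in for this file's savePage
        for item in product_pages(html):  # stand-in: product URLs on the page
            driver.get(item)              # open products directly, never back()
            save_page(driver, driver.page_source, item)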

MarketPlaces/BlackPyramid/parser.py (+21, -15)

@@ -43,6 +43,12 @@ def blackpyramid_description_parser(soup):
     name = name.replace(",", "")
     name = name.strip()

+    # Finding Product Rating
+    rating_span = soup.find('span', {'class': 'to3098503t'}).find_next_sibling('span')
+    rating_num = rating_span.find('b').text
+    if rating_num != 'N/A':
+        rating_item = rating_num[0:3]
+
     # product description
     describe = soup.findAll('div', {'class': 'fer048953'})[1].text
     describe = describe.replace('\n', ' ')
@@ -57,11 +63,11 @@ def blackpyramid_description_parser(soup):
     vendor = vendor.replace(",", "")
     vendor = vendor.strip()

-    # Finding Vendor Rating
-    rating_span = soup.find('span', {'class': 'to3098503t'}).find_next_sibling('span')
-    rating_num = rating_span.find('b').text
-    if rating_num != 'N/A':
-        rating = rating_num[0:3]
+    # Finding Vendor Rating
+    rating_div = soup.find('div', {'class': 'bold03905 vstat364'}).find_next_sibling('div').find_next_sibling('div')
+    rating_vendor = cleanNumbers(rating_div.text)
+    if rating_vendor == "":
+        rating_vendor = "-1"

     # Finding Successful Transactions
     success_container = soup.find('ul', {'class': 'ul3o00953'}).findAll('li')[1]
@@ -102,7 +108,7 @@ def blackpyramid_description_parser(soup):
     positive = soup.find('span', {'class': 'ar04999324'}).text
     neutral = soup.find('span', {'class': 'ti9400005 can39953'}).text
     negative = soup.find('span', {'class': 'ti9400005 ti90088 can39953'}).text
-    review = int(positive) + int(neutral) + int(negative)
+    reviews = int(positive) + int(neutral) + int(negative)

     # Finding product image
     image = soup.find('img', {'class': 'img0390503'})
@@ -147,7 +153,7 @@ def blackpyramid_listing_parser(soup):
     # Fields to be parsed
     nm = 0                          # *Total_Products (Should be Integer)
-    mktName = "Black Pyramid"       # 0 *Marketplace_Name
+    mktName = "BlackPyramid"        # 0 *Marketplace_Name
     vendor = []                     # 1 *Vendor y
     rating_vendor = []              # 2 Vendor_Rating
     success = []                    # 3 Vendor_Successful_Transactions
@@ -196,14 +202,14 @@ def blackpyramid_listing_parser(soup):
         product = product.strip()
         name.append(product)

-        # Finding description
-        # 'recursive = False' only searches direct children
-        desc = card.findChildren('div', recursive=False)[0]
-        desc = desc.findAll('div', recursive=False)[3].text
-        desc = desc.replace('\n', ' ')
-        desc = desc.replace(",", "")
-        desc = desc.strip()
-        describe.append(desc)
+        # # Finding description
+        # # 'recursive = False' only searches direct children
+        # desc = card.findChildren('div', recursive=False)[0]
+        # desc = desc.findAll('div', recursive=False)[3].text
+        # desc = desc.replace('\n', ' ')
+        # desc = desc.replace(",", "")
+        # desc = desc.strip()
+        # describe.append(desc)

         # Finding Vendor Name
         vendor_name = bae[4].find('span').text
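
Note: the replacement vendor-rating block shows the defensive idiom these parsers converge on: extract, clean, and fall back to "-1" when a field is missing. A self-contained sketch of the idiom (clean_numbers below is a stand-in for the repo's cleanNumbers utility, assumed to keep only numeric characters):

import re
from bs4 import BeautifulSoup

def clean_numbers(text):
    # Stand-in for cleanNumbers: keep digits and decimal points only.
    return "".join(re.findall(r"[\d.]+", text))

def parse_vendor_rating(html):
    soup = BeautifulSoup(html, "html.parser")
    try:
        rating_div = soup.find("div", {"class": "bold03905 vstat364"}) \
                         .find_next_sibling("div").find_next_sibling("div")
        rating = clean_numbers(rating_div.text)
    except AttributeError:  # any element missing along the chain
        return "-1"
    return rating if rating else "-1"  # empty extraction also maps to "-1"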


MarketPlaces/CityMarket/crawler_selenium.py (+20, -14)

@@ -144,6 +144,7 @@ def login(driver):
     WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
         (By.XPATH, '//*[@id="collapse3"]')))

+# Saves the crawled html page, makes the directory path for html pages if not made
 def savePage(driver, page, url):
     cleanPage = cleanHTML(driver, page)
@@ -186,10 +187,18 @@ def getInterestedLinks():
     links = []

     # # Hire hacker
-    links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=3')
-    # # ddos
+    # links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=3')
+    # # other
+    # links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=14')
+    # malware
+    links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=15')
+    # ddos
     links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=16')
-    # # hacking service
+    # software
+    links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=17')
+    # botnet
+    links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=18')
+    # hacking service
     links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=31')

     return links
@@ -217,7 +226,7 @@ def crawlForum(driver):
                 except:
                     driver.refresh()
                 html = driver.page_source
-                savePage(driver, html, link)
+                savePage(driver, html, linksToCrawl[i] + f"page{count+1}")

                 list = productPages(html)
                 for item in list:
@@ -230,18 +239,15 @@ def crawlForum(driver):
                         savePage(driver, driver.page_source, item)
                         driver.back()

-                # comment out
-                # break
-
-                # comment out
-                """count += 1
-                if count == 1:
-                    break"""
+                # # comment out
+                # break
+                #
+                # # comment out
+                # if count == 1:
+                #     break

                 try:
-                    #link = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div/div[2]/nav/ul/li[5]/a').get_attribute('href')
-                    link = driver.find_element(by=By.XPATH,
-                                               value='//a[@rel="next"]').get_attribute('href')
+                    link = driver.find_element(by=By.XPATH, value='//a[@rel="next"]').get_attribute('href')
                     if link == "":
                         raise NoSuchElementException
                     count += 1
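
Note: renaming the save target to linksToCrawl[i] + f"page{count+1}" keeps successive pages of one listing from overwriting each other on disk. A standalone sketch of the naming scheme (save_listing_page and the example URL are illustrative, not the repo's savePage):

import os
import re

def save_listing_page(html, listing_url, count, out_dir="pages"):
    """Persist one page of a paginated listing; count is 0-based."""
    os.makedirs(out_dir, exist_ok=True)
    # Bake the page number into the identifier, mirroring
    # savePage(driver, html, linksToCrawl[i] + f"page{count+1}").
    name = re.sub(r"[^A-Za-z0-9]+", "_", f"{listing_url}page{count + 1}")
    with open(os.path.join(out_dir, name + ".html"), "w", encoding="utf-8") as f:
        f.write(html)

save_listing_page("<html></html>", "http://example.onion/?sub_id=16", count=0)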


MarketPlaces/CityMarket/parser.py (+13, -23)

@@ -57,19 +57,6 @@ def city_description_parser(soup):
     describe = soup.find('div', {'class': "text-white"}).text
     describe = cleanString(describe.strip())

-    '''# Finding the Number of Product Reviews
-    tag = soup.findAll(text=re.compile('Reviews'))
-    for index in tag:
-        reviews = index
-        par = reviews.find('(')
-        if par >=0:
-            reviews = reviews.replace("Reviews (","")
-            reviews = reviews.replace(")","")
-            reviews = reviews.split(",")
-            review = str(abs(int(reviews[0])) + abs(int(reviews[1])))
-        else :
-            review = "-1"'''
-
     # Searching for CVE and MS categories
     cve = soup.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
     if cve:
@@ -138,7 +125,6 @@ def city_listing_parser(soup):
         # Adding the url to the list of urls
         link = bae[0].get('href')
-        link = cleanLink(link)
         href.append(link)

         # Category
@@ -156,15 +142,19 @@ def city_listing_parser(soup):
         # USD and BTC Price
         price = a.find('div', {"class": "price"}).text
-        tempUSD = price.split("~")[0]
-        tempUSD = tempUSD.replace("$", "")
-        tempUSD = tempUSD.strip()
-        USD.append(tempUSD)
-        tempBTC = price.split("~")[1]
-        tempBTC = tempBTC.replace("BTC", "")
-        tempBTC = tempBTC.strip()
-        BTC.append(tempBTC)
+        if "~" in price:
+            tempUSD = price.split("~")[0]
+            tempUSD = tempUSD.replace("$", "")
+            tempUSD = tempUSD.strip()
+            USD.append(tempUSD)
+            tempBTC = price.split("~")[1]
+            tempBTC = tempBTC.replace("BTC", "")
+            tempBTC = tempBTC.strip()
+            BTC.append(tempBTC)
+        else:
+            USD.append("-1")
+            BTC.append("-1")

         # Img
         product_image = a.find('img')
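
Note: guarding on "~" prevents an IndexError on listings that show a single figure instead of the "$USD ~ BTC" pair, and both columns fall back to "-1". A worked, standalone version of the split (input format assumed from the code above):

def parse_price(price):
    """Split a '$USD ~ BTC' price string; '-1' marks an unparseable field."""
    if "~" not in price:
        return "-1", "-1"  # single-currency or malformed listing
    usd, btc = price.split("~", 1)
    usd = usd.replace("$", "").strip()
    btc = btc.replace("BTC", "").strip()
    return usd, btc

assert parse_price("$450 ~ 0.0185 BTC") == ("450", "0.0185")
assert parse_price("negotiable") == ("-1", "-1")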


MarketPlaces/Initialization/markets_mining.py (+15, -0)

@@ -14,6 +14,11 @@ from MarketPlaces.M00nkeyMarket.crawler_selenium import crawler as crawlerM00nke
 from MarketPlaces.ViceCity.crawler_selenium import crawler as crawlerViceCity
 from MarketPlaces.CypherMarketplace.crawler_selenium import crawler as crawlerCypher
 from MarketPlaces.PabloEscobarMarket.crawler_selenium import crawler as crawlerPabloEscobar
+from MarketPlaces.DarkBazar.crawler_selenium import crawler as crawlerDarkBazar
+from MarketPlaces.Sonanza.crawler_selenium import crawler as crawlerSonanza
+from MarketPlaces.Kingdom.crawler_selenium import crawler as crawlerKingdom
+from MarketPlaces.BlackPyramid.crawler_selenium import crawler as crawlerBlackPyramid
+from MarketPlaces.Quest.crawler_selenium import crawler as crawlerQuest
 from MarketPlaces.Ares.crawler_selenium import crawler as crawlerAres

 import configparser
@@ -108,6 +113,16 @@ if __name__ == '__main__':
         crawlerCypher()
     elif mkt == "PabloEscobarMarket":
         crawlerPabloEscobar()
+    elif mkt == "DarkBazar":
+        crawlerDarkBazar()
+    elif mkt == "Sonanza":
+        crawlerSonanza()
+    elif mkt == "Kingdom":
+        crawlerKingdom()
+    elif mkt == "BlackPyramid":
+        crawlerBlackPyramid()
+    elif mkt == "Quest":
+        crawlerQuest()
     elif mkt == "Ares":
         crawlerAres()
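
Note: every new market costs one import plus two elif lines here. A dispatch table is a possible refactor that keeps the name-to-crawler mapping in one place; the sketch below uses stub functions in place of the imported crawlerDarkBazar, crawlerBlackPyramid, and the rest:

def crawlerDarkBazar():
    print("crawling DarkBazar")

def crawlerBlackPyramid():
    print("crawling BlackPyramid")

CRAWLERS = {
    "DarkBazar": crawlerDarkBazar,
    "BlackPyramid": crawlerBlackPyramid,
    # ... one entry per supported market
}

def run(mkt):
    crawler = CRAWLERS.get(mkt)
    if crawler is None:
        raise ValueError(f"no crawler registered for {mkt!r}")
    crawler()

run("BlackPyramid")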


MarketPlaces/Initialization/prepare_parser.py (+15, -0)

@@ -18,7 +18,10 @@ from MarketPlaces.CityMarket.parser import *
 from MarketPlaces.DarkBazar.parser import *
 from MarketPlaces.Sonanza.parser import *
 from MarketPlaces.Kingdom.parser import *
+from MarketPlaces.BlackPyramid.parser import *
+from MarketPlaces.Quest.parser import *
 from MarketPlaces.Ares.parser import *
+from MarketPlaces.CypherMarketplace.parser import *

 from MarketPlaces.Classifier.classify_product import predict
@@ -142,6 +145,12 @@ def parse_listing(marketPlace, listingFile, soup, createLog, logFile):
         rw = sonanza_listing_parser(soup)
     elif marketPlace == "Kingdom":
         rw = kingdom_listing_parser(soup)
+    elif marketPlace == "BlackPyramid":
+        rw = blackpyramid_listing_parser(soup)
+    elif marketPlace == "Quest":
+        rw = quest_listing_parser(soup)
+    elif marketPlace == "CypherMarketplace":
+        rw = cyphermarketplace_listing_parser(soup)
     else:
         print("MISSING CALL TO LISTING PARSER IN PREPARE_PARSER.PY!")
         raise Exception
@@ -184,6 +193,12 @@ def parse_description(marketPlace, descriptionFile, soup, createLog, logFile):
         rmm = sonanza_description_parser(soup)
     elif marketPlace == "Kingdom":
         rmm = kingdom_description_parser(soup)
+    elif marketPlace == "BlackPyramid":
+        rmm = blackpyramid_description_parser(soup)
+    elif marketPlace == "Quest":
+        rmm = quest_description_parser(soup)
+    elif marketPlace == "CypherMarketplace":
+        rmm = cyphermarketplace_description_parser(soup)
     else:
         print("MISSING CALL TO DESCRIPTION PARSER IN PREPARE_PARSER.PY!")
         raise Exception
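
Note: both dispatchers assume each market module exposes a <market>_listing_parser(soup) / <market>_description_parser(soup) pair that takes a BeautifulSoup tree and returns rows using "-1" as the missing-field placeholder. A hypothetical skeleton for wiring in a new market (the selector and field set are illustrative; the real row layout is whatever organizeProducts expects):

from bs4 import BeautifulSoup

def newmarket_listing_parser(soup: BeautifulSoup):
    nm = 0                      # *Total_Products
    mktName = "NewMarket"       # 0 *Marketplace_Name
    vendor, rating_vendor, name, href = [], [], [], []
    for card in soup.find_all("div", {"class": "product-card"}):  # placeholder selector
        link = card.find("a")
        name.append(link.text.strip())
        href.append(link.get("href"))
        vendor.append("-1")         # not shown on listing pages: placeholder
        rating_vendor.append("-1")
        nm += 1
    # the real parsers hand these lists to organizeProducts(...)
    return nm, mktName, vendor, rating_vendor, name, href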


MarketPlaces/Utilities/utilities.py (+1, -2)

@@ -252,8 +252,7 @@ def organizeProducts(marketplace, nm, vendor, rating_vendor, success_vendor, nom
     lne = marketplace  # 0
     lne += ","
-    # Added for CityMarket
-    lne += "=1" if len(vendor) == 0 else vendor[n]  # 1
+    lne += "-1" if len(vendor) == 0 else vendor[n]  # 1
     lne += ","
     lne += "-1" if len(rating_vendor) == 0 else rating_vendor[n]  # 2
     lne += ","
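
Note: the one-character fix matters because downstream consumers read the literal "-1" as "field missing"; the stray "=1" corrupted column 1 of every row built with an empty vendor list. A tiny standalone sketch of the row convention:

def build_row(marketplace, n, vendor, rating_vendor):
    # Every missing field becomes the placeholder "-1", never anything else.
    fields = [
        marketplace,                                             # 0
        "-1" if len(vendor) == 0 else vendor[n],                 # 1 (the fixed column)
        "-1" if len(rating_vendor) == 0 else rating_vendor[n],   # 2
    ]
    return ",".join(fields)

assert build_row("CityMarket", 0, [], []) == "CityMarket,-1,-1"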

