From 2ec850869c8ec32759b15ebc6109a4d59a70359d Mon Sep 17 00:00:00 2001 From: westernmeadow Date: Sat, 18 Nov 2023 22:08:15 -0800 Subject: [PATCH] completed BlackPyramid and CityMarket --- MarketPlaces/BlackPyramid/crawler_selenium.py | 31 ++++++++-------- MarketPlaces/BlackPyramid/parser.py | 36 +++++++++++-------- MarketPlaces/CityMarket/crawler_selenium.py | 34 ++++++++++-------- MarketPlaces/CityMarket/parser.py | 36 +++++++------------ MarketPlaces/Initialization/markets_mining.py | 15 ++++++++ MarketPlaces/Initialization/prepare_parser.py | 15 ++++++++ MarketPlaces/Utilities/utilities.py | 3 +- 7 files changed, 101 insertions(+), 69 deletions(-) diff --git a/MarketPlaces/BlackPyramid/crawler_selenium.py b/MarketPlaces/BlackPyramid/crawler_selenium.py index a008bf5..9efe7bc 100644 --- a/MarketPlaces/BlackPyramid/crawler_selenium.py +++ b/MarketPlaces/BlackPyramid/crawler_selenium.py @@ -45,7 +45,7 @@ def startCrawling(): print(driver.current_url, e) closetor(driver) - new_parse(marketName, baseURL, False) + new_parse(marketName, baseURL, True) # Login @@ -207,11 +207,16 @@ def goToPage(driver, page): def getInterestedLinks(): + links = [] + # h11 -> Hacking Tools + links.append('h11') # g3 -> Guides, Hacking - # se3 -> Services, Hacking - # f6 -> Fraud software - links = ['h11','g3','se3','f6'] + links.append('g3') + # se3 -> Services + links.append('se11') + # f6 -> Fraud + links.append('f11') return links @@ -226,6 +231,7 @@ def crawlForum(driver): for listing in pages: print('Crawling :', listing) try: + driver.get(baseURL) goToPage(driver, listing) has_next_page = True @@ -251,12 +257,12 @@ def crawlForum(driver): # can't use the back button in dark pyramid # driver.back() - # comment out - break - - # comment out - if count == 1: - break + # # comment out + # break + # + # # comment out + # if count == 1: + # break # go to next page of market try: @@ -322,8 +328,3 @@ def productPages(html): def crawler(): startCrawling() # print("Crawling and Parsing BestCardingWorld .... DONE!") - - -if __name__ == "__main__": - #crawler() - new_parse("BlackPyramid", baseURL, False) diff --git a/MarketPlaces/BlackPyramid/parser.py b/MarketPlaces/BlackPyramid/parser.py index c1ea43d..3980fc4 100644 --- a/MarketPlaces/BlackPyramid/parser.py +++ b/MarketPlaces/BlackPyramid/parser.py @@ -43,6 +43,12 @@ def blackpyramid_description_parser(soup): name = name.replace(",", "") name = name.strip() + # Finding Product Rating + rating_span = soup.find('span', {'class': 'to3098503t'}).find_next_sibling('span') + rating_num = rating_span.find('b').text + if rating_num != 'N/A': + rating_item = rating_num[0:3] + # product description describe = soup.findAll('div', {'class': 'fer048953'})[1].text describe = describe.replace('\n', ' ') @@ -57,11 +63,11 @@ def blackpyramid_description_parser(soup): vendor = vendor.replace(",", "") vendor = vendor.strip() - # Finding Vendor Rating - rating_span = soup.find('span', {'class': 'to3098503t'}).find_next_sibling('span') - rating_num = rating_span.find('b').text - if rating_num != 'N/A': - rating = rating_num[0:3] + # Finding Product Rating + rating_div = soup.find('div', {'class': 'bold03905 vstat364'}).find_next_sibling('div').find_next_sibling('div') + rating_vendor = cleanNumbers(rating_div.text) + if rating_vendor == "": + rating_vendor = "-1" # Finding Successful Transactions success_container = soup.find('ul', {'class': 'ul3o00953'}).findAll('li')[1] @@ -102,7 +108,7 @@ def blackpyramid_description_parser(soup): positive = soup.find('span', {'class': 'ar04999324'}).text neutral = soup.find('span', {'class': 'ti9400005 can39953'}).text negative = soup.find('span', {'class': 'ti9400005 ti90088 can39953'}).text - review = int(positive) + int(neutral) + int(negative) + reviews = int(positive) + int(neutral) + int(negative) # Finding product image image = soup.find('img', {'class': 'img0390503'}) @@ -147,7 +153,7 @@ def blackpyramid_listing_parser(soup): # Fields to be parsed nm = 0 # *Total_Products (Should be Integer) - mktName = "Black Pyramid" # 0 *Marketplace_Name + mktName = "BlackPyramid" # 0 *Marketplace_Name vendor = [] # 1 *Vendor y rating_vendor = [] # 2 Vendor_Rating success = [] # 3 Vendor_Successful_Transactions @@ -196,14 +202,14 @@ def blackpyramid_listing_parser(soup): product = product.strip() name.append(product) - # Finding description - # 'recurisve = False' only searches direct children - desc = card.findChildren('div', recursive=False)[0] - desc = desc.findAll('div', recursive=False)[3].text - desc = desc.replace('\n', ' ') - desc = desc.replace(",", "") - desc = desc.strip() - describe.append(desc) + # # Finding description + # # 'recurisve = False' only searches direct children + # desc = card.findChildren('div', recursive=False)[0] + # desc = desc.findAll('div', recursive=False)[3].text + # desc = desc.replace('\n', ' ') + # desc = desc.replace(",", "") + # desc = desc.strip() + # describe.append(desc) # Finding Vendor Name vendor_name = bae[4].find('span').text diff --git a/MarketPlaces/CityMarket/crawler_selenium.py b/MarketPlaces/CityMarket/crawler_selenium.py index 6f2a399..a00179e 100644 --- a/MarketPlaces/CityMarket/crawler_selenium.py +++ b/MarketPlaces/CityMarket/crawler_selenium.py @@ -144,6 +144,7 @@ def login(driver): WebDriverWait(driver, 100).until(EC.visibility_of_element_located( (By.XPATH, '//*[@id="collapse3"]'))) + # Saves the crawled html page, makes the directory path for html pages if not made def savePage(driver, page, url): cleanPage = cleanHTML(driver, page) @@ -186,10 +187,18 @@ def getInterestedLinks(): links = [] # # Hire hacker - links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=3') - # # ddos + # links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=3') + # # other + # links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=14') + # malware + links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=15') + # ddos links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=16') - # # hacking service + # software + links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=17') + # botnet + links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=18') + # hacking service links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=31') return links @@ -217,7 +226,7 @@ def crawlForum(driver): except: driver.refresh() html = driver.page_source - savePage(driver, html, link) + savePage(driver, html, linksToCrawl[i] + f"page{count+1}") list = productPages(html) for item in list: @@ -230,18 +239,15 @@ def crawlForum(driver): savePage(driver, driver.page_source, item) driver.back() - # comment out - # break - - # comment out - """count += 1 - if count == 1: - break""" + # # comment out + # break + # + # # comment out + # if count == 1: + # break try: - #link = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div/div[2]/nav/ul/li[5]/a').get_attribute('href') - link = driver.find_element(by=By.XPATH, - value='//a[@rel="next"]').get_attribute('href') + link = driver.find_element(by=By.XPATH, value='//a[@rel="next"]').get_attribute('href') if link == "": raise NoSuchElementException count += 1 diff --git a/MarketPlaces/CityMarket/parser.py b/MarketPlaces/CityMarket/parser.py index 8c9bf37..5679b95 100644 --- a/MarketPlaces/CityMarket/parser.py +++ b/MarketPlaces/CityMarket/parser.py @@ -57,19 +57,6 @@ def city_description_parser(soup): describe = soup.find('div', {'class': "text-white"}).text describe = cleanString(describe.strip()) - '''# Finding the Number of Product Reviews - tag = soup.findAll(text=re.compile('Reviews')) - for index in tag: - reviews = index - par = reviews.find('(') - if par >=0: - reviews = reviews.replace("Reviews (","") - reviews = reviews.replace(")","") - reviews = reviews.split(",") - review = str(abs(int(reviews[0])) + abs(int(reviews[1]))) - else : - review = "-1"''' - # Searching for CVE and MS categories cve = soup.findAll(text=re.compile('CVE-\d{4}-\d{4}')) if cve: @@ -138,7 +125,6 @@ def city_listing_parser(soup): # Adding the url to the list of urls link = bae[0].get('href') - link = cleanLink(link) href.append(link) # Category @@ -156,15 +142,19 @@ def city_listing_parser(soup): # USD and BTC Price price = a.find('div', {"class": "price"}).text - tempUSD = price.split("~")[0] - tempUSD = tempUSD.replace("$", "") - tempUSD = tempUSD.strip() - USD.append(tempUSD) - - tempBTC = price.split("~")[1] - tempBTC = tempBTC.replace("BTC", "") - tempBTC = tempBTC.strip() - BTC.append(tempBTC) + if "~" in price: + tempUSD = price.split("~")[0] + tempUSD = tempUSD.replace("$", "") + tempUSD = tempUSD.strip() + USD.append(tempUSD) + + tempBTC = price.split("~")[1] + tempBTC = tempBTC.replace("BTC", "") + tempBTC = tempBTC.strip() + BTC.append(tempBTC) + else: + USD.append("-1") + BTC.append("-1") # Img product_image = a.find('img') diff --git a/MarketPlaces/Initialization/markets_mining.py b/MarketPlaces/Initialization/markets_mining.py index e5fe69a..d13389a 100644 --- a/MarketPlaces/Initialization/markets_mining.py +++ b/MarketPlaces/Initialization/markets_mining.py @@ -14,6 +14,11 @@ from MarketPlaces.M00nkeyMarket.crawler_selenium import crawler as crawlerM00nke from MarketPlaces.ViceCity.crawler_selenium import crawler as crawlerViceCity from MarketPlaces.CypherMarketplace.crawler_selenium import crawler as crawlerCypher from MarketPlaces.PabloEscobarMarket.crawler_selenium import crawler as crawlerPabloEscobar +from MarketPlaces.DarkBazar.crawler_selenium import crawler as crawlerDarkBazar +from MarketPlaces.Sonanza.crawler_selenium import crawler as crawlerSonanza +from MarketPlaces.Kingdom.crawler_selenium import crawler as crawlerKingdom +from MarketPlaces.BlackPyramid.crawler_selenium import crawler as crawlerBlackPyramid +from MarketPlaces.Quest.crawler_selenium import crawler as crawlerQuest from MarketPlaces.Ares.crawler_selenium import crawler as crawlerAres import configparser @@ -108,6 +113,16 @@ if __name__ == '__main__': crawlerCypher() elif mkt == "PabloEscobarMarket": crawlerPabloEscobar() + elif mkt == "DarkBazar": + crawlerDarkBazar() + elif mkt == "Sonanza": + crawlerSonanza() + elif mkt == "Kingdom": + crawlerKingdom() + elif mkt == "BlackPyramid": + crawlerBlackPyramid() + elif mkt == "Quest": + crawlerQuest() elif mkt == "Ares": crawlerAres() diff --git a/MarketPlaces/Initialization/prepare_parser.py b/MarketPlaces/Initialization/prepare_parser.py index 79a2bdc..fc623f4 100644 --- a/MarketPlaces/Initialization/prepare_parser.py +++ b/MarketPlaces/Initialization/prepare_parser.py @@ -18,7 +18,10 @@ from MarketPlaces.CityMarket.parser import * from MarketPlaces.DarkBazar.parser import * from MarketPlaces.Sonanza.parser import * from MarketPlaces.Kingdom.parser import * +from MarketPlaces.BlackPyramid.parser import * +from MarketPlaces.Quest.parser import * from MarketPlaces.Ares.parser import * +from MarketPlaces.CypherMarketplace.parser import * from MarketPlaces.Classifier.classify_product import predict @@ -142,6 +145,12 @@ def parse_listing(marketPlace, listingFile, soup, createLog, logFile): rw = sonanza_listing_parser(soup) elif marketPlace == "Kingdom": rw = kingdom_listing_parser(soup) + elif marketPlace == "BlackPyramid": + rw = blackpyramid_listing_parser(soup) + elif marketPlace == "Quest": + rw = quest_listing_parser(soup) + elif marketPlace == "CypherMarketplace": + rw = cyphermarketplace_listing_parser(soup) else: print("MISSING CALL TO LISTING PARSER IN PREPARE_PARSER.PY!") raise Exception @@ -184,6 +193,12 @@ def parse_description(marketPlace, descriptionFile, soup, createLog, logFile): rmm = sonanza_description_parser(soup) elif marketPlace == "Kingdom": rmm = kingdom_description_parser(soup) + elif marketPlace == "BlackPyramid": + rmm = blackpyramid_description_parser(soup) + elif marketPlace == "Quest": + rmm = quest_description_parser(soup) + elif marketPlace == "CypherMarketplace": + rmm = cyphermarketplace_description_parser(soup) else: print("MISSING CALL TO DESCRIPTION PARSER IN PREPARE_PARSER.PY!") raise Exception diff --git a/MarketPlaces/Utilities/utilities.py b/MarketPlaces/Utilities/utilities.py index 7def2f5..c6aa192 100644 --- a/MarketPlaces/Utilities/utilities.py +++ b/MarketPlaces/Utilities/utilities.py @@ -252,8 +252,7 @@ def organizeProducts(marketplace, nm, vendor, rating_vendor, success_vendor, nom lne = marketplace # 0 lne += "," - # Added for CityMarket - lne += "=1" if len(vendor) == 0 else vendor[n] # 1 + lne += "-1" if len(vendor) == 0 else vendor[n] # 1 lne += "," lne += "-1" if len(rating_vendor) == 0 else rating_vendor[n] # 2 lne += ","