From 89684724fd7f78900547707b2c1394c0401c5903 Mon Sep 17 00:00:00 2001
From: westernmeadow
Date: Fri, 1 Sep 2023 14:25:26 -0700
Subject: [PATCH] debugged ThiefWorld, TorMarket, and AnonMarket

---
 .idea/DW_Pipeline_Test.iml                    |   3 +-
 .idea/misc.xml                                |   2 +-
 MarketPlaces/AnonMarket/crawler_selenium.py   |  91 +++++------
 MarketPlaces/AnonMarket/parser.py             | 113 ++++++-------
 MarketPlaces/CityMarket/parser.py             | 100 ++++++------
 MarketPlaces/Initialization/marketsList.txt   |  13 +-
 MarketPlaces/Initialization/markets_mining.py |   3 +
 MarketPlaces/Initialization/prepare_parser.py |  29 ++--
 .../MetaVerseMarket/crawler_selenium.py       |  10 +-
 MarketPlaces/MetaVerseMarket/parser.py        | 148 ++++++++----------
 .../PabloEscobarMarket/crawler_selenium.py    |   2 +-
 MarketPlaces/PabloEscobarMarket/parser.py     |  96 ++++++------
 MarketPlaces/ThiefWorld/crawler_selenium.py   |  12 +-
 MarketPlaces/ThiefWorld/parser.py             |   6 +-
 MarketPlaces/TorMarket/crawler_selenium.py    |   4 +-
 MarketPlaces/TorMarket/parser.py              | 120 ++++++--------
 16 files changed, 351 insertions(+), 401 deletions(-)

diff --git a/.idea/DW_Pipeline_Test.iml b/.idea/DW_Pipeline_Test.iml
index 08a5719..f27dbb9 100644
--- a/.idea/DW_Pipeline_Test.iml
+++ b/.idea/DW_Pipeline_Test.iml
@@ -2,7 +2,7 @@
-
+
@@ -27,6 +27,7 @@

diff --git a/.idea/misc.xml b/.idea/misc.xml
index dc9ea49..11f1ea0 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,4 @@
-
+
 \ No newline at end of file

diff --git a/MarketPlaces/AnonMarket/crawler_selenium.py b/MarketPlaces/AnonMarket/crawler_selenium.py
index 2171d84..42d8e49 100644
--- a/MarketPlaces/AnonMarket/crawler_selenium.py
+++ b/MarketPlaces/AnonMarket/crawler_selenium.py
@@ -31,7 +31,6 @@ baseURL = 'http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion
 # Opens Tor Browser, crawls the website, then parses, then closes tor
 #acts like the main method for the crawler, another function at the end of this code calls this function later
 def startCrawling():
-    opentor()
     mktName = getMKTName()
     driver = getAccess()
 
@@ -40,22 +39,10 @@ def startCrawling():
         crawlForum(driver)
     except Exception as e:
         print(driver.current_url, e)
-    closetor(driver)
+    closeDriver(driver)
 
     new_parse(mktName, baseURL, True)
 
 
-# Opens Tor Browser
-#prompts for ENTER input to continue
-def opentor():
-    from MarketPlaces.Initialization.markets_mining import config
-
-    global pid
-    print("Connecting Tor...")
-    pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
-    pid = pro.pid
-    time.sleep(7.5)
-    input('Tor Connected. Press ENTER to continue\n')
-    return
 
 # Returns the name of the website
 #return: name of site in string type
@@ -73,7 +60,7 @@ def getFixedURL():
 
 # Closes Tor Browser
 #@param: current selenium driver
-def closetor(driver):
+def closeDriver(driver):
     # global pid
     # os.system("taskkill /pid " + str(pro.pid))
     # os.system("taskkill /t /f /im tor.exe")
@@ -100,7 +87,7 @@ def createFFDriver():
     ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
     ff_prof.set_preference("network.dns.disablePrefetch", True)
     ff_prof.set_preference("network.http.sendRefererHeader", 0)
-    ff_prof.set_preference("permissions.default.image", 2)
+    ff_prof.set_preference("permissions.default.image", 3)
     ff_prof.set_preference("browser.download.folderList", 2)
     ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
     ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
@@ -216,47 +203,63 @@ def getInterestedLinks():
 #topic and description pages are crawled through here, where both types of pages are saved
 #@param: selenium driver
 def crawlForum(driver):
-    print("Crawling Anon Market")
+    print("Crawling the Anon Market")
     linksToCrawl = getInterestedLinks()
 
     for link in linksToCrawl:
         print('Crawling :', link)
-        has_next_page = True
-
-        while has_next_page:
-            try:
-                driver.get(link)
-            except:
-                driver.refresh()
-
-            html = driver.page_source
-            savePage(driver, html, link)
+        try:
+            has_next_page = True
+            count = 0
 
-            # Get all product links on the current page
-            products_list = productPages(html)
-            for item in products_list:
-                itemURL = urlparse.urljoin(baseURL, str(item))
+            while has_next_page:
                 try:
-                    driver.get(itemURL)
+                    driver.get(link)
                 except:
                     driver.refresh()
-                savePage(driver, driver.page_source, item)
-                driver.back()  # Go back to listing after visiting each product
 
-            # Find the active page number
-            active_page_element = driver.find_element(By.XPATH, '//div[@class="page activepage"]')
-            current_page = int(active_page_element.text)
+                html = driver.page_source
+                savePage(driver, html, link)
+
+                # Get all product links on the current page
+                products_list = productPages(html)
+                for item in products_list:
+                    itemURL = urlparse.urljoin(baseURL, str(item))
+                    try:
+                        driver.get(itemURL)
+                    except:
+                        driver.refresh()
+                    savePage(driver, driver.page_source, item)
+                    driver.back()  # Go back to listing after visiting each product
 
-            # Locate the next page link
-            try:
-                next_page_element = active_page_element.find_element(By.XPATH, 'following-sibling::a[1]')
-                link = next_page_element.get_attribute('href')
-            except NoSuchElementException:
-                has_next_page = False
+                # comment out
+                # break
+
+                # comment out
+                if count == 1:
+                    break
+
+                # Locate the next page link
+                try:
+                    # Find the active page number
+                    active_page_element = driver.find_element(By.XPATH, '//div[@class="page activepage"]')
+                    # current_page = int(active_page_element.text)
+
+                    next_page_element = active_page_element.find_element(By.XPATH, 'following-sibling::a[1]')
+                    link = next_page_element.get_attribute('href')
+                    if link == "":
+                        raise NoSuchElementException
+                    count += 1
+
+                except NoSuchElementException:
+                    has_next_page = False
+
+        except Exception as e:
+            print(link, e)
 
-    print("Crawling Anon Market done.")
+    print("Crawling the Anon Market done.")
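
Side note on the urljoin call in the rewritten crawl loop: product hrefs on the listing pages are relative, and urljoin resolves them against the onion base while leaving absolute URLs untouched. A minimal standalone sketch (the item path is an illustrative value, not taken from the site):

```python
from urllib.parse import urljoin

base = 'http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion'

# A relative href as it might appear on a listing page (illustrative value)
item = '/product/1234'

# Relative paths are resolved against the base URL
print(urljoin(base, item))    # .../product/1234

# Absolute hrefs pass through unchanged
print(urljoin(base, 'http://example.onion/x'))    # http://example.onion/x
```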
diff --git a/MarketPlaces/AnonMarket/parser.py b/MarketPlaces/AnonMarket/parser.py
index c53283c..c5c7f6d 100644
--- a/MarketPlaces/AnonMarket/parser.py
+++ b/MarketPlaces/AnonMarket/parser.py
@@ -15,25 +15,27 @@ import re
 def AnonMarket_description_parser(soup):
 
     # Fields to be parsed
-    vendor = "-1"  # 0 *Vendor_Name
-    success = "-1"  # 1 Vendor_Successful_Transactions
-    rating_vendor = "-1"  # 2 Vendor_Rating
-    name = "-1"  # 3 *Product_Name
-    describe = "-1"  # 4 Product_Description
-    CVE = "-1"  # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) dont worry about that much
-    MS = "-1"  # 6 Product_MS_Classification (Microsoft Security) dont worry about that much
-    category = "-1"  # 7 Product_Category
-    views = "-1"  # 8 Product_Number_Of_Views
-    reviews = "-1"  # 9 Product_Number_Of_Reviews
-    rating_item = "-1"  # 10 Product_Rating
-    addDate = "-1"  # 11 Product_AddedDate
-    BTC = "-1"  # 12 Product_BTC_SellingPrice
-    USD = "-1"  # 13 Product_USD_SellingPrice
-    EURO = "-1"  # 14 Product_EURO_SellingPrice
-    sold = "-1"  # 15 Product_QuantitySold
-    left = "-1"  # 16 Product_QuantityLeft
-    shipFrom = "-1"  # 17 Product_ShippedFrom
-    shipTo = "-1"  # 18 Product_ShippedTo
+    vendor = "-1"  # 0 *Vendor_Name
+    success = "-1"  # 1 Vendor_Successful_Transactions
+    rating_vendor = "-1"  # 2 Vendor_Rating
+    name = "-1"  # 3 *Product_Name
+    describe = "-1"  # 4 Product_Description
+    CVE = "-1"  # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
+    MS = "-1"  # 6 Product_MS_Classification (Microsoft Security)
+    category = "-1"  # 7 Product_Category
+    views = "-1"  # 8 Product_Number_Of_Views
+    reviews = "-1"  # 9 Product_Number_Of_Reviews
+    rating_item = "-1"  # 10 Product_Rating
+    addDate = "-1"  # 11 Product_AddedDate
+    BTC = "-1"  # 12 Product_BTC_SellingPrice
+    USD = "-1"  # 13 Product_USD_SellingPrice
+    EURO = "-1"  # 14 Product_EURO_SellingPrice
+    sold = "-1"  # 15 Product_QuantitySold
+    left = "-1"  # 16 Product_QuantityLeft
+    shipFrom = "-1"  # 17 Product_ShippedFrom
+    shipTo = "-1"  # 18 Product_ShippedTo
+    image = "-1"  # 19 Product_Image
+    vendor_image = "-1"  # 20 Vendor_Image
 
     name_of_product = soup.find("div", {"class": "heading"}).text
     name = cleanString(name_of_product.strip())
@@ -70,8 +72,7 @@ def AnonMarket_description_parser(soup):
 
     # Populating the final variable (this should be a list with all fields scraped)
     row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
-           BTC, USD, EURO, sold, left, shipFrom, shipTo)
-
+           BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
 
     # Sending the results
     return row
@@ -85,27 +86,29 @@ def AnonMarket_listing_parser(soup):
 
     # Fields to be parsed
     nm = 0  # *Total_Products (Should be Integer)
-    mktName = "AnonMarket"  # 0 *Marketplace_Name
-    vendor = []  # 1 *Vendor y
-    rating_vendor = []  # 2 Vendor_Rating
-    success = []  # 3 Vendor_Successful_Transactions
-    name = []  # 4 *Product_Name y
-    CVE = []  # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
-    MS = []  # 6 Product_MS_Classification (Microsoft Security)
-    category = []  # 7 Product_Category y
-    describe = []  # 8 Product_Description
-    views = []  # 9 Product_Number_Of_Views
-    reviews = []  # 10 Product_Number_Of_Reviews
-    rating_item = []  # 11 Product_Rating
-    addDate = []  # 12 Product_AddDate
-    BTC = []  # 13 Product_BTC_SellingPrice
-    USD = []  # 14 Product_USD_SellingPrice y
-    EURO = []  # 15 Product_EURO_SellingPrice
-    sold = []  # 16 Product_QuantitySold
-    qLeft = []  # 17 Product_QuantityLeft
-    shipFrom = []  # 18 Product_ShippedFrom
-    shipTo = []  # 19 Product_ShippedTo
-    href = []  # 20 Product_Links
+    mktName = "AnonMarket"  # 0 *Marketplace_Name
+    vendor = []  # 1 *Vendor y
+    rating_vendor = []  # 2 Vendor_Rating
+    success = []  # 3 Vendor_Successful_Transactions
+    name = []  # 4 *Product_Name y
+    CVE = []  # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) don't worry about this
+    MS = []  # 6 Product_MS_Classification (Microsoft Security) don't worry about this
+    category = []  # 7 Product_Category y
+    describe = []  # 8 Product_Description
+    views = []  # 9 Product_Number_Of_Views
+    reviews = []  # 10 Product_Number_Of_Reviews
+    rating_item = []  # 11 Product_Rating
+    addDate = []  # 12 Product_AddDate
+    BTC = []  # 13 Product_BTC_SellingPrice
+    USD = []  # 14 Product_USD_SellingPrice y
+    EURO = []  # 15 Product_EURO_SellingPrice
+    sold = []  # 16 Product_QuantitySold
+    qLeft = []  # 17 Product_QuantityLeft
+    shipFrom = []  # 18 Product_ShippedFrom
+    shipTo = []  # 19 Product_ShippedTo
+    image = []  # 20 Product_Image
+    image_vendor = []  # 21 Vendor_Image
+    href = []  # 22 Product_Links
 
     base_url = "http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion"
     products_list = soup.find_all('div', {'class': 'item'})
@@ -155,30 +158,8 @@ def AnonMarket_listing_parser(soup):
             continue
 
     # Populate the final variable (this should be a list with all fields scraped)
-    return organizeProducts(
-        marketplace = "AnonMarket",
-        nm = nm,
-        vendor = vendor,
-        rating_vendor = rating_vendor,
-        success_vendor = success,
-        nombre = name,
-        CVE = CVE,
-        MS = MS,
-        category = category,
-        describe = describe,
-        views = views,
-        reviews = reviews,
-        rating_item = rating_item,
-        addDate = addDate,
-        BTC = BTC,
-        USD = USD,
-        EURO = EURO,
-        sold = sold,
-        qLeft = qLeft,
-        shipFrom = shipFrom,
-        shipTo = shipTo,
-        href = href
-    )
+    return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
+                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
 
 
 #called by the crawler to get description links on a listing page
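
With organizeProducts now called positionally, argument order is load-bearing, so a quick offline shape check of the description row is cheap insurance against regressions. A sketch, assuming bs4 is installed and pointing at a page saved by the crawler (the file name is illustrative):

```python
from bs4 import BeautifulSoup

from MarketPlaces.AnonMarket.parser import AnonMarket_description_parser

# Illustrative path; use any saved AnonMarket description page
with open('anonmarket_description_sample.html', encoding='utf-8') as f:
    soup = BeautifulSoup(f.read(), 'html.parser')

row = AnonMarket_description_parser(soup)

# vendor through vendor_image: indices 0-20, i.e. 21 fields
assert len(row) == 21, f"expected 21 fields, got {len(row)}"
print(row[3])   # 3 *Product_Name
```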
diff --git a/MarketPlaces/CityMarket/parser.py b/MarketPlaces/CityMarket/parser.py
index e5f3575..75ca4fa 100644
--- a/MarketPlaces/CityMarket/parser.py
+++ b/MarketPlaces/CityMarket/parser.py
@@ -15,29 +15,27 @@ def city_description_parser(soup):
 
     # Fields to be parsed
-    name = "-1"  # 0 Product_Name
-    describe = "-1"  # 1 Product_Description
-    lastSeen = "-1"  # 2 Product_LastViewDate
-    rules = "-1"  # 3 NOT USED ...
-    CVE = "-1"  # 4 Product_CVE_Classification (Common Vulnerabilities and Exposures)
-    MS = "-1"  # 5 Product_MS_Classification (Microsoft Security)
-    review = "-1"  # 6 Product_Number_Of_Reviews
+    vendor = "-1"  # 0 *Vendor_Name
+    success = "-1"  # 1 Vendor_Successful_Transactions
+    rating_vendor = "-1"  # 2 Vendor_Rating
+    name = "-1"  # 3 *Product_Name
+    describe = "-1"  # 4 Product_Description
+    CVE = "-1"  # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
+    MS = "-1"  # 6 Product_MS_Classification (Microsoft Security)
     category = "-1"  # 7 Product_Category
-    shipFrom = "-1"  # 8 Product_ShippedFrom
-    shipTo = "-1"  # 9 Product_ShippedTo
-    left = "-1"  # 10 Product_QuantityLeft
-    escrow = "-1"  # 11 Vendor_Warranty
-    terms = "-1"  # 12 Vendor_TermsAndConditions
-    vendor = "-1"  # 13 Vendor_Name
-    sold = "-1"  # 14 Product_QuantitySold
-    addDate = "-1"  # 15 Product_AddedDate
-    available = "-1"  # 16 NOT USED ...
-    endDate = "-1"  # 17 NOT USED ...
-    BTC = "-1"  # 18 Product_BTC_SellingPrice
-    USD = "-1"  # 19 Product_USD_SellingPrice
-    rating = "-1"  # 20 Vendor_Rating
-    success = "-1"  # 21 Vendor_Successful_Transactions
-    EURO = "-1"  # 22 Product_EURO_SellingPrice
+    views = "-1"  # 8 Product_Number_Of_Views
+    reviews = "-1"  # 9 Product_Number_Of_Reviews
+    rating_item = "-1"  # 10 Product_Rating
+    addDate = "-1"  # 11 Product_AddedDate
+    BTC = "-1"  # 12 Product_BTC_SellingPrice
+    USD = "-1"  # 13 Product_USD_SellingPrice
+    EURO = "-1"  # 14 Product_EURO_SellingPrice
+    sold = "-1"  # 15 Product_QuantitySold
+    left = "-1"  # 16 Product_QuantityLeft
+    shipFrom = "-1"  # 17 Product_ShippedFrom
+    shipTo = "-1"  # 18 Product_ShippedTo
+    image = "-1"  # 19 Product_Image
+    vendor_image = "-1"  # 20 Vendor_Image
 
     divmd7 = soup.find('div', {'class': "col-md-7"})
     ptag = soup.findAll('p')
@@ -79,8 +77,7 @@ def city_description_parser(soup):
 
     # Finding the Product description
     describe = soup.find('div', {'class': "text-white"}).text
-    describe = describe.replace("\n", " ")
-    describe = describe.strip()
+    describe = cleanString(describe.strip())
 
     '''# Finding the Number of Product Reviews
     tag = soup.findAll(text=re.compile('Reviews'))
@@ -114,8 +111,8 @@ def city_description_parser(soup):
         MS = MS.replace('\n', '')
 
     # Populating the final variable (this should be a list with all fields scraped)
-    row = (name, describe, lastSeen, rules, CVE, MS, review, category, shipFrom, shipTo, left, escrow, terms, vendor,
-           sold, addDate, available, endDate, BTC, USD, rating, success, EURO)
+    row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
+           BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
 
     # Sending the results
     return row
@@ -128,29 +125,30 @@ def city_description_parser(soup):
 def city_listing_parser(soup):
 
     # Fields to be parsed
-    nm = 0  # Total_Products (Should be Integer)
-    mktName = "CityMarket"  # 0 Marketplace_Name
-    name = []  # 1 Product_Name
-    CVE = []  # 2 Product_CVE_Classification (Common Vulnerabilities and Exposures)
-    MS = []  # 3 Product_MS_Classification (Microsoft Security)
-    category = []  # 4 Product_Category
-    describe = []  # 5 Product_Description
-    escrow = []  # 6 Vendor_Warranty
-    views = []  # 7 Product_Number_Of_Views
-    reviews = []  # 8 Product_Number_Of_Reviews
-    addDate = []  # 9 Product_AddDate
-    lastSeen = []  # 10 Product_LastViewDate
-    BTC = []  # 11 Product_BTC_SellingPrice
-    USD = []  # 12 Product_USD_SellingPrice
-    EURO = []  # 13 Product_EURO_SellingPrice
-    sold = []  # 14 Product_QuantitySold
-    qLeft =[]  # 15 Product_QuantityLeft
-    shipFrom = []  # 16 Product_ShippedFrom
-    shipTo = []  # 17 Product_ShippedTo
-    vendor = []  # 18 Vendor
-    rating = []  # 19 Vendor_Rating
-    success = []  # 20 Vendor_Successful_Transactions
-    href = []  # 23 Product_Links (Urls)
+    nm = 0  # *Total_Products (Should be Integer)
+    mktName = "CityMarket"  # 0 *Marketplace_Name
+    vendor = []  # 1 *Vendor y
+    rating_vendor = []  # 2 Vendor_Rating
+    success = []  # 3 Vendor_Successful_Transactions
+    name = []  # 4 *Product_Name y
+    CVE = []  # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) don't worry about this
+    MS = []  # 6 Product_MS_Classification (Microsoft Security) don't worry about this
+    category = []  # 7 Product_Category y
+    describe = []  # 8 Product_Description
+    views = []  # 9 Product_Number_Of_Views
+    reviews = []  # 10 Product_Number_Of_Reviews
+    rating_item = []  # 11 Product_Rating
+    addDate = []  # 12 Product_AddDate
+    BTC = []  # 13 Product_BTC_SellingPrice
+    USD = []  # 14 Product_USD_SellingPrice y
+    EURO = []  # 15 Product_EURO_SellingPrice
+    sold = []  # 16 Product_QuantitySold
+    qLeft = []  # 17 Product_QuantityLeft
+    shipFrom = []  # 18 Product_ShippedFrom
+    shipTo = []  # 19 Product_ShippedTo
+    image = []  # 20 Product_Image
+    image_vendor = []  # 21 Vendor_Image
+    href = []  # 22 Product_Links
 
     listing = soup.findAll('div', {"class": "card"})
@@ -227,8 +225,8 @@ def city_listing_parser(soup):
         MS.append(MSValue)
 
     # Populate the final variable (this should be a list with all fields scraped)
-    return organizeProducts(mktName, nm, name, CVE, MS, category, describe, escrow, views, reviews, addDate, lastSeen,
-                            BTC, USD, EURO, qLeft, shipFrom, shipTo, vendor, rating, success, sold, href)
+    return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
+                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
 
 
 #called by the crawler to get description links on a listing page

diff --git a/MarketPlaces/Initialization/marketsList.txt b/MarketPlaces/Initialization/marketsList.txt
index 540b444..87f811c 100644
--- a/MarketPlaces/Initialization/marketsList.txt
+++ b/MarketPlaces/Initialization/marketsList.txt
@@ -1,12 +1 @@
-Apocalypse
-DarkBazar
-DarkMatter
-DigitalThriftShop
-HiddenMarket
-LionMarketplace
-Nexus
-Robinhood
-ThiefWorld
-TorBay
-TorMarket
-ViceCity
\ No newline at end of file
+ThiefWorld
\ No newline at end of file

diff --git a/MarketPlaces/Initialization/markets_mining.py b/MarketPlaces/Initialization/markets_mining.py
index 62e97f8..21888fc 100644
--- a/MarketPlaces/Initialization/markets_mining.py
+++ b/MarketPlaces/Initialization/markets_mining.py
@@ -27,6 +27,7 @@ from MarketPlaces.CypherMarketplace.crawler_selenium import crawler as crawlerCy
 from MarketPlaces.DarkBazar.crawler_selenium import crawler as crawlerDarkBazar
 from MarketPlaces.PabloEscobarMarket.crawler_selenium import crawler as crawlerPabloEscobar
 from MarketPlaces.AnonMarket.crawler_selenium import crawler as crawlerAnonMarket
+from MarketPlaces.MetaVerseMarket.crawler_selenium import crawler as crawlerMetaVerse
 
 import configparser
 import os
@@ -146,5 +147,7 @@ if __name__ == '__main__':
         crawlerPabloEscobar()
     elif mkt == "AnonMarket":
         crawlerAnonMarket()
+    elif mkt == "MetaVerseMarket":
+        crawlerMetaVerse()
 
     print("\nScraping process completed!")

diff --git a/MarketPlaces/Initialization/prepare_parser.py b/MarketPlaces/Initialization/prepare_parser.py
index 60abf80..1cc5af5 100644
--- a/MarketPlaces/Initialization/prepare_parser.py
+++ b/MarketPlaces/Initialization/prepare_parser.py
@@ -24,6 +24,8 @@ from MarketPlaces.MikesGrandStore.parser import *
 from MarketPlaces.DarkBazar.parser import *
 from MarketPlaces.PabloEscobarMarket.parser import *
 from MarketPlaces.AnonMarket.parser import *
+from MarketPlaces.CityMarket.parser import *
+from MarketPlaces.MetaVerseMarket.parser import *
 
 from MarketPlaces.Classifier.classify_product import predict
 
@@ -161,6 +163,10 @@ def parse_listing(marketPlace, listingFile, soup, createLog, logFile):
         rw = pabloescobarmarket_listing_parser(soup)
     elif marketPlace == "AnonMarket":
         rw = AnonMarket_listing_parser(soup)
+    elif marketPlace == "CityMarket":
+        rw = city_listing_parser(soup)
+    elif marketPlace == "MetaVerseMarket":
+        rw = metaversemarket_listing_parser(soup)
     else:
         print("MISSING CALL TO LISTING PARSER IN PREPARE_PARSER.PY!")
         raise Exception
@@ -218,6 +224,10 @@ def parse_description(marketPlace, descriptionFile, soup, createLog, logFile):
         rmm = pabloescobarmarket_description_parser(soup)
     elif marketPlace == "AnonMarket":
         rmm = AnonMarket_description_parser(soup)
+    elif marketPlace == "CityMarket":
+        rmm = city_description_parser(soup)
+    elif marketPlace == "MetaVerseMarket":
+        rmm = metaversemarket_description_parser(soup)
     else:
         print("MISSING CALL TO DESCRIPTION PARSER IN PREPARE_PARSER.PY!")
         raise Exception
@@ -243,18 +253,13 @@ def persist_record(url, rec, cur, con, createLog, logFile, listingFile, descriptionFile):
 
         con.rollback()
 
-        trace = traceback.format_exc()
-
-        if trace.find("already exists") == -1:
-            incrementError()
-            print(f"There was a problem to persist the files ({listingFile} + {descriptionFile}) in the database!")
-            traceback.print_exc()
-            if createLog:
-                logFile.write(
-                    str(nError) + f". There was a problem to persist the files ({listingFile} + {descriptionFile}) in the database!\n")
-            return False
-        else:
-            return True
+        incrementError()
+        print(f"There was a problem to persist the files ({listingFile} + {descriptionFile}) in the database!")
+        traceback.print_exc()
+        if createLog:
+            logFile.write(
+                str(nError) + f". There was a problem to persist the files ({listingFile} + {descriptionFile}) in the database!\n")
+        return False
 
 
 def move_file(filePath, createLog, logFile):

diff --git a/MarketPlaces/MetaVerseMarket/crawler_selenium.py b/MarketPlaces/MetaVerseMarket/crawler_selenium.py
index 49760fc..d5783a4 100644
--- a/MarketPlaces/MetaVerseMarket/crawler_selenium.py
+++ b/MarketPlaces/MetaVerseMarket/crawler_selenium.py
@@ -186,10 +186,10 @@ def getInterestedLinks():
 
     # hacking
     links.append('http://mdbvvcfwl3fpckiraucv7gio57yoslnhfjxzpoihf4fgdkdd7bwyv7id.onion/products/hacking')
-    # hosting
-    links.append('http://mdbvvcfwl3fpckiraucv7gio57yoslnhfjxzpoihf4fgdkdd7bwyv7id.onion/products/hosting')
-    # hacking guides and tutorials
-    links.append('http://mdbvvcfwl3fpckiraucv7gio57yoslnhfjxzpoihf4fgdkdd7bwyv7id.onion/products/hacking-guides-and-tutorials')
+    # # hosting
+    # links.append('http://mdbvvcfwl3fpckiraucv7gio57yoslnhfjxzpoihf4fgdkdd7bwyv7id.onion/products/hosting')
+    # # hacking guides and tutorials
+    # links.append('http://mdbvvcfwl3fpckiraucv7gio57yoslnhfjxzpoihf4fgdkdd7bwyv7id.onion/products/hacking-guides-and-tutorials')
 
     return links
@@ -236,7 +236,7 @@ def crawlForum(driver):
                     break
 
                 try:
-                    link = driver.find_element(by=By.XPATH, value='//a[@class="page-link-next"]').get_attribute('href')
+                    link = driver.find_element(by=By.PARTIAL_LINK_TEXT, value='Next').get_attribute('href')
                     if link == "":
                         raise NoSuchElementException
                     count += 1
diff --git a/MarketPlaces/MetaVerseMarket/parser.py b/MarketPlaces/MetaVerseMarket/parser.py
index 047db35..c43b823 100644
--- a/MarketPlaces/MetaVerseMarket/parser.py
+++ b/MarketPlaces/MetaVerseMarket/parser.py
@@ -14,55 +14,52 @@ from bs4 import BeautifulSoup
 def metaversemarket_description_parser(soup):
 
     # Fields to be parsed
-    name = "-1"  # 0 Product_Name
-    describe = "-1"  # 1 Product_Description
-    lastSeen = "-1"  # 2 Product_LastViewDate
-    CVE = "-1"  # 4 Product_CVE_Classification (Common Vulnerabilities and Exposures)
-    MS = "-1"  # 5 Product_MS_Classification (Microsoft Security)
-    review = "-1"  # 6 Product_Number_Of_Reviews
-    category = "-1"  # 7 Product_Category
-    shipFrom = "-1"  # 8 Product_ShippedFrom
-    shipTo = "-1"  # 9 Product_ShippedTo
-    left = "-1"  # 10 Product_QuantityLeft
-    escrow = "-1"  # 11 Vendor_Warranty
-    terms = "-1"  # 12 Vendor_TermsAndConditions
-    vendor = "-1"  # 13 Vendor_Name
-    sold = "-1"  # 14 Product_QuantitySold
-    addDate = "-1"  # 15 Product_AddedDate
-    BTC = "-1"  # 18 Product_BTC_SellingPrice
-    USD = "-1"  # 19 Product_USD_SellingPrice
-    rating = "-1"  # 20 Vendor_Rating
-    success = "-1"  # 21 Vendor_Successful_Transactions
-    EURO = "-1"  # 22 Product_EURO_SellingPrice
+    vendor = "-1"  # 0 *Vendor_Name
+    success = "-1"  # 1 Vendor_Successful_Transactions
+    rating_vendor = "-1"  # 2 Vendor_Rating
+    name = "-1"  # 3 *Product_Name
+    describe = "-1"  # 4 Product_Description
+    CVE = "-1"  # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
+    MS = "-1"  # 6 Product_MS_Classification (Microsoft Security)
+    category = "-1"  # 7 Product_Category
+    views = "-1"  # 8 Product_Number_Of_Views
+    reviews = "-1"  # 9 Product_Number_Of_Reviews
+    rating_item = "-1"  # 10 Product_Rating
+    addDate = "-1"  # 11 Product_AddedDate
+    BTC = "-1"  # 12 Product_BTC_SellingPrice
+    USD = "-1"  # 13 Product_USD_SellingPrice
+    EURO = "-1"  # 14 Product_EURO_SellingPrice
+    sold = "-1"  # 15 Product_QuantitySold
+    left = "-1"  # 16 Product_QuantityLeft
+    shipFrom = "-1"  # 17 Product_ShippedFrom
+    shipTo = "-1"  # 18 Product_ShippedTo
+    image = "-1"  # 19 Product_Image
+    vendor_image = "-1"  # 20 Vendor_Image
 
     # Finding Product Name
-    name = soup.find('div', {'class': "panel-heading"}).text.strip
+    name = soup.find('div', {'class': "panel-heading"}).text
+    name = cleanString(name.strip())
 
     # Finding Vendor
     temp = soup.findAll('div', {'class': "col-xs-12 col-sm-6 mt-5"})
     temp = temp[1].findAll('span')
-    temp = temp[1].find('b').text
-    name = temp.replace("@", "")
+    vendor = temp[1].find('b').text
+    vendor = cleanString(vendor.strip())
 
     # Finding Product Reviews
-    review = soup.find('span', {'class': "badge bg-success fs-12px"}).text.strip()
+    reviews = soup.find('span', {'class': "badge bg-success fs-12px"}).text.strip()
 
     # Finding Successful Transactions
     # NA
 
-    # Finding Prices
-    USD = soup.find('h3', {'class': "mb-2"}).text()
+    USD = soup.find('h3', {'class': "mb-2"}).text
     USD = USD.replace("Price: $", "").strip()
 
-    # Finding Escrow
-    escrow = soup.find('div', {'class': "alert alert-info text-center fw-bold"}).text
-    escrow = escrow.replace('You are protected by ', "").strip()
-
     # Finding the Product Category
-    temp = soup.select('div[class="mt-2"]')
-    temp = temp[0].findAll('span')
-    category = temp[1].text.strip()
+    temp = soup.select('div[class="mt-2"]')[1].text
+    temp = temp.replace("Category:", "")
+    category = temp.strip()
 
     # Finding the Product Quantity Available
     # temp = soup.find('em', {'class': "icon ni ni-layers-fill"}).parent.parent.parent
@@ -78,8 +75,8 @@ def metaversemarket_description_parser(soup):
 
     # Finding Shipment Information (Origin)
-    temp = soup.findAll('div', {'class': "alert alert-info"})
-    temp = temp[1].text.split("to")
+    temp = soup.find('div', {'class': "alert alert-info"}).text
+    temp = temp.split("to")
     shipFrom = temp[0].replace("Shipping from ", "").strip()
 
     # Finding Shipment Information (Destination)
@@ -123,8 +120,8 @@ def metaversemarket_description_parser(soup):
         MS = MS.replace('\n', '')
 
     # Populating the final variable (this should be a list with all fields scraped)
-    row = (name, describe, lastSeen, CVE, MS, review, category, shipFrom, shipTo, left, escrow, terms, vendor,
-           sold, addDate, BTC, USD, rating, success, EURO)
+    row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
+           BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
 
     # Sending the results
     return row
@@ -136,29 +133,30 @@ def metaversemarket_description_parser(soup):
 # return: 'row' that contains a variety of lists that each hold info on the listing page
 def metaversemarket_listing_parser(soup):
     # Fields to be parsed
-    nm = 0  # Total_Products (Should be Integer)
-    mktName = "DarkFox"  # 0 Marketplace_Name
-    name = []  # 1 Product_Name
-    CVE = []  # 2 Product_CVE_Classification (Common Vulnerabilities and Exposures)
-    MS = []  # 3 Product_MS_Classification (Microsoft Security)
-    category = []  # 4 Product_Category
-    describe = []  # 5 Product_Description
-    escrow = []  # 6 Vendor_Warranty
-    views = []  # 7 Product_Number_Of_Views
-    reviews = []  # 8 Product_Number_Of_Reviews
-    addDate = []  # 9 Product_AddDate
-    lastSeen = []  # 10 Product_LastViewDate
-    BTC = []  # 11 Product_BTC_SellingPrice
-    USD = []  # 12 Product_USD_SellingPrice
-    EURO = []  # 13 Product_EURO_SellingPrice
-    sold = []  # 14 Product_QuantitySold
-    qLeft = []  # 15 Product_QuantityLeft
-    shipFrom = []  # 16 Product_ShippedFrom
-    shipTo = []  # 17 Product_ShippedTo
-    vendor = []  # 18 Vendor
-    rating = []  # 19 Vendor_Rating
-    success = []  # 20 Vendor_Successful_Transactions
-    href = []  # 23 Product_Links (Urls)
+    nm = 0  # *Total_Products (Should be Integer)
+    mktName = "MetaVerseMarket"  # 0 *Marketplace_Name
+    vendor = []  # 1 *Vendor y
+    rating_vendor = []  # 2 Vendor_Rating
+    success = []  # 3 Vendor_Successful_Transactions
+    name = []  # 4 *Product_Name y
+    CVE = []  # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) don't worry about this
+    MS = []  # 6 Product_MS_Classification (Microsoft Security) don't worry about this
+    category = []  # 7 Product_Category y
+    describe = []  # 8 Product_Description
+    views = []  # 9 Product_Number_Of_Views
+    reviews = []  # 10 Product_Number_Of_Reviews
+    rating_item = []  # 11 Product_Rating
+    addDate = []  # 12 Product_AddDate
+    BTC = []  # 13 Product_BTC_SellingPrice
+    USD = []  # 14 Product_USD_SellingPrice y
+    EURO = []  # 15 Product_EURO_SellingPrice
+    sold = []  # 16 Product_QuantitySold
+    qLeft = []  # 17 Product_QuantityLeft
+    shipFrom = []  # 18 Product_ShippedFrom
+    shipTo = []  # 19 Product_ShippedTo
+    image = []  # 20 Product_Image
+    image_vendor = []  # 21 Vendor_Image
+    href = []  # 22 Product_Links
 
     listing = soup.findAll('div', {"class": "col-12 col-sm-4 col-xl-3 product_item_col p-1"})
@@ -175,11 +173,7 @@ def metaversemarket_listing_parser(soup):
 
         # Finding the Product
         product = bae[1].find('span', {"class": "text-primary"}).text
-        product = product.replace('\n', ' ')
-        product = product.replace(",", "")
-        product = product.replace("...", "")
-        product = product.strip()
-        name.append(product)
+        name.append(cleanString(product.strip()))
 
         # Finding Prices
         price = a.find('strong').text
@@ -191,20 +185,18 @@ def metaversemarket_listing_parser(soup):
         # Finding the Vendor
         temp = a.find('div', {'class': "mt-1 fs-12px"})
         temp = temp.findAll('span')
-        temp = temp[1].find('b').text
-        vendor_name = temp.replace("@", "").strip()
-        vendor.append(vendor_name)
+        vendor_name = temp[1].find('b').text
+        vendor.append(cleanString(vendor_name.strip()))
 
         # Finding the Category
         cat = a.select_one('div[class="fs-12px"]')
         cat = cat.findAll('span')[1].text
-        cat = cat.text
         cat = cat.strip()
         category.append(cat)
 
-        badge = a.findAll('span', {'class': "badge bg-success"})
+        badge = a.find('span', {'class': "badge bg-success"})
         # Finding Number Sold and Quantity Left
-        temp = badge[1].text
+        temp = badge.text
         temp = temp.split("/")
         num = temp[0]
         num = num.strip()
@@ -226,11 +218,7 @@ def metaversemarket_listing_parser(soup):
         description = a.find('p', {'class': "alert alert-light text-ssbold p-1"}).text
         description = description.replace("\n", " ")
         description = description.strip()
-        describe.append(description)
-
-        # Finding Escrow
-        es = a.find('span', {'class': "fw-bold"}).text.strip()
-        escrow.append(es)
+        describe.append(cleanString(description))
 
         # Finding Number of Views
         view = a.find('span', {'class': "badge bg-primary"}).text.strip()
@@ -239,11 +227,11 @@ def metaversemarket_listing_parser(soup):
         # Find where ships from
         ships = a.find('div', {'class': "alert alert-info item_alert fs-12px p-1"})
         ships = ships.findAll('b')
-        sFrom = ships[0].text.strips()
+        sFrom = ships[0].text.strip()
         shipFrom.append(sFrom)
 
         # Find where it ships to
-        sTo = ships[1].text.strips()
+        sTo = ships[1].text.strip()
         shipTo.append(sTo)
 
         # Searching for CVE and MS categories
@@ -274,8 +262,8 @@ def metaversemarket_listing_parser(soup):
         MS.append(MSValue)
 
     # Populate the final variable (this should be a list with all fields scraped)
-    return organizeProducts(mktName, nm, name, CVE, MS, category, describe, escrow, views, reviews, addDate, lastSeen,
-                            BTC, USD, EURO, qLeft, shipFrom, shipTo, vendor, rating, success, sold, href)
+    return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
+                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
 
 
 # called by the crawler to get description links on a listing page

diff --git a/MarketPlaces/PabloEscobarMarket/crawler_selenium.py b/MarketPlaces/PabloEscobarMarket/crawler_selenium.py
index 8dc783c..7f516ff 100644
--- a/MarketPlaces/PabloEscobarMarket/crawler_selenium.py
+++ b/MarketPlaces/PabloEscobarMarket/crawler_selenium.py
@@ -209,7 +209,7 @@ def crawlForum(driver):
                     driver.back()
 
                 # comment out
-                # break
+                break
 
                 # comment out
                 if count == 1:
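
Every rewritten description parser now fills the same 21 slots in the same order, so the convention is worth writing down once. A sketch of that layout as a namedtuple (not part of the patch; the field order follows the row tuples built above):

```python
from collections import namedtuple

# Order matches the row tuples returned by the rewritten description parsers
DescriptionRow = namedtuple('DescriptionRow', [
    'vendor', 'rating_vendor', 'success', 'name', 'describe', 'CVE', 'MS',
    'category', 'views', 'reviews', 'rating_item', 'addDate', 'BTC', 'USD',
    'EURO', 'sold', 'left', 'shipFrom', 'shipTo', 'image', 'vendor_image',
])

# Usage: wrap a parser's return value to get named access
# labeled = DescriptionRow(*pabloescobarmarket_description_parser(soup))
# print(labeled.USD)
```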
diff --git a/MarketPlaces/PabloEscobarMarket/parser.py b/MarketPlaces/PabloEscobarMarket/parser.py
index ecdd086..a716581 100644
--- a/MarketPlaces/PabloEscobarMarket/parser.py
+++ b/MarketPlaces/PabloEscobarMarket/parser.py
@@ -14,26 +14,27 @@ from bs4 import BeautifulSoup
 def pabloescobarmarket_description_parser(soup):
 
     # Fields to be parsed
-    name = "-1"  # 0 Product_Name
-    describe = "-1"  # 1 Product_Description
-    lastSeen = "-1"  # 2 Product_LastViewDate
-    CVE = "-1"  # 4 Product_CVE_Classification (Common Vulnerabilities and Exposures)
-    MS = "-1"  # 5 Product_MS_Classification (Microsoft Security)
-    review = "-1"  # 6 Product_Number_Of_Reviews
-    category = "-1"  # 7 Product_Category
-    shipFrom = "-1"  # 8 Product_ShippedFrom
-    shipTo = "-1"  # 9 Product_ShippedTo
-    left = "-1"  # 10 Product_QuantityLeft
-    escrow = "-1"  # 11 Vendor_Warranty
-    terms = "-1"  # 12 Vendor_TermsAndConditions
-    vendor = "-1"  # 13 Vendor_Name
-    sold = "-1"  # 14 Product_QuantitySold
-    addDate = "-1"  # 15 Product_AddedDate
-    BTC = "-1"  # 18 Product_BTC_SellingPrice
-    USD = "-1"  # 19 Product_USD_SellingPrice
-    rating = "-1"  # 20 Vendor_Rating
-    success = "-1"  # 21 Vendor_Successful_Transactions
-    EURO = "-1"  # 22 Product_EURO_SellingPrice
+    vendor = "-1"  # 0 *Vendor_Name
+    success = "-1"  # 1 Vendor_Successful_Transactions
+    rating_vendor = "-1"  # 2 Vendor_Rating
+    name = "-1"  # 3 *Product_Name
+    describe = "-1"  # 4 Product_Description
+    CVE = "-1"  # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
+    MS = "-1"  # 6 Product_MS_Classification (Microsoft Security)
+    category = "-1"  # 7 Product_Category
+    views = "-1"  # 8 Product_Number_Of_Views
+    reviews = "-1"  # 9 Product_Number_Of_Reviews
+    rating_item = "-1"  # 10 Product_Rating
+    addDate = "-1"  # 11 Product_AddedDate
+    BTC = "-1"  # 12 Product_BTC_SellingPrice
+    USD = "-1"  # 13 Product_USD_SellingPrice
+    EURO = "-1"  # 14 Product_EURO_SellingPrice
+    sold = "-1"  # 15 Product_QuantitySold
+    left = "-1"  # 16 Product_QuantityLeft
+    shipFrom = "-1"  # 17 Product_ShippedFrom
+    shipTo = "-1"  # 18 Product_ShippedTo
+    image = "-1"  # 19 Product_Image
+    vendor_image = "-1"  # 20 Vendor_Image
 
     # Finding Product Name
     # NA
@@ -109,8 +110,8 @@ def pabloescobarmarket_description_parser(soup):
         MS = MS.replace('\n', '')
 
     # Populating the final variable (this should be a list with all fields scraped)
-    row = (name, describe, lastSeen, CVE, MS, review, category, shipFrom, shipTo, left, escrow, terms, vendor,
-           sold, addDate, BTC, USD, rating, success, EURO)
+    row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
+           BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
 
     # Sending the results
     return row
@@ -122,29 +123,30 @@ def pabloescobarmarket_description_parser(soup):
 # return: 'row' that contains a variety of lists that each hold info on the listing page
 def pabloescobarmarket_listing_parser(soup):
     # Fields to be parsed
-    nm = 0  # Total_Products (Should be Integer)
-    mktName = "PabloEscobarMarket"  # 0 Marketplace_Name
-    name = []  # 1 Product_Name
-    CVE = []  # 2 Product_CVE_Classification (Common Vulnerabilities and Exposures)
-    MS = []  # 3 Product_MS_Classification (Microsoft Security)
-    category = []  # 4 Product_Category
-    describe = []  # 5 Product_Description
-    escrow = []  # 6 Vendor_Warranty
-    views = []  # 7 Product_Number_Of_Views
-    reviews = []  # 8 Product_Number_Of_Reviews
-    addDate = []  # 9 Product_AddDate
-    lastSeen = []  # 10 Product_LastViewDate
-    BTC = []  # 11 Product_BTC_SellingPrice
-    USD = []  # 12 Product_USD_SellingPrice
-    EURO = []  # 13 Product_EURO_SellingPrice
-    sold = []  # 14 Product_QuantitySold
-    qLeft = []  # 15 Product_QuantityLeft
-    shipFrom = []  # 16 Product_ShippedFrom
-    shipTo = []  # 17 Product_ShippedTo
-    vendor = []  # 18 Vendor
-    rating = []  # 19 Vendor_Rating
-    success = []  # 20 Vendor_Successful_Transactions
-    href = []  # 23 Product_Links (Urls)
+    nm = 0  # *Total_Products (Should be Integer)
+    mktName = "PabloEscobarMarket"  # 0 *Marketplace_Name
+    vendor = []  # 1 *Vendor y
+    rating_vendor = []  # 2 Vendor_Rating
+    success = []  # 3 Vendor_Successful_Transactions
+    name = []  # 4 *Product_Name y
+    CVE = []  # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) don't worry about this
+    MS = []  # 6 Product_MS_Classification (Microsoft Security) don't worry about this
+    category = []  # 7 Product_Category y
+    describe = []  # 8 Product_Description
+    views = []  # 9 Product_Number_Of_Views
+    reviews = []  # 10 Product_Number_Of_Reviews
+    rating_item = []  # 11 Product_Rating
+    addDate = []  # 12 Product_AddDate
+    BTC = []  # 13 Product_BTC_SellingPrice
+    USD = []  # 14 Product_USD_SellingPrice y
+    EURO = []  # 15 Product_EURO_SellingPrice
+    sold = []  # 16 Product_QuantitySold
+    qLeft = []  # 17 Product_QuantityLeft
+    shipFrom = []  # 18 Product_ShippedFrom
+    shipTo = []  # 19 Product_ShippedTo
+    image = []  # 20 Product_Image
+    image_vendor = []  # 21 Vendor_Image
+    href = []  # 22 Product_Links
 
     listing = soup.findAll('div', {"class": "p-4"})
@@ -220,8 +222,8 @@ def pabloescobarmarket_listing_parser(soup):
         MS.append(MSValue)
 
     # Populate the final variable (this should be a list with all fields scraped)
-    return organizeProducts(mktName, nm, name, CVE, MS, category, describe, escrow, views, reviews, addDate, lastSeen,
-                            BTC, USD, EURO, qLeft, shipFrom, shipTo, vendor, rating, success, sold, href)
+    return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
+                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
 
 
 # called by the crawler to get description links on a listing page

diff --git a/MarketPlaces/ThiefWorld/crawler_selenium.py b/MarketPlaces/ThiefWorld/crawler_selenium.py
index 16f60b0..af5a456 100644
--- a/MarketPlaces/ThiefWorld/crawler_selenium.py
+++ b/MarketPlaces/ThiefWorld/crawler_selenium.py
@@ -87,8 +87,8 @@ def createFFDriver():
     ff_prof.set_preference("privacy.sanitize.sanitizeOnShutdown", True)
     ff_prof.set_preference("signon.rememberSignons", False)
     ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
-    ff_prof.set_preference("network.dns.disablePrefetch", True)
-    ff_prof.set_preference("network.http.sendRefererHeader", 0)
+    # ff_prof.set_preference("network.dns.disablePrefetch", True)
+    # ff_prof.set_preference("network.http.sendRefererHeader", 0)
     ff_prof.set_preference("permissions.default.image", 3)
     ff_prof.set_preference("browser.download.folderList", 2)
     ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
@@ -180,8 +180,8 @@ def getNameFromURL(url):
 def getInterestedLinks():
     links = []
 
-    # Hacking and DDOS
-    links.append('http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion/catalog/35')
+    # Hacking and DOSS
+    links.append(['Hacking and DOSS', 'http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion/catalog/35'])
     # # Carding Manuals
     # links.append('http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion/catalog/20')
     # # Software
@@ -202,7 +202,8 @@ def crawlForum(driver):
 
     i = 0
     while i < len(linksToCrawl):
-        link = linksToCrawl[i]
+        cat = linksToCrawl[i][0]
+        link = linksToCrawl[i][1]
         print('Crawling :', link)
         try:
             has_next_page = True
@@ -214,6 +215,7 @@ def crawlForum(driver):
             except:
                 driver.refresh()
             html = driver.page_source
+            html += f"<calsys-cat>{cat}</calsys-cat>"
             savePage(driver, html, link)
 
             list = productPages(html)

diff --git a/MarketPlaces/ThiefWorld/parser.py b/MarketPlaces/ThiefWorld/parser.py
index bd6c371..ba0f51c 100644
--- a/MarketPlaces/ThiefWorld/parser.py
+++ b/MarketPlaces/ThiefWorld/parser.py
@@ -66,8 +66,6 @@ def thiefWorld_description_parser(soup: BeautifulSoup) -> Tuple:
     rating_item = rating_item.replace("rating", "")
     rating_item = cleanString(rating_item.strip())
 
-    category = "Hacking, DOSS"
-
     # Populating the final variable (this should be a list with all fields scraped)
     row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
            BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
@@ -126,7 +124,9 @@ def thiefWorld_listing_parser(soup: BeautifulSoup):
 
         CVE.append('-1')
         MS.append('-1')
-        category.append('Hacking, DOSS')
+
+        cat = soup.find('calsys-cat').text
+        category.append(cat.strip())
 
         productDescription = product.find('div', {'class': 'text'}).text
         productDescription = cleanString(productDescription.strip())

diff --git a/MarketPlaces/TorMarket/crawler_selenium.py b/MarketPlaces/TorMarket/crawler_selenium.py
index de75f89..86fde52 100644
--- a/MarketPlaces/TorMarket/crawler_selenium.py
+++ b/MarketPlaces/TorMarket/crawler_selenium.py
@@ -215,14 +215,14 @@ def crawlForum(driver):
                     driver.back()
 
                 # comment out
-                break
+                # break
 
                 # comment out
                 if count == 1:
                     break
 
                 try:
-                    link = driver.find_element(by=By.LINK_TEXT, value='Next').get_attribute('href')
+                    link = driver.find_element(by=By.LINK_TEXT, value='NEXT').get_attribute('href')
                     if link == "":
                         raise NoSuchElementException
                     count += 1
diff --git a/MarketPlaces/TorMarket/parser.py b/MarketPlaces/TorMarket/parser.py
index e6e14b9..417f8ac 100644
--- a/MarketPlaces/TorMarket/parser.py
+++ b/MarketPlaces/TorMarket/parser.py
@@ -16,29 +16,27 @@ def tormarket_description_parser(soup):
 
     # Fields to be parsed
-    name = "-1"  # 0 Product_Name
-    describe = "-1"  # 1 Product_Description
-    lastSeen = "-1"  # 2 Product_LastViewDate
-    rules = "-1"  # 3 NOT USED ...
-    CVE = "-1"  # 4 Product_CVE_Classification (Common Vulnerabilities and Exposures)
-    MS = "-1"  # 5 Product_MS_Classification (Microsoft Security)
-    review = "-1"  # 6 Product_Number_Of_Reviews
+    vendor = "-1"  # 0 *Vendor_Name
+    success = "-1"  # 1 Vendor_Successful_Transactions
+    rating_vendor = "-1"  # 2 Vendor_Rating
+    name = "-1"  # 3 *Product_Name
+    describe = "-1"  # 4 Product_Description
+    CVE = "-1"  # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
+    MS = "-1"  # 6 Product_MS_Classification (Microsoft Security)
     category = "-1"  # 7 Product_Category
-    shipFrom = "-1"  # 8 Product_ShippedFrom
-    shipTo = "-1"  # 9 Product_ShippedTo
-    left = "-1"  # 10 Product_QuantityLeft
-    escrow = "-1"  # 11 Vendor_Warranty
-    terms = "-1"  # 12 Vendor_TermsAndConditions
-    vendor = "-1"  # 13 Vendor_Name
-    sold = "-1"  # 14 Product_QuantitySold
-    addDate = "-1"  # 15 Product_AddedDate
-    available = "-1"  # 16 NOT USED ...
-    endDate = "-1"  # 17 NOT USED ...
-    BTC = "-1"  # 18 Product_BTC_SellingPrice
-    USD = "-1"  # 19 Product_USD_SellingPrice
-    rating = "-1"  # 20 Vendor_Rating
-    success = "-1"  # 21 Vendor_Successful_Transactions
-    EURO = "-1"  # 22 Product_EURO_SellingPrice
+    views = "-1"  # 8 Product_Number_Of_Views
+    reviews = "-1"  # 9 Product_Number_Of_Reviews
+    rating_item = "-1"  # 10 Product_Rating
+    addDate = "-1"  # 11 Product_AddedDate
+    BTC = "-1"  # 12 Product_BTC_SellingPrice
+    USD = "-1"  # 13 Product_USD_SellingPrice
+    EURO = "-1"  # 14 Product_EURO_SellingPrice
+    sold = "-1"  # 15 Product_QuantitySold
+    left = "-1"  # 16 Product_QuantityLeft
+    shipFrom = "-1"  # 17 Product_ShippedFrom
+    shipTo = "-1"  # 18 Product_ShippedTo
+    image = "-1"  # 19 Product_Image
+    vendor_image = "-1"  # 20 Vendor_Image
 
     #finding the name of the product
     name_of_product = soup.find("h1", {"class": "product_title entry-title"}).find("a").text
@@ -51,7 +49,7 @@ def tormarket_description_parser(soup):
     if inquires_about_product == "There are no inquiries yet.":
-        review = 0
+        reviews = 0
     else:
-        review = -1 #fix later pls
+        reviews = "-1"  # fix later pls
 
     #finding the terms and conditions
     terms_and_conditions = soup.find("div", {"class": "woocommerce-Tabs-panel woocommerce-Tabs-panel--wcfm_enquiry_tab panel entry-content wc-tab"}).find("p").text
@@ -68,8 +66,8 @@ def tormarket_description_parser(soup):
     #everything else gets a -1 because they are not found
 
     # Populating the final variable (this should be a list with all fields scraped)
-    row = (name, describe, lastSeen, rules, CVE, MS, review, category, shipFrom, shipTo, left, escrow, terms, vendor,
-           sold, addDate, available, endDate, BTC, USD, rating, success, EURO)
+    row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
+           BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
 
     # Sending the results
     return row
@@ -82,28 +80,30 @@ def tormarket_listing_parser(soup):
 
     # Fields to be parsed
-    nm = 0  # *Total_Products (Should be Integer)
-    mktName = "TorMarket"  # 0 *Marketplace_Name
-    vendor = []  # 1 *Vendor y
-    rating_vendor = []  # 2 Vendor_Rating
-    success = []  # 3 Vendor_Successful_Transactions
-    name = []  # 4 *Product_Name y
-    CVE = []  # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
-    MS = []  # 6 Product_MS_Classification (Microsoft Security)
-    category = []  # 7 Product_Category y
-    describe = []  # 8 Product_Description
-    views = []  # 9 Product_Number_Of_Views
-    reviews = []  # 10 Product_Number_Of_Reviews
-    rating_item = []  # 11 Product_Rating
-    addDate = []  # 12 Product_AddDate
-    BTC = []  # 13 Product_BTC_SellingPrice
-    USD = []  # 14 Product_USD_SellingPrice y
-    EURO = []  # 15 Product_EURO_SellingPrice
-    sold = []  # 16 Product_QuantitySold
-    qLeft = []  # 17 Product_QuantityLeft
-    shipFrom = []  # 18 Product_ShippedFrom
-    shipTo = []  # 19 Product_ShippedTo
-    href = []  # 20 Product_Links
+    nm = 0  # *Total_Products (Should be Integer)
+    mktName = "TorMarket"  # 0 *Marketplace_Name
+    vendor = []  # 1 *Vendor y
+    rating_vendor = []  # 2 Vendor_Rating
+    success = []  # 3 Vendor_Successful_Transactions
+    name = []  # 4 *Product_Name y
+    CVE = []  # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) don't worry about this
+    MS = []  # 6 Product_MS_Classification (Microsoft Security) don't worry about this
+    category = []  # 7 Product_Category y
+    describe = []  # 8 Product_Description
+    views = []  # 9 Product_Number_Of_Views
+    reviews = []  # 10 Product_Number_Of_Reviews
+    rating_item = []  # 11 Product_Rating
+    addDate = []  # 12 Product_AddDate
+    BTC = []  # 13 Product_BTC_SellingPrice
+    USD = []  # 14 Product_USD_SellingPrice y
+    EURO = []  # 15 Product_EURO_SellingPrice
+    sold = []  # 16 Product_QuantitySold
+    qLeft = []  # 17 Product_QuantityLeft
+    shipFrom = []  # 18 Product_ShippedFrom
+    shipTo = []  # 19 Product_ShippedTo
+    image = []  # 20 Product_Image
+    image_vendor = []  # 21 Vendor_Image
+    href = []  # 22 Product_Links
 
     products_list = soup.find('ul', {"class": "products columns-3 tablet-columns-2 mobile-columns-1"}).find_all('li')
     nm = len(products_list)
@@ -159,30 +159,8 @@ def tormarket_listing_parser(soup):
 
     # Populate the final variable (this should be a list with all fields scraped)
-    return organizeProducts(
-        marketplace = "TorMarket",
-        nm = nm,
-        vendor = vendor,
-        rating_vendor = rating_vendor,
-        success_vendor = success,
-        nombre = name,
-        CVE = CVE,
-        MS = MS,
-        category = category,
-        describe = describe,
-        views = views,
-        reviews = reviews,
-        rating_item = rating_item,
-        addDate = addDate,
-        BTC = BTC,
-        USD = USD,
-        EURO = EURO,
-        sold = sold,
-        qLeft = qLeft,
-        shipFrom = shipFrom,
-        shipTo = shipTo,
-        href = href
-    )
+    return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
+                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
 
 
 #called by the crawler to get description links on a listing page
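
A parser change like the TorMarket rewrite is easiest to smoke-test against pages the crawler has already saved, before running the full pipeline. A sketch (the file name is illustrative; the exact shape of the return value is whatever organizeProducts builds from the 24 positional arguments):

```python
from bs4 import BeautifulSoup

from MarketPlaces.TorMarket.parser import tormarket_listing_parser

# Illustrative path; use any listing page saved by the TorMarket crawler
with open('tormarket_listing_sample.html', encoding='utf-8') as f:
    soup = BeautifulSoup(f.read(), 'html.parser')

# An exception here usually means one of the selectors went stale
rows = tormarket_listing_parser(soup)
print(type(rows))
```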