__author__ = 'DarkWeb'

# Here, we are importing the auxiliary functions to clean or convert data
from MarketPlaces.Utilities.utilities import *

# Here, we are importing BeautifulSoup to search through the HTML tree
from bs4 import BeautifulSoup, ResultSet, Tag


def _extract_price_text(price_container: Tag) -> str:
    """Return the price text found in *price_container* with every "$" removed.

    If the container holds an ``<ins>`` element, the amount is read from inside
    it (presumably the discounted price - confirm against the site markup);
    otherwise the first amount span of the container itself is used.

    Raises:
        AttributeError: if no amount span is found.
    """
    scope = price_container.find("ins") or price_container
    amount = scope.find("span", {"class": "woocommerce-Price-amount amount"})
    return amount.text.replace("$", "")


# This is the method to parse the Description Pages (one page to each Product in the Listing Pages)
def thedarkmarket_description_parser(soup: BeautifulSoup):
    """Parse one product description page into a row of fields.

    Only vendor, name, description, category, reviews, item rating and USD
    price are scraped; every other field keeps the "-1" placeholder.

    Args:
        soup: parsed HTML of a product description page.

    Returns:
        A 21-element tuple in the order (vendor, rating_vendor, success, name,
        describe, CVE, MS, category, views, reviews, rating_item, addDate, BTC,
        USD, EURO, sold, left, shipFrom, shipTo, image, image_vendor).

    Raises:
        AttributeError: if one of the expected page elements is missing
            (``find`` returns ``None`` and the chained access fails).
    """
    # Fields that this page does not provide keep the "-1" placeholder
    rating_vendor = "-1"                    # 1 Vendor_Rating
    success = "-1"                          # 2 Vendor_Successful_Transactions
    CVE = "-1"                              # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
    MS = "-1"                               # 6 Product_MS_Classification (Microsoft Security)
    views = "-1"                            # 8 Product_Number_Of_Views
    addDate = "-1"                          # 11 Product_AddedDate
    BTC = "-1"                              # 12 Product_BTC_SellingPrice
    EURO = "-1"                             # 14 Product_EURO_SellingPrice
    sold = "-1"                             # 15 Product_QuantitySold
    left = "-1"                             # 16 Product_QuantityLeft
    shipFrom = "-1"                         # 17 Product_ShippedFrom
    shipTo = "-1"                           # 18 Product_ShippedTo
    image = "-1"
    image_vendor = "-1"

    details: Tag = soup.find("div", {"class": "wc-content"})

    # 0 *Vendor_Name
    vendor = details.find("div", {"class": "product_meta"}).find(
        "a", {"class": "wcvendors_cart_sold_by_meta"}).text

    # 3 *Product_Name
    name = details.find("h1", {"class": "product_title entry-title"}).text

    # 4 Product_Description: text of every element in the description tab
    # except the <h2> headings, joined with single spaces
    describe_list = [
        elem.text
        for elem in details.find("div", {"id": "tab-description"}).find_all()
        if elem.name != "h2"
    ]
    describe = " ".join(describe_list)

    # 7 Product_Category
    # NOTE(review): the category is hard-coded and the page's "posted_in"
    # links are not used - confirm this is intended.
    category = "Hacking"

    # 9 Product_Number_Of_Reviews / 10 Product_Rating (both read from "title" attributes)
    reviews = details.find("div", {"class": "review-link"}).get("title")
    rating_item = details.find("div", {"class": "star-rating"}).get('title')

    # 13 Product_USD_SellingPrice
    USD = _extract_price_text(details.find("p", {"class": "price"}))

    # Populating the final variable (this should be a list with all fields scraped)
    row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
           BTC, USD, EURO, sold, left, shipFrom, shipTo, image, image_vendor)

    # Sending the results
    return row


# This is the method to parse the Listing Pages
def thedarkmarket_listing_parser(soup: BeautifulSoup):
    """Parse a listing page and return its products via ``organizeProducts``.

    For every ``<li>`` of the product list, the vendor, name, category, item
    rating, USD price, link and first image URL are collected. All other field
    lists are passed to ``organizeProducts`` empty.

    Args:
        soup: parsed HTML of a listing page.

    Returns:
        Whatever ``organizeProducts`` returns for the collected fields.

    Raises:
        AttributeError: if the product list or one of the expected per-product
            elements is missing.
    """
    # Fields to be parsed
    nm = 0                                  # *Total_Products (Should be Integer)
    mktName = "TheDarkMarket"               # 0 *Marketplace_Name
    vendor = []                             # 1 *Vendor y
    rating_vendor = []                      # 2 Vendor_Rating
    success = []                            # 3 Vendor_Successful_Transactions
    name = []                               # 4 *Product_Name y
    CVE = []                                # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
    MS = []                                 # 6 Product_MS_Classification (Microsoft Security)
    category = []                           # 7 Product_Category y
    describe = []                           # 8 Product_Description
    views = []                              # 9 Product_Number_Of_Views
    reviews = []                            # 10 Product_Number_Of_Reviews
    rating_item = []                        # 11 Product_Rating
    addDate = []                            # 12 Product_AddDate
    BTC = []                                # 13 Product_BTC_SellingPrice
    USD = []                                # 14 Product_USD_SellingPrice y
    EURO = []                               # 15 Product_EURO_SellingPrice
    sold = []                               # 16 Product_QuantitySold
    qLeft = []                              # 17 Product_QuantityLeft
    shipFrom = []                           # 18 Product_ShippedFrom
    shipTo = []                             # 19 Product_ShippedTo
    image = []
    image_vendor = []
    href = []                               # 20 Product_Links

    products_list: ResultSet[Tag] = soup.find("ul", {"class": "products columns-3"}).find_all("li")

    # Fields not listed on this page (rating_vendor, success, CVE, MS, describe,
    # views, reviews, addDate, BTC, EURO, sold, qLeft, shipFrom, shipTo,
    # image_vendor) are intentionally left as empty lists.
    # NOTE(review): presumably organizeProducts tolerates empty lists - confirm.
    for product in products_list:
        nm += 1

        product_vendor = product.find("small", {"class": "wcvendors_sold_by_in_loop"}).find("a").text
        vendor.append(cleanString(product_vendor))

        product_name = product.find("h2", {"class": "woocommerce-loop-product__title"}).text
        name.append(cleanString(product_name))

        product_category = product.find("div", {"class": 'product-categories'}).text
        category.append(cleanString(product_category))

        product_rating = product.find("div", {"class": "star-rating"}).get("title")
        rating_item.append(cleanString(product_rating))

        product_price = _extract_price_text(product.find("span", {"class": "price"}))
        USD.append(cleanNumbers(product_price))

        product_href = product.find(
            "a", {"class": "woocommerce-LoopProduct-link woocommerce-loop-product__link"}).get("href")
        href.append(product_href)

        # "data-srcset" is split on spaces and only its first token is kept
        product_images_list = product.find(
            "a", {"class": "tf-loop-product-thumbs-link"}).find("img").get("data-srcset").split(" ")
        product_image = product_images_list[0]
        image.append(product_image)

    # Populate the final variable (this should be a list with all fields scraped)
    return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href,
                            image, image_vendor)


def thedarkmarket_links_parser(soup: BeautifulSoup):
    """Return the product links of a listing page for the Crawler to visit.

    Args:
        soup: parsed HTML of a listing page.

    Returns:
        A list with the ``href`` of each product link, in page order. The list
        is empty when the page has no product list; ``<li>`` items without a
        product link are skipped instead of raising.
    """
    # Returning all links that should be visited by the Crawler
    href = []

    container = soup.find("ul", {"class": "products columns-3"})
    # A page without a product list simply yields no links
    listing: ResultSet[Tag] = container.find_all("li") if container is not None else []

    for li in listing:
        a = li.find('a', {"class": "woocommerce-LoopProduct-link woocommerce-loop-product__link"})
        if a is None:
            # Not a product entry (no product anchor inside this <li>)
            continue
        link = a.get('href')
        href.append(link)

    print(f"Links: {href}")

    return href