__author__ = 'DarkWeb'

# Auxiliary functions to clean or convert scraped data
from MarketPlaces.Utilities.utilities import *

# BeautifulSoup is used to search through the HTML tree
from bs4 import BeautifulSoup, ResultSet, Tag


def apocalypse_description_parser(soup: Tag):
    """Parse a product-description page of the Apocalypse marketplace.

    :param soup: BeautifulSoup tag of the description HTML page
    :return: tuple of 19 scraped fields; "-1" for any field not on the page
    """

    # Fields to be parsed; "-1" marks a value not found on the page
    vendor = "-1"            # 0 *Vendor_Name
    success = "-1"           # 1 Vendor_Successful_Transactions
    rating_vendor = "-1"     # 2 Vendor_Rating
    name = "-1"              # 3 *Product_Name
    describe = "-1"          # 4 Product_Description
    CVE = "-1"               # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
    MS = "-1"                # 6 Product_MS_Classification (Microsoft Security)
    category = "-1"          # 7 Product_Category
    views = "-1"             # 8 Product_Number_Of_Views
    reviews = "-1"           # 9 Product_Number_Of_Reviews
    rating_item = "-1"       # 10 Product_Rating
    addDate = "-1"           # 11 Product_AddedDate
    BTC = "-1"               # 12 Product_BTC_SellingPrice
    USD = "-1"               # 13 Product_USD_SellingPrice
    EURO = "-1"              # 14 Product_EURO_SellingPrice
    sold = "-1"              # 15 Product_QuantitySold
    left = "-1"              # 16 Product_QuantityLeft
    shipFrom = "-1"          # 17 Product_ShippedFrom
    shipTo = "-1"            # 18 Product_ShippedTo

    content: Tag = soup.find("div", {'id': "article_page"})

    product_name = content.find("p", {"class": "list-group-item text-center mb-0 box"}).text
    name = cleanString(product_name.strip())

    product_description = content.find("pre").text
    describe = cleanString(product_description.strip())

    # BUG FIX: the review count was previously stored in an unused local
    # ("review"), so the returned "reviews" field was always "-1".
    product_reviews_list: ResultSet[Tag] = content.find("table", {"class": "table product_reviews"}) \
        .find_all("li")
    reviews = str(len(product_reviews_list))

    product_category = content.find("a", {"class": "badge badge-danger"}).text
    category = cleanString(product_category.strip())

    product_ships_from = content.find("span", {"class": "badge badge-info"}).text
    shipFrom = cleanString(product_ships_from.strip())

    # The second green badge on the page holds the ship-to location
    product_success_badge: ResultSet[Tag] = content.find_all("span", {"class": "badge badge-success"})
    product_ships_to = product_success_badge[1].text
    shipTo = cleanString(product_ships_to.strip())

    product_supply = content.find("span", {"class": "badge badge-warning"}).text
    left = cleanString(product_supply.strip())

    product_primary_badge: ResultSet[Tag] = content.find_all("span", {"class": "badge badge-primary"})
    # Product vendor comes in the form of "@ vendor_name"
    product_vendor = product_primary_badge[0].text.replace("@", "")
    vendor = cleanString(product_vendor.strip())
    sold = cleanString(product_primary_badge[1].text.strip())

    product_prices: Tag = content.find("p", {"style": "border-bottom:1px solid GREY;"})
    USD = product_prices.find("span", {"class": "pr"}).text
    prices_array: ResultSet[Tag] = product_prices.find_all("span", {"class": "pr1"})
    BTC = prices_array[1].text

    # Populating the final variable (this should be a list with all fields scraped)
    row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views,
           reviews, rating_item, addDate, BTC, USD, EURO, sold, left, shipFrom, shipTo)

    # Sending the results
    return row


def apocalypse_listing_parser(soup: Tag):
    """Parse a product-listing page of the Apocalypse marketplace.

    :param soup: BeautifulSoup tag of the listing HTML page
    :return: result of organizeProducts(...) over parallel per-product lists
    """

    # Fields to be parsed (parallel lists, one entry per product)
    nm = 0                      # Total_Products (Should be Integer)
    mktName = "Apocalypse"      # 0 Marketplace_Name
    name = []                   # 1 Product_Name
    CVE = []                    # 2 Product_CVE_Classification (Common Vulnerabilities and Exposures)
    MS = []                     # 3 Product_MS_Classification (Microsoft Security)
    category = []               # 4 Product_Category
    describe = []               # 5 Product_Description
    views = []                  # 7 Product_Number_Of_Views
    reviews = []                # 8 Product_Number_Of_Reviews
    addDate = []                # 9 Product_AddDate
    BTC = []                    # 11 Product_BTC_SellingPrice
    USD = []                    # 12 Product_USD_SellingPrice
    EURO = []                   # 13 Product_EURO_SellingPrice
    sold = []                   # 14 Product_QuantitySold
    qLeft = []                  # 15 Product_QuantityLeft
    shipFrom = []               # 16 Product_ShippedFrom
    shipTo = []                 # 17 Product_ShippedTo
    vendor = []                 # 18 Vendor
    rating = []                 # 19 Vendor_Rating
    success = []                # 20 Vendor_Successful_Transactions
    href = []                   # 23 Product_Links (Urls)

    listings: ResultSet[Tag] = soup.find("div", {"class": "col-lg-9 my-4"}) \
        .find_all("div", {"class": "col-lg-4 col-md-6 mb-1"})

    for prod in listings:
        product_name = prod.find('h5', {"class": "art_title"}).text
        name.append(cleanString(product_name.strip()))

        # Fields not present on the listing page
        CVE.append("-1")
        MS.append("-1")
        describe.append("-1")
        reviews.append("-1")
        addDate.append("-1")
        BTC.append("-1")
        EURO.append("-1")
        shipTo.append("-1")
        success.append("-1")

        product_price = prod.find("span", {"class": "priceP"}).text
        USD.append(cleanString(product_price.strip()))

        # BUG FIX: "sold" was previously appended twice per product (once from
        # this green badge, once again from product_statistics[1]), which
        # desynchronized the parallel lists passed to organizeProducts.
        # Only one append is kept.
        product_sold = prod.find("span", {"class": "badge badge-success"}).text
        sold.append(cleanString(product_sold.strip()))

        product_statistics: ResultSet[Tag] = prod.find_all("p", {"class": "mb-0 card-text"})

        product_category = product_statistics[0].find("a").text
        category.append(cleanString(product_category.strip()))

        product_quantity_left = product_statistics[2].find("span", {"class": "badge bluebadge"}).text
        qLeft.append(cleanString(product_quantity_left.strip()))

        product_views = product_statistics[3].find("span").text
        views.append(cleanString(product_views.strip()))

        product_ships_from = product_statistics[4].find("span").text
        shipFrom.append(cleanString(product_ships_from.strip()))

        product_vendor_tag: Tag = product_statistics[5].find("a").find("span", {"class": "badge badge-primary"})
        # Product vendors & ratings are displayed as "vendor_name ★ 5.0";
        # splitting on the star (★) yields [vendor, rating]
        product_vendor, product_vendor_rating = product_vendor_tag.text.split("★")
        vendor.append(cleanString(product_vendor.strip()))
        rating.append(cleanString(product_vendor_rating.strip()))

        product_href = prod.find('a').get('href')
        href.append(product_href)

        nm += 1

    return organizeProducts(
        marketplace=mktName,
        nm=nm,
        vendor=vendor,
        rating_vendor=rating,
        success_vendor=success,
        nombre=name,
        CVE=CVE,
        MS=MS,
        category=category,
        describe=describe,
        views=views,
        reviews=reviews,
        rating_item=["-1" for _ in range(nm)],
        addDate=addDate,
        BTC=BTC,
        USD=USD,
        EURO=EURO,
        sold=sold,
        qLeft=qLeft,
        shipFrom=shipFrom,
        shipTo=shipTo,
        href=href
    )


# called by the crawler to get description links on a listing page
# @param: beautifulsoup object that is using the correct html page (listing page)
# return: list of description links from a listing page
def apocalypse_links_parser(soup):
    """Return every product-description link found on a listing page."""

    # Returning all links that should be visited by the Crawler
    href = []
    listing = soup.findAll('div', {"class": "col-lg-4 col-md-6 mb-1"})

    for a in listing:
        bae = a.find('a', href=True)
        link = bae['href']
        href.append(link)

    return href