# khangtran/dark_web_forums

__author__ = 'DarkWeb'
# Here, we are importing BeautifulSoup to search through the HTML tree
from bs4 import BeautifulSoup, ResultSet, Tag

# Here, we are importing the auxiliary functions to clean or convert data
from MarketPlaces.Utilities.utilities import *

# This is the method to parse the Description Pages (one page to each Product in the Listing Pages)
def darkmarket_description_parser(soup: BeautifulSoup):
    """Parse one product description page into a flat tuple of fields.

    Every field starts as the string "-1" and is overwritten only when the
    page provides a value for it.

    Args:
        soup: Parsed HTML of a single product description page.

    Returns:
        A 21-item tuple in this order: vendor, rating_vendor, success, name,
        describe, CVE, MS, category, views, reviews, rating_item, addDate,
        BTC, USD, EURO, sold, left, shipFrom, shipTo, image, image_vendor.

    Raises:
        AttributeError: If the "wc-content" container, the vendor link or the
            product title is missing from the page.
    """
    # Fields to be parsed (indices match the position in the returned row)
    vendor = "-1"                       # 0 *Vendor_Name
    rating_vendor = "-1"                # 1 Vendor_Rating
    success = "-1"                      # 2 Vendor_Successful_Transactions
    name = "-1"                         # 3 *Product_Name
    describe = "-1"                     # 4 Product_Description
    CVE = "-1"                          # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
    MS = "-1"                           # 6 Product_MS_Classification (Microsoft Security)
    category = "-1"                     # 7 Product_Category
    views = "-1"                        # 8 Product_Number_Of_Views
    reviews = "-1"                      # 9 Product_Number_Of_Reviews
    rating_item = "-1"                  # 10 Product_Rating
    addDate = "-1"                      # 11 Product_AddedDate
    BTC = "-1"                          # 12 Product_BTC_SellingPrice
    USD = "-1"                          # 13 Product_USD_SellingPrice
    EURO = "-1"                         # 14 Product_EURO_SellingPrice
    sold = "-1"                         # 15 Product_QuantitySold
    left = "-1"                         # 16 Product_QuantityLeft
    shipFrom = "-1"                     # 17 Product_ShippedFrom
    shipTo = "-1"                       # 18 Product_ShippedTo
    image = "-1"                        # 19
    image_vendor = "-1"                 # 20

    details: Tag = soup.find("div", {"class": "wc-content"})

    # Starred (required) fields are read unguarded: a page without them is
    # reported to the caller through the resulting AttributeError.
    vendor = details.find("div", {"class": "product_meta"}).find(
        "a", {"class": "wcvendors_cart_sold_by_meta"}).text
    name = details.find("h1", {"class": "product_title entry-title"}).text

    # Only the direct children of the description tab are joined. Their .text
    # already contains the text of everything nested inside them, so walking
    # all descendants would repeat nested text. The tab's <h2> is skipped.
    description_tab = details.find("div", {"id": "tab-description"})
    if description_tab is not None:
        describe = " ".join(
            child.text
            for child in description_tab.find_all(recursive=False)
            if child.name != "h2"
        )

    # The category is a fixed value; the page's own category links are not used.
    category = "Hacking"

    review_link = details.find("div", {"class": "review-link"})
    if review_link is not None:
        reviews = review_link.get("title", "-1")

    star_rating = details.find("div", {"class": "star-rating"})
    if star_rating is not None:
        rating_item = star_rating.get("title", "-1")

    price_container = details.find("p", {"class": "price"})
    if price_container is not None:
        # When an <ins> element is present the amount is read from inside it
        # (presumably the current price of a discounted item -- confirm
        # against the site markup); otherwise from the container itself.
        price_scope = price_container.find("ins") or price_container
        amount = price_scope.find("span", {"class": "woocommerce-Price-amount amount"})
        if amount is not None:
            USD = amount.text.replace("$", "")

    # Populating the final variable (this should be a list with all fields scraped)
    row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
           BTC, USD, EURO, sold, left, shipFrom, shipTo, image, image_vendor)

    # Sending the results
    return row

# This is the method to parse the Listing Pages
def darkmarket_listing_parser(soup: BeautifulSoup):
    """Parse a product listing page and hand the columns to organizeProducts.

    One entry per <li> of the "products columns-3" list is appended to each
    populated column, so the columns stay index-aligned.

    Args:
        soup: Parsed HTML of one listing page.

    Returns:
        Whatever organizeProducts returns for the collected columns.

    Raises:
        AttributeError: If the product list itself, or a product's vendor,
            name, category, price or link element, is missing.
    """
    # Fields to be parsed
    nm = 0                                    # *Total_Products (Should be Integer)
    mktName = "TheDarkMarket"                 # 0 *Marketplace_Name
    vendor = []                               # 1 *Vendor y
    rating_vendor = []                        # 2 Vendor_Rating
    success = []                              # 3 Vendor_Successful_Transactions
    name = []                                 # 4 *Product_Name y
    CVE = []                                  # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
    MS = []                                   # 6 Product_MS_Classification (Microsoft Security)
    category = []                             # 7 Product_Category y
    describe = []                             # 8 Product_Description
    views = []                                # 9 Product_Number_Of_Views
    reviews = []                              # 10 Product_Number_Of_Reviews
    rating_item = []                          # 11 Product_Rating
    addDate = []                              # 12 Product_AddDate
    BTC = []                                  # 13 Product_BTC_SellingPrice
    USD = []                                  # 14 Product_USD_SellingPrice y
    EURO = []                                 # 15 Product_EURO_SellingPrice
    sold = []                                 # 16 Product_QuantitySold
    qLeft = []                                # 17 Product_QuantityLeft
    shipFrom = []                             # 18 Product_ShippedFrom
    shipTo = []                               # 19 Product_ShippedTo
    image = []
    image_vendor = []
    href = []                                 # 20 Product_Links

    # NOTE: rating_vendor, success, CVE, MS, describe, views, reviews, addDate,
    # BTC, EURO, sold, qLeft, shipFrom, shipTo and image_vendor are not
    # populated here and are passed to organizeProducts as empty lists.

    amount_selector = {"class": "woocommerce-Price-amount amount"}
    products_list: ResultSet[Tag] = soup.find("ul", {"class": "products columns-3"}).find_all("li")

    for product in products_list:
        nm += 1

        product_vendor = product.find("small", {"class": "wcvendors_sold_by_in_loop"}).find("a").text
        vendor.append(cleanString(product_vendor))

        product_name = product.find("h2", {"class": "woocommerce-loop-product__title"}).text
        name.append(cleanString(product_name))

        product_category = product.find("div", {"class": 'product-categories'}).text
        category.append(cleanString(product_category))

        # The rating is optional: record "-1" when the element or its title
        # is absent, so the column keeps one entry per product.
        rating_tag = product.find("div", {"class": "star-rating"})
        product_rating = rating_tag.get("title") if rating_tag is not None else None
        rating_item.append(cleanString(product_rating) if product_rating is not None else "-1")

        # When an <ins> element is present the amount is read from inside it
        # (presumably the current price of a discounted item -- confirm
        # against the site markup); otherwise from the container itself.
        price_container = product.find("span", {"class": "price"})
        price_scope = price_container.find("ins") or price_container
        product_price = price_scope.find("span", amount_selector).text.replace("$", "")
        USD.append(cleanNumbers(product_price))

        product_href = product.find(
            "a", {"class": "woocommerce-LoopProduct-link woocommerce-loop-product__link"}).get("href")
        href.append(product_href)

        # The thumbnail is optional as well. data-srcset is split on spaces
        # and only the first token is kept.
        thumb_link = product.find("a", {"class": "tf-loop-product-thumbs-link"})
        thumb_img = thumb_link.find("img") if thumb_link is not None else None
        srcset = thumb_img.get("data-srcset") if thumb_img is not None else None
        image.append(srcset.split(" ")[0] if srcset else "-1")

    # Populate the final variable (this should be a list with all fields scraped)
    return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image,
                            image_vendor)

def darkmarket_links_parser(soup: BeautifulSoup):
    """Collect the product links found on a listing page.

    Args:
        soup: Parsed HTML of one listing page.

    Returns:
        A list with the href of every product anchor, in page order. List
        items without a product anchor, or whose anchor has no href, are
        skipped.

    Raises:
        AttributeError: If the "products columns-3" list is missing.
    """
    # Returning all links that should be visited by the Crawler
    href = []

    listing: ResultSet[Tag] = soup.find("ul", {"class": "products columns-3"}).find_all("li")

    for li in listing:
        a = li.find('a', {"class": "woocommerce-LoopProduct-link woocommerce-loop-product__link"})
        # An <li> without the product anchor (or without an href) yields no
        # link; it is skipped so the result never contains None.
        link = a.get('href') if a is not None else None
        if link:
            href.append(link)

    print(f"Links: {href}")

    return href