From d90bf734d0ad334a87ea8530ba8b3530e6c259c2 Mon Sep 17 00:00:00 2001
From: dabadcuber5
Date: Tue, 25 Jul 2023 10:37:45 -0700
Subject: [PATCH] finished TorMarket

---
 MarketPlaces/Initialization/marketsList.txt   |   2 +-
 MarketPlaces/Initialization/prepare_parser.py |  15 +-
 MarketPlaces/TorBay/parser.py                 |   1 -
 MarketPlaces/TorMarket/crawler_selenium.py    |  32 +-
 MarketPlaces/TorMarket/parser.py              | 345 +++++++-----------
 setup.ini                                     |  14 +-
 6 files changed, 165 insertions(+), 244 deletions(-)

diff --git a/MarketPlaces/Initialization/marketsList.txt b/MarketPlaces/Initialization/marketsList.txt
index 356649b..0393154 100644
--- a/MarketPlaces/Initialization/marketsList.txt
+++ b/MarketPlaces/Initialization/marketsList.txt
@@ -1 +1 @@
-LionMarketplace
\ No newline at end of file
+TorMarket
\ No newline at end of file
diff --git a/MarketPlaces/Initialization/prepare_parser.py b/MarketPlaces/Initialization/prepare_parser.py
index 21663fb..75090b1 100644
--- a/MarketPlaces/Initialization/prepare_parser.py
+++ b/MarketPlaces/Initialization/prepare_parser.py
@@ -16,7 +16,7 @@ from MarketPlaces.M00nkeyMarket.parser import *
 from MarketPlaces.DarkMatter.parser import *
 from MarketPlaces.DigitalThriftShop.parser import *
 from MarketPlaces.LionMarketplace.parser import *
-
+from MarketPlaces.TorMarket.parser import *
 
 from MarketPlaces.Classifier.classify_product import predict
 
@@ -166,7 +166,8 @@ def new_parse(marketPlace, url, createLog):
                         rmm = digitalThriftShop_description_parser(soup)
                     elif marketPlace == "LionMarketplace":
                         rmm = lionmarketplace_description_parser(soup)
-
+                    elif marketPlace == "TorMarket":
+                        rmm = tormarket_description_parser(soup)
                     # key = u"Pr:" + rmm[0].upper()[:desc_lim1] + u" Vendor:" + rmm[13].upper()[:desc_lim2]
                     key = u"Url:" + os.path.basename(line2).replace(".html", "")
 
@@ -197,8 +198,8 @@ def new_parse(marketPlace, url, createLog):
                 html = open(line1.strip('\n'))
                 soup = BeautifulSoup(html, "html.parser")
                 html.close()
-            except:
-
+            except Exception as e:
                 nError += 1
                 print("There was a problem to read the file " + line1 + " in the Listing section!")
+                print(e)
                 if createLog:
@@ -232,13 +233,16 @@ def new_parse(marketPlace, url, createLog):
                     rw = digitalThriftShop_listing_parser(soup)
                 elif marketPlace == "LionMarketplace":
                     rw = lionmarketplace_listing_parser(soup)
+                elif marketPlace == "TorMarket":
+                    rw = tormarket_listing_parser(soup)
                 else:
                     parseError = True
 
-            except:
+            except Exception as e:
                 nError += 1
                 print("There was a problem to parse the file " + line1 + " in the listing section!")
+                print(e)
                 if createLog:
                     logFile.write(
                         str(nError) + ". 
There was a problem to parse the file " + line1 + " in the Listing section.\n") diff --git a/MarketPlaces/TorBay/parser.py b/MarketPlaces/TorBay/parser.py index 25134fa..5c4f164 100644 --- a/MarketPlaces/TorBay/parser.py +++ b/MarketPlaces/TorBay/parser.py @@ -2,7 +2,6 @@ __author__ = 'DarkWeb' # Here, we are importing the auxiliary functions to clean or convert data from MarketPlaces.Utilities.utilities import * - # Here, we are importing BeautifulSoup to search through the HTML tree from bs4 import BeautifulSoup diff --git a/MarketPlaces/TorMarket/crawler_selenium.py b/MarketPlaces/TorMarket/crawler_selenium.py index 0528a05..ed94a8b 100644 --- a/MarketPlaces/TorMarket/crawler_selenium.py +++ b/MarketPlaces/TorMarket/crawler_selenium.py @@ -31,19 +31,19 @@ baseURL = 'http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion # Opens Tor Browser, crawls the website, then parses, then closes tor #acts like the main method for the crawler, another function at the end of this code calls this function later def startCrawling(): - opentor() - # mktName = getMKTName() - driver = getAccess() + # opentor() + mktName = getMKTName() + # driver = getAccess() + # + # if driver != 'down': + # try: + # login(driver) + # crawlForum(driver) + # except Exception as e: + # print(driver.current_url, e) + # closetor(driver) - if driver != 'down': - try: - login(driver) - crawlForum(driver) - except Exception as e: - print(driver.current_url, e) - closetor(driver) - - # new_parse(forumName, baseURL, False) + new_parse(mktName, baseURL, False) # Opens Tor Browser @@ -187,9 +187,9 @@ def getInterestedLinks(): # Hacking Tutorials links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/guides-tutorials/hacking/') # # Malware - # links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/malware/') + links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/malware/') # # Hacking Services - # links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/services/hacking-services/') + links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/services/hacking-services/') return links @@ -232,8 +232,8 @@ def crawlForum(driver): break # comment out - if count == 1: - break + # if count == 1: + # break try: link = driver.find_element(by=By.XPATH, value= diff --git a/MarketPlaces/TorMarket/parser.py b/MarketPlaces/TorMarket/parser.py index 847ca50..69d680c 100644 --- a/MarketPlaces/TorMarket/parser.py +++ b/MarketPlaces/TorMarket/parser.py @@ -6,12 +6,13 @@ from MarketPlaces.Utilities.utilities import * # Here, we are importing BeautifulSoup to search through the HTML tree from bs4 import BeautifulSoup +import re #parses description pages, so takes html pages of description pages using soup object, and parses it for info it needs #stores info it needs in different lists, these lists are returned after being organized #@param: soup object looking at html page of description page #return: 'row' that contains a variety of lists that each hold info on the description page -def darkfox_description_parser(soup): +def tormarket_description_parser(soup): # Fields to be parsed @@ -39,101 +40,32 @@ def darkfox_description_parser(soup): success = "-1" # 21 Vendor_Successful_Transactions EURO = "-1" # 22 Product_EURO_SellingPrice - # Finding Product Name - name = soup.find('h1').text - name = name.replace('\n', ' 
')
-    name = name.replace(",", "")
-    name = name.strip()
-
-    # Finding Vendor
-    vendor = soup.find('h3').find('a').text.strip()
-
-    # Finding Vendor Rating
-    rating = soup.find('span', {'class': "tag is-dark"}).text.strip()
-
-    # Finding Successful Transactions
-    success = soup.find('h3').text
-    success = success.replace("Vendor: ", "")
-    success = success.replace(vendor, "")
-    success = success.replace("(", "")
-    success = success.replace(")", "")
-    success = success.strip()
-
-    bae = soup.find('div', {'class': "box"}).find_all('ul')
-
-    # Finding Prices
-    USD = bae[1].find('strong').text.strip()
-
-    li = bae[2].find_all('li')
-
-    # Finding Escrow
-    escrow = li[0].find('span', {'class': "tag is-dark"}).text.strip()
-
-    # Finding the Product Category
-    category = li[1].find('span', {'class': "tag is-dark"}).text.strip()
-
-    # Finding the Product Quantity Available
-    left = li[3].find('span', {'class': "tag is-dark"}).text.strip()
-
-    # Finding Number Sold
-    sold = li[4].find('span', {'class': "tag is-dark"}).text.strip()
-
-    li = bae[3].find_all('li')
-
-    # Finding Shipment Information (Origin)
-    if "Ships from:" in li[-2].text:
-        shipFrom = li[-2].text
-        shipFrom = shipFrom.replace("Ships from: ", "")
-        # shipFrom = shipFrom.replace(",", "")
-        shipFrom = shipFrom.strip()
-
-    # Finding Shipment Information (Destination)
-    shipTo = li[-1].find('div', {'title': "List of countries is scrollable"}).text
-    shipTo = shipTo.replace("Ships to: ", "")
-    shipTo = shipTo.strip()
-    if "certain countries" in shipTo:
-        countries = ""
-        tags = li[-1].find_all('span', {'class': "tag"})
-        for tag in tags:
-            country = tag.text.strip()
-            countries += country + ", "
-        shipTo = countries.strip(", ")
-
-    # Finding the Product description
-    describe = soup.find('div', {'class': "pre-line"}).text
-    describe = describe.replace("\n", " ")
-    describe = describe.strip()
-
-    '''# Finding the Number of Product Reviews
-    tag = soup.findAll(text=re.compile('Reviews'))
-    for index in tag:
-        reviews = index
-        par = reviews.find('(')
-        if par >=0:
-            reviews = reviews.replace("Reviews (","")
-            reviews = reviews.replace(")","")
-            reviews = reviews.split(",")
-            review = str(abs(int(reviews[0])) + abs(int(reviews[1])))
-        else :
-            review = "-1"'''
-
-    # Searching for CVE and MS categories
-    cve = soup.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
-    if cve:
-        CVE = " "
-        for idx in cve:
-            CVE += (idx)
-            CVE += " "
-            CVE = CVE.replace(',', ' ')
-            CVE = CVE.replace('\n', '')
-    ms = soup.findAll(text=re.compile('MS\d{2}-\d{3}'))
-    if ms:
-        MS = " "
-        for im in ms:
-            MS += (im)
-            MS += " "
-        MS = MS.replace(',', ' ')
-        MS = MS.replace('\n', '')
+    # Finding the product name
+    name_of_product = soup.find("h1", {"class": "product_title entry-title"}).find("a").text
+    name = cleanString(name_of_product.strip())
+    # Finding the product description
+    description_of_product = soup.find("div", {"class": "woocommerce-product-details__short-description"}).text
+    describe = cleanString(description_of_product.strip())
+    # Finding the inquiries tab; an empty tab means zero replies
+    inquiries_about_product = soup.find("div", {"class": "woocommerce-Tabs-panel woocommerce-Tabs-panel--wcfm_enquiry_tab panel entry-content wc-tab"}).find("p").text
+    if inquiries_about_product == "There are no inquiries yet.":
+        review = "0"
+    else:
+        review = "-1"  # TODO: parse the actual number of inquiries
+
+    # Finding the terms and conditions
+    # NOTE: this reuses the enquiry-tab class; update the selector if the
+    # terms live under a different tab
+    terms_and_conditions = soup.find("div", {"class": "woocommerce-Tabs-panel woocommerce-Tabs-panel--wcfm_enquiry_tab panel entry-content wc-tab"}).find("p").text
+    terms = 
cleanString(terms_and_conditions.strip())
+
+    # Finding the vendor name
+    name_of_vendor = soup.find("div", {"class": "wcfmmp_sold_by_store"}).find("a").text
+    vendor = cleanString(name_of_vendor.strip())
+
+    # Finding the price of the item
+    price = soup.find("p", {"class": "price"}).find("bdi").text
+    price_cleaned = price[1:]  # drop the leading currency symbol (assumes a one-character symbol such as "$")
+    USD = price_cleaned.strip()
+    # Every other field keeps its default of "-1": it is not shown on this page
 
     # Populating the final variable (this should be a list with all fields scraped)
     row = (name, describe, lastSeen, rules, CVE, MS, review, category, shipFrom, shipTo, left, escrow, terms, vendor,
 
 
 #parses listing pages, so takes html pages of listing pages using soup object, and parses it for info it needs
 #stores info it needs in different lists, these lists are returned after being organized
 #@param: soup object looking at html page of listing page
 #return: 'row' that contains a variety of lists that each hold info on the listing page
-def darkfox_listing_parser(soup):
+def tormarket_listing_parser(soup):
 
     # Fields to be parsed
-    nm = 0                      # Total_Products (Should be Integer)
-    mktName = "DarkFox"         # 0 Marketplace_Name
-    name = []                   # 1 Product_Name
-    CVE = []                    # 2 Product_CVE_Classification (Common Vulnerabilities and Exposures)
-    MS = []                     # 3 Product_MS_Classification (Microsoft Security)
-    category = []               # 4 Product_Category
-    describe = []               # 5 Product_Description
-    escrow = []                 # 6 Vendor_Warranty
-    views = []                  # 7 Product_Number_Of_Views
-    reviews = []                # 8 Product_Number_Of_Reviews
-    addDate = []                # 9 Product_AddDate
-    lastSeen = []               # 10 Product_LastViewDate
-    BTC = []                    # 11 Product_BTC_SellingPrice
-    USD = []                    # 12 Product_USD_SellingPrice
-    EURO = []                   # 13 Product_EURO_SellingPrice
-    sold = []                   # 14 Product_QuantitySold
-    qLeft =[]                   # 15 Product_QuantityLeft
-    shipFrom = []               # 16 Product_ShippedFrom
-    shipTo = []                 # 17 Product_ShippedTo
-    vendor = []                 # 18 Vendor
-    rating = []                 # 19 Vendor_Rating
-    success = []                # 20 Vendor_Successful_Transactions
-    href = []                   # 23 Product_Links (Urls)
-
-    listing = soup.findAll('div', {"class": "card"})
-
-    # Populating the Number of Products
-    nm = len(listing)
-
-    for a in listing:
-        bae = a.findAll('a', href=True)
-
-        # Adding the url to the list of urls
-        link = bae[0].get('href')
-        link = cleanLink(link)
-        href.append(link)
-
-        # Finding the Product
-        product = bae[1].find('p').text
-        product = product.replace('\n', ' ')
-        product = product.replace(",", "")
-        product = product.replace("...", "")
-        product = product.strip()
-        name.append(product)
-
-        bae = a.find('div', {'class': "media-content"}).find('div').find_all('div')
-
-        if len(bae) >= 5:
-            # Finding Prices
-            price = bae[0].text
-            ud = price.replace(" USD", " ")
-            # u = ud.replace("$","")
-            u = ud.replace(",", "")
-            u = u.strip()
-            USD.append(u)
-            # bc = (prc[1]).strip(' BTC')
-            # BTC.append(bc)
-
-            # Finding the Vendor
-            vendor_name = bae[1].find('a').text
-            vendor_name = vendor_name.replace(",", "")
-            vendor_name = vendor_name.strip()
-            vendor.append(vendor_name)
-
-            # Finding the Category
-            cat = bae[2].find('small').text
-            cat = cat.replace("Category: ", "")
-            cat = cat.replace(",", "")
-            cat = cat.strip()
-            category.append(cat)
-
-            # Finding Number Sold and Quantity Left
-            num = bae[3].text
-            num = num.replace("Sold: ", "")
-            num = num.strip()
-            sold.append(num)
-
-            quant = bae[4].find('small').text
-            quant = quant.replace("In stock: ", "")
-            quant = quant.strip()
-            qLeft.append(quant)
-
-            # Finding Successful Transactions
-            freq = bae[1].text
-            freq = freq.replace(vendor_name, "")
-            freq = re.sub(r'Vendor Level \d+', "", freq)
-            freq = freq.replace("(", 
"") - freq = freq.replace(")", "") - freq = freq.strip() - success.append(freq) + nm = 0 # *Total_Products (Should be Integer) + mktName = "TorMarket" # 0 *Marketplace_Name + vendor = [] # 1 *Vendor y + rating_vendor = [] # 2 Vendor_Rating + success = [] # 3 Vendor_Successful_Transactions + name = [] # 4 *Product_Name y + CVE = [] # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) + MS = [] # 6 Product_MS_Classification (Microsoft Security) + category = [] # 7 Product_Category y + describe = [] # 8 Product_Description + views = [] # 9 Product_Number_Of_Views + reviews = [] # 10 Product_Number_Of_Reviews + rating_item = [] # 11 Product_Rating + addDate = [] # 12 Product_AddDate + BTC = [] # 13 Product_BTC_SellingPrice + USD = [] # 14 Product_USD_SellingPrice y + EURO = [] # 15 Product_EURO_SellingPrice + sold = [] # 16 Product_QuantitySold + qLeft = [] # 17 Product_QuantityLeft + shipFrom = [] # 18 Product_ShippedFrom + shipTo = [] # 19 Product_ShippedTo + href = [] # 20 Product_Links + products_list = soup.find_all('li') + nm = 0 + for product in products_list: + try: + # Finding the name of the product + name_of_product = product.find("h2", {"class": "woocommerce-loop-product__title"}).find("a").text + name_of_product_cleaned = cleanString(name_of_product.strip()) + print(name_of_product_cleaned) + name.append(name_of_product_cleaned) + #finding the URL + try: + url = product.find("div", {"class": "product-loop-content text-center"}).find("a").get("href") + print(url) + href.append(url) + except AttributeError as e: + print("I can't find the link") + raise e + + #finding the rating of the product + rating_score_of_product = product.find("div", {"class": "product-loop-content text-center"}).find("div").find("span").text + rating_item.append(cleanString(rating_score_of_product.strip())) + print("done") + #finding the rating of the vendors + rating_score_of_vendor = product.find("div", {"class": "wcfmmp-store-rating"}).find("strong").text + rating_vendor.append(cleanString(rating_score_of_vendor.strip())) + print("done") + #finding the cost in USD + cost = product.find("span", {"class": "woocommerce-Price-amount amount"}).text + USD.append(cost) + print("done") + #finding the name of the vendor + vendor_name = product.find("div", {"class": "wcfmmp_sold_by_wrapper"}).find("a").text + vendor.append(cleanString(vendor_name.strip())) + print("done") + #everything else appends a -1 + success.append("-1") + CVE.append("-1") + MS.append("-1") + category.append("-1") + describe.append("-1") + views.append("-1") + reviews.append("-1") + addDate.append("-1") + BTC.append("-1") + EURO.append("-1") + sold.append("-1") + qLeft.append("-1") + shipFrom.append("-1") + shipTo.append("-1") + print("Done! moving onto the next product!") + print(len(shipTo)) + nm += 1 + except AttributeError as e: + print("I'm somewhere I don't belong. 
I'm going to leave") + continue - # Searching for CVE and MS categories - cve = a.findAll(text=re.compile('CVE-\d{4}-\d{4}')) - if not cve: - cveValue="-1" - else: - cee = " " - for idx in cve: - cee += (idx) - cee += " " - cee = cee.replace(',', ' ') - cee = cee.replace('\n', '') - cveValue=cee - CVE.append(cveValue) - - ms = a.findAll(text=re.compile('MS\d{2}-\d{3}')) - if not ms: - MSValue="-1" - else: - me = " " - for im in ms: - me += (im) - me += " " - me = me.replace(',', ' ') - me = me.replace('\n', '') - MSValue=me - MS.append(MSValue) # Populate the final variable (this should be a list with all fields scraped) - return organizeProducts(mktName, nm, name, CVE, MS, category, describe, escrow, views, reviews, addDate, lastSeen, - BTC, USD, EURO, qLeft, shipFrom, shipTo, vendor, rating, success, sold, href) + return organizeProducts( + marketplace = "TorMarket", + nm = nm, + vendor = vendor, + rating_vendor = rating_vendor, + success_vendor = success, + nombre = name, + CVE = CVE, + MS = MS, + category = category, + describe = describe, + views = views, + reviews = reviews, + rating_item = rating_item, + addDate = addDate, + BTC = BTC, + USD = USD, + EURO = EURO, + sold = sold, + qLeft = qLeft, + shipFrom = shipFrom, + shipTo = shipTo, + href = href + ) #called by the crawler to get description links on a listing page diff --git a/setup.ini b/setup.ini index 641d3f1..3ff6f3e 100644 --- a/setup.ini +++ b/setup.ini @@ -1,15 +1,17 @@ [TOR] -firefox_binary_path = C:\\Users\\Helium\\Desktop\\Tor Browser\\Browser\\firefox.exe -firefox_profile_path = C:\\Users\\Helium\\Desktop\\Tor Browser\\Browser\\TorBrowser\\Data\\Browser\\profile.default -geckodriver_path = C:\\Users\\Helium\\PycharmProjects\\dw_pipeline_test\\selenium\\geckodriver.exe +firefox_binary_path = C:\Users\dabadcuber5\Desktop\Tor Browser\Browser\firefox.exe +firefox_profile_path = C:\Users\dabadcuber5\Desktop\Tor Browser\Browser\TorBrowser\Data\Browser\profile.default +geckodriver_path = C:\Users\dabadcuber5\dw_pipeline_test\selenium\geckodriver.exe [Project] -project_directory = C:\\Users\\Helium\\PycharmProjects\\dw_pipeline_test -shared_folder = \\VBoxSvr\\Shared +project_directory = C:\Users\dabadcuber5\dw_pipeline_test +shared_folder = \\Mac\\Shared + + [PostgreSQL] ip = localhost username = postgres -password = password +password = Ilovelucky1! database = darkweb_markets_forums \ No newline at end of file