From 115449492e6a8df62cc0539906fffa78f548526e Mon Sep 17 00:00:00 2001
From: Khoi
Date: Thu, 20 Jul 2023 13:50:29 -0700
Subject: [PATCH] Completed and tested the parsers for DigitalThriftShop

---
 .../DigitalThriftShop/crawler_selenium.py     | 26 +++++-----
 MarketPlaces/DigitalThriftShop/parser.py      | 52 ++++++++++++++++---
 MarketPlaces/Initialization/marketsList.txt   |  2 +-
 MarketPlaces/Initialization/prepare_parser.py |  8 ++-
 4 files changed, 67 insertions(+), 21 deletions(-)

diff --git a/MarketPlaces/DigitalThriftShop/crawler_selenium.py b/MarketPlaces/DigitalThriftShop/crawler_selenium.py
index 984e0f5..58c833a 100644
--- a/MarketPlaces/DigitalThriftShop/crawler_selenium.py
+++ b/MarketPlaces/DigitalThriftShop/crawler_selenium.py
@@ -32,19 +32,19 @@ baseURL = 'http://kw4zlnfhxje7top26u57iosg55i7dzuljjcyswo2clgc3mdliviswwyd.onion
 
 # Opens Tor Browser, crawls the website, then parses, then closes tor
 #acts like the main method for the crawler, another function at the end of this code calls this function later
 def startCrawling():
-    opentor()
-    # mktName = getMKTName()
-    driver = getAccess()
-
-    if driver != 'down':
-        try:
-            login(driver)
-            crawlForum(driver)
-        except Exception as e:
-            print(driver.current_url, e)
-        closetor(driver)
-
-    # new_parse(forumName, baseURL, False)
+    # opentor()
+    mktName = getMKTName()
+    # driver = getAccess()
+
+    # if driver != 'down':
+    #     try:
+    #         login(driver)
+    #         crawlForum(driver)
+    #     except Exception as e:
+    #         print(driver.current_url, e)
+    #     closetor(driver)
+
+    new_parse(mktName, baseURL, False)
 
 
 # Opens Tor Browser
diff --git a/MarketPlaces/DigitalThriftShop/parser.py b/MarketPlaces/DigitalThriftShop/parser.py
index 8706076..b45c3b9 100644
--- a/MarketPlaces/DigitalThriftShop/parser.py
+++ b/MarketPlaces/DigitalThriftShop/parser.py
@@ -4,7 +4,7 @@ __author__ = 'DarkWeb'
 from MarketPlaces.Utilities.utilities import *
 
 # Here, we are importing BeautifulSoup to search through the HTML tree
-from bs4 import BeautifulSoup, Tag
+from bs4 import BeautifulSoup, ResultSet, Tag
 
 
 #parses description pages, so takes html pages of description pages using soup object, and parses it for info it needs
@@ -40,7 +40,7 @@ def digitalThriftShop_description_parser(soup: Tag):
     product_name = soup.find("h1", {"class": "product_title entry-title"}).text
     name = cleanString(product_name.strip())
 
-    product_description = soup.find("id", {"tab-description"}).find("p").text
+    product_description = soup.find("div", {"id": "tab-description"}).find("p").text
     describe = cleanString(product_description.strip())
 
     product_category = soup.find("span", {"class": "posted_in"}).find("a").text
@@ -52,7 +52,7 @@ def digitalThriftShop_description_parser(soup: Tag):
         reviews = product_rating.find("span", {"Class": "rating"}).text
 
     except Exception as e:
-        raise e
+        pass
 
     product_BTC = soup.find("div", {"id": "price-BTC"}).find("span", {"class": "priceinfo cw-noselect"}).text
     BTC = cleanString(product_BTC.strip())
@@ -74,7 +74,7 @@ def digitalThriftShop_description_parser(soup: Tag):
 #stores info it needs in different lists, these lists are returned after being organized
 #@param: soup object looking at html page of listing page
 #return: 'row' that contains a variety of lists that each hold info on the listing page
-def digitalThriftShop_listing_parser(soup):
+def digitalThriftShop_listing_parser(soup: Tag):
 
     # Fields to be parsed
     nm = 0                                    # *Total_Products (Should be Integer)
@@ -100,13 +100,53 @@ def digitalThriftShop_listing_parser(soup: Tag):
     shipTo = []                               # 19 Product_ShippedTo
     href = []                                 # 20 Product_Links
 
+    product_category = soup.find("h1", {"class": "woocommerce-products-header__title page-title"}).text
+
+    products_list: ResultSet[Tag] = soup.find("ul", {"class": "products columns-5"}).find_all("li")
+
+    for product in products_list:
+        nm += 1
+        vendor.append("-1")
+        rating_vendor.append("-1")
+        success.append("-1")
+
+        product_name = product.find("h2", {"class": "woocommerce-loop-product__title"}).text
+        name.append(cleanString(product_name.strip()))
+
+        CVE.append("-1")
+        MS.append("-1")
+        category.append(cleanString(product_category.strip()))
+        describe.append("-1")
+        views.append("-1")
+        reviews.append("-1")
+
+        try:
+            product_rating = product.find("div", {"class": "star-rating"}).find("strong", {"class": "rating"}).text
+            rating_item.append(cleanString(product_rating.strip()))
+        except Exception:
+            rating_item.append("-1")
+
+        addDate.append("-1")
+        BTC.append("-1")
+
+        product_USD = product.find("span", {"class": "price"}).text
+        USD.append(product_USD.replace("$", "").strip())
+
+        EURO.append("-1")
+        sold.append("-1")
+        qLeft.append("-1")
+        shipFrom.append("-1")
+        shipTo.append("-1")
+
+        product_href = product.find("a", {"class": "woocommerce-LoopProduct-link woocommerce-loop-product__link"}).get("href")
+        href.append(cleanString(product_href.strip()))
 
     # Populate the final variable (this should be a list with all fields scraped)
-    # return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
-    #                         reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
+    return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
+                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
 
 
 #called by the crawler to get description links on a listing page
diff --git a/MarketPlaces/Initialization/marketsList.txt b/MarketPlaces/Initialization/marketsList.txt
index 032ecf3..e7488f3 100644
--- a/MarketPlaces/Initialization/marketsList.txt
+++ b/MarketPlaces/Initialization/marketsList.txt
@@ -1 +1 @@
-M00nkeyMarket
\ No newline at end of file
+DigitalThriftShop
\ No newline at end of file
diff --git a/MarketPlaces/Initialization/prepare_parser.py b/MarketPlaces/Initialization/prepare_parser.py
index 9fa557c..f2ff464 100644
--- a/MarketPlaces/Initialization/prepare_parser.py
+++ b/MarketPlaces/Initialization/prepare_parser.py
@@ -14,6 +14,7 @@ from MarketPlaces.ViceCity.parser import *
 from MarketPlaces.TorBay.parser import *
 from MarketPlaces.M00nkeyMarket.parser import *
 from MarketPlaces.DarkMatter.parser import *
+from MarketPlaces.DigitalThriftShop.parser import *
 
 from MarketPlaces.Classifier.classify_product import predict
 
@@ -160,6 +161,8 @@ def new_parse(marketPlace, url, createLog):
                 rmm = m00nkey_description_parser(soup)
             elif marketPlace == "DarkMatter":
                 rmm = darkmatter_description_parser(soup)
+            elif marketPlace == "DigitalThriftShop":
+                rmm = digitalThriftShop_description_parser(soup)
 
             # key = u"Pr:" + rmm[0].upper()[:desc_lim1] + u" Vendor:" + rmm[13].upper()[:desc_lim2]
             key = u"Url:" + os.path.basename(line2).replace(".html", "")
@@ -167,7 +170,8 @@ def new_parse(marketPlace, url, createLog):
 
             # save file address with description record in memory
             detPage[key] = {'rmm': rmm, 'filename': os.path.basename(line2)}
-        except :
+        except Exception as e:
+            print(e)
             nError += 1
             print("There was a problem to parse the file " + line2 + " in the Description section!")
 
@@ -221,6 +225,8 @@ def new_parse(marketPlace, url, createLog):
                 rw = m00nkey_listing_parser(soup)
             elif marketPlace == "DarkMatter":
                 rw = darkmatter_listing_parser(soup)
+            elif marketPlace == "DigitalThriftShop":
+                rw = digitalThriftShop_listing_parser(soup)
             else:
                 parseError = True
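
Note: as a quick standalone smoke test of the new listing parser (outside the
full new_parse pipeline), a small driver along the following lines can be
used. The saved-page filename and the "html.parser" backend are illustrative
assumptions, not taken from this patch.

    from bs4 import BeautifulSoup

    from MarketPlaces.DigitalThriftShop.parser import digitalThriftShop_listing_parser

    # Load one DigitalThriftShop listing page previously saved to disk by the
    # crawler (the filename here is hypothetical).
    with open("digitalthriftshop_listing.html", encoding="utf-8") as f:
        soup = BeautifulSoup(f.read(), "html.parser")

    # The parser fills its 20 per-product field lists (vendor, name, category,
    # USD price, href, ...) and hands them to organizeProducts, whose result it
    # returns -- one organized record per <li> product on the page.
    row = digitalThriftShop_listing_parser(soup)
    print(row)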