From 803f4b47a28bc68d18a7ee0b987ea966a79d0305 Mon Sep 17 00:00:00 2001
From: westernmeadow <wkwan626@gmail.com>
Date: Thu, 31 Aug 2023 15:46:36 -0700
Subject: [PATCH] Fix LionMarketplace, Nexus, and RobinhoodMarket scrapers

Hardens file moving in both prepare_parser scripts and logs the full
traceback on failure, repairs the LionMarketplace description selector,
adds BTC price scraping, explicit waits, and link-text pagination to the
Nexus crawler and parser, and guards the RobinhoodMarket parser against
missing description and image nodes.

---
 Forums/Initialization/prepare_parser.py       |  6 +-
 MarketPlaces/Initialization/prepare_parser.py |  7 +-
 MarketPlaces/LionMarketplace/parser.py        |  2 +-
 MarketPlaces/Nexus/crawler_selenium.py        | 20 +++-
 MarketPlaces/Nexus/parser.py                  | 97 ++++++++++---------
 MarketPlaces/RobinhoodMarket/parser.py        | 13 ++-
 6 files changed, 80 insertions(+), 65 deletions(-)

diff --git a/Forums/Initialization/prepare_parser.py b/Forums/Initialization/prepare_parser.py
index 1f089e6..e3cc468 100644
--- a/Forums/Initialization/prepare_parser.py
+++ b/Forums/Initialization/prepare_parser.py
@@ -206,17 +206,17 @@ def persist_record(url, rec, cur, con, createLog, logFile, listingFile, descript
 
 def move_file(filePath, createLog, logFile):
 
-    # source = line2.replace(os.path.basename(line2), "") + filename
     source = filePath
     destination = filePath.replace(os.path.basename(filePath), "") + r'Read/'
 
     try:
-        shutil.move(source, destination)
+        shutil.move(source, destination, shutil.copytree)
         return True
     except:
 
-        print("There was a problem to move the file " + filePath)
         incrementError()
+        print("There was a problem to move the file " + filePath)
+        traceback.print_exc()
         if createLog:
             logFile.write(
                 str(nError) + ". There was a problem to move the file " + filePath + "\n")
diff --git a/MarketPlaces/Initialization/prepare_parser.py b/MarketPlaces/Initialization/prepare_parser.py
index c7699bd..c50bb1a 100644
--- a/MarketPlaces/Initialization/prepare_parser.py
+++ b/MarketPlaces/Initialization/prepare_parser.py
@@ -253,17 +253,18 @@ def persist_record(url, rec, cur, con, createLog, logFile, listingFile, descript
 
 
 def move_file(filePath, createLog, logFile):
-    # source = line2.replace(os.path.basename(line2), "") + filename
+
     source = filePath
     destination = filePath.replace(os.path.basename(filePath), "") + r'Read/'
 
     try:
-        shutil.move(source, destination)
+        shutil.move(source, destination, shutil.copytree)
         return True
     except:
 
-        print("There was a problem to move the file " + filePath)
         incrementError()
+        print("There was a problem to move the file " + filePath)
+        traceback.print_exc()
         if createLog:
             logFile.write(
                 str(nError) + ". There was a problem to move the file " + filePath + "\n")
diff --git a/MarketPlaces/LionMarketplace/parser.py b/MarketPlaces/LionMarketplace/parser.py
index 81a911c..a37febf 100644
--- a/MarketPlaces/LionMarketplace/parser.py
+++ b/MarketPlaces/LionMarketplace/parser.py
@@ -56,7 +56,7 @@ def lionmarketplace_description_parser(soup):
     name = (cleanString(temp.strip()))
 
     # product description
-    temp = soup.find('div', {'class': "mt-4"}).find(text=True, recursive=False)
+    temp = soup.find('div', {'class': "mt-4"}).contents[-1]
     describe = cleanString(temp.strip())
 
     # Finding Product Image
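
For context on this selector swap: find(text=True, recursive=False) returns
the first direct text node of the div, which is often just the whitespace
between tags, while .contents[-1] takes the last direct child, assumed here
to be the description text. A minimal illustration (the markup is a contrived
stand-in, not LionMarketplace's actual HTML):

    from bs4 import BeautifulSoup

    html = '<div class="mt-4">\n  <span>Vendor</span>Actual description text</div>'
    div = BeautifulSoup(html, 'html.parser').find('div', {'class': 'mt-4'})

    print(repr(div.find(text=True, recursive=False)))  # '\n  ' -> strips to ''
    print(repr(div.contents[-1]))                      # 'Actual description text'

Note that .contents[-1] assumes the description is the last direct child; if
the div ends with a tag rather than a text node, temp.strip() would raise.
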
diff --git a/MarketPlaces/Nexus/crawler_selenium.py b/MarketPlaces/Nexus/crawler_selenium.py
index d7c84c2..4ae7cfe 100644
--- a/MarketPlaces/Nexus/crawler_selenium.py
+++ b/MarketPlaces/Nexus/crawler_selenium.py
@@ -85,8 +85,8 @@ def createFFDriver():
     ff_prof.set_preference("privacy.sanitize.sanitizeOnShutdown", True)
     ff_prof.set_preference("signon.rememberSignons", False)
     ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
-    ff_prof.set_preference("network.dns.disablePrefetch", True)
-    ff_prof.set_preference("network.http.sendRefererHeader", 0)
+    # ff_prof.set_preference("network.dns.disablePrefetch", True)
+    # ff_prof.set_preference("network.http.sendRefererHeader", 0)
     ff_prof.set_preference("permissions.default.image", 3)
     ff_prof.set_preference("browser.download.folderList", 2)
     ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
@@ -96,7 +96,7 @@ def createFFDriver():
     ff_prof.set_preference('network.proxy.socks', '127.0.0.1')
     ff_prof.set_preference('network.proxy.socks_port', 9150)
     ff_prof.set_preference('network.proxy.socks_remote_dns', True)
-    ff_prof.set_preference("javascript.enabled", False)
+    ff_prof.set_preference("javascript.enabled", True)
     ff_prof.update_preferences()
 
     service = Service(config.get('TOR', 'geckodriver_path'))
@@ -204,6 +204,12 @@ def crawlForum(driver):
                     driver.get(link)
                 except:
                     driver.refresh()
+
+                # wait for the BTC price to load before saving the page
+                WebDriverWait(driver, 30).until(EC.visibility_of_element_located(
+                    (By.XPATH, "/html/body/div[1]/div[2]/div/div/main/ul/li[1]/div/span/span[3]")))
+                time.sleep(5)
+
                 html = driver.page_source
                 savePage(driver, html, link)
 
@@ -214,6 +220,11 @@ def crawlForum(driver):
                         driver.get(itemURL)
                     except:
                         driver.refresh()
+
+                    # wait for the BTC price to load before saving the page
+                    WebDriverWait(driver, 30).until(EC.visibility_of_element_located(
+                        (By.XPATH, "/html/body/div[1]/div[2]/div/div/main/div[3]/div[2]/p/span[3]")))
+
                     savePage(driver, driver.page_source, item)
                     driver.back()
 
@@ -225,8 +236,7 @@ def crawlForum(driver):
                     break
 
                 try:
-                    link = driver.find_element(by=By.XPATH, value=
-                        '/html/body/div[1]/div[2]/div/div/main/nav/ul/li[3]/a').get_attribute('href')
+                    link = driver.find_element(by=By.LINK_TEXT, value='→').get_attribute('href')
                     if link == "":
                         raise NoSuchElementException
                     count += 1
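
One caveat on the two new waits: WebDriverWait(...).until(...) raises
selenium.common.exceptions.TimeoutException after 30 seconds if the price
span never becomes visible, which would abort the page instead of saving it.
A hedged variant that degrades gracefully (the XPath is the one from this
hunk; treating a missing price as non-fatal is an assumption):

    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    try:
        # wait for the BTC price span to render before snapshotting the page
        WebDriverWait(driver, 30).until(EC.visibility_of_element_located(
            (By.XPATH, "/html/body/div[1]/div[2]/div/div/main/ul/li[1]/div/span/span[3]")))
    except TimeoutException:
        pass  # price never rendered; save whatever did load

The switch from an absolute pagination XPath to By.LINK_TEXT '→' should also
survive layout changes, as long as the next-page arrow keeps that link text.
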
diff --git a/MarketPlaces/Nexus/parser.py b/MarketPlaces/Nexus/parser.py
index 093188e..f673110 100644
--- a/MarketPlaces/Nexus/parser.py
+++ b/MarketPlaces/Nexus/parser.py
@@ -43,6 +43,10 @@ def nexus_description_parser(soup):
     name_of_product = soup.find("h1", {"class": "product_title entry-title"}).text
     name = cleanString(name_of_product.strip())
 
+    # Find the BTC Price
+    prices = soup.find('p', {"class": "price"}).findAll('span', {"class": "cs"})
+    BTC = prices[0].text
+    BTC = cleanNumbers(BTC.strip())
 
     # finding the description of the product
     description_div = soup.find("div", {"class": "woocommerce-product-details__short-description"})
@@ -52,7 +56,7 @@ def nexus_description_parser(soup):
         describe = cleanString(description_div.text.strip())
 
     # Finding Product Image
-    image = soup.find('div', {'class': 'woocommerce-product-gallery__image'}).find('img')
+    image = soup.find('div', {'class': 'woocommerce-product-gallery__wrapper'}).find('img')
     image = image.get('src')
     image = image.split('base64,')[-1]
 
@@ -110,56 +114,53 @@ def nexus_listing_parser(soup):
     image_vendor = []                         # 21 Vendor_Image
     href = []                                 # 22 Product_Links
 
-    products_list = soup.find_all('li')
-    nm = 0
+    main = soup.find('main', {'id': 'main'})
+    products_list = main.find('ul', recursive=False).find_all('li', recursive=False)
+    nm = len(products_list)
+
     for product in products_list:
+        # Finding the name of the product
+        name_of_product = product.find("h2", {"class": "woocommerce-loop-product__title"}).find("a").text
+        name_of_product_cleaned = cleanString(name_of_product.strip())
+        # print(name_of_product_cleaned)
+        name.append(name_of_product_cleaned)
+        # finding the URL
         try:
-            # Finding the name of the product
-            name_of_product = product.find("h2", {"class": "woocommerce-loop-product__title"}).find("a").text
-            name_of_product_cleaned = cleanString(name_of_product.strip())
-            # print(name_of_product_cleaned)
-            name.append(name_of_product_cleaned)
-            #finding the URL
-            try:
-                url = product.find("a", class_="woocommerce-loop-product__link").get('href')
-                href.append(url)
-            except AttributeError as e:
-                print("I can't find the link")
-                raise e
-
-            # Finding Product Image
-            product_image = product.find('a', {'class': 'woocommerce-loop-image-link woocommerce-LoopProduct-link woocommerce-loop-product__link'}).find('img')
-            product_image = product_image.get('src')
-            product_image = product_image.split('base64,')[-1]
-            image.append(product_image)
-
-            BTC.append("-1")
-
-            #everything else appends a -1
-            rating_vendor.append("-1")
-            USD.append("-1")
-            vendor.append(mktName)
-            success.append("-1")
-            CVE.append("-1")
-            MS.append("-1")
-            category.append("-1")
-            describe.append("-1")
-            views.append("-1")
-            reviews.append("-1")
-            addDate.append("-1")
-            EURO.append("-1")
-            sold.append("-1")
-            qLeft.append("-1")
-            shipFrom.append("-1")
-            shipTo.append("-1")
-            image_vendor.append("-1")
-            # print("Done! moving onto the next product!")
-            # print(len(shipTo))
-            nm += 1
+            url = product.find("a", class_="woocommerce-loop-product__link").get('href')
+            href.append(url)
         except AttributeError as e:
-            print("I'm somewhere I don't belong. I'm going to leave")
-            continue
-
+            print("I can't find the link")
+            raise e
+
+        # Finding Product Image
+        product_image = product.find('a', {'class': 'woocommerce-loop-image-link woocommerce-LoopProduct-link woocommerce-loop-product__link'}).find('img')
+        product_image = product_image.get('src')
+        product_image = product_image.split('base64,')[-1]
+        image.append(product_image)
+
+        # Finding BTC Price
+        prices = product.find('span', {"class": "price"}).findAll('span', {"class": "cs"})
+        price = prices[0].text
+        BTC.append(cleanNumbers(price.strip()))
+
+        # everything else appends a -1
+        rating_vendor.append("-1")
+        USD.append("-1")
+        vendor.append('-1')
+        success.append("-1")
+        CVE.append("-1")
+        MS.append("-1")
+        category.append("-1")
+        describe.append("-1")
+        views.append("-1")
+        reviews.append("-1")
+        addDate.append("-1")
+        EURO.append("-1")
+        sold.append("-1")
+        qLeft.append("-1")
+        shipFrom.append("-1")
+        shipTo.append("-1")
+        image_vendor.append("-1")
 
     # Populate the final variable (this should be a list with all fields scraped)
     return organizeProducts(
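
Two behavioral notes on this rewrite: the per-product try/except is gone, so a
single non-product <li> (e.g. a promo tile without the WooCommerce markup) now
raises and aborts the whole listing parse, and nm is counted before any tile
is validated; soup.find('main', ...) returning None would likewise raise.
prices[0] also raises IndexError when a tile has no span.cs. A defensive
variant of the price extraction, as a sketch (the '-1' placeholder convention
is taken from the surrounding code):

    # inside the products_list loop, assuming a tile may lack price markup
    price_tag = product.find('span', {'class': 'price'})
    cs_spans = price_tag.findAll('span', {'class': 'cs'}) if price_tag else []
    if cs_spans:
        BTC.append(cleanNumbers(cs_spans[0].text.strip()))
    else:
        BTC.append('-1')  # keep every column the same length for organizeProducts
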
diff --git a/MarketPlaces/RobinhoodMarket/parser.py b/MarketPlaces/RobinhoodMarket/parser.py
index 5de7a70..c036d17 100644
--- a/MarketPlaces/RobinhoodMarket/parser.py
+++ b/MarketPlaces/RobinhoodMarket/parser.py
@@ -51,14 +51,17 @@ def Robinhood_description_parser(soup):
     # Finding description
     desc = ''
     tab = soup.find('div', {"id": "tab-description"})
-    for p in tab.findAll('p'):
-        desc += p.text
+    if tab is not None:
+        for p in tab.findAll('p'):
+            desc += p.text
     if desc == '':
-        desc = soup.find('div', {"class": "woocommerce-product-details__short-description"}).text
+        short = soup.find('div', {"class": "woocommerce-product-details__short-description"})
+        if short is not None:
+            desc = short.text
     describe = cleanString(desc.strip())
 
     # Finding Product Image
-    image = soup.find('div', {'class': 'woocommerce-product-gallery__image'}).find('img')
+    image = soup.find('div', {'class': 'woocommerce-product-gallery__wrapper'}).find('img')
     image = image.get('src')
     image = image.split('base64,')[-1]
 
@@ -164,7 +167,7 @@ def Robinhood_listing_parser(soup):
         name.append(product)
 
         # Finding Product Image
-        product_image = card.find('img', {'class': 'attachment-woocommerce_thumbnail size-woocommerce_thumbnail'})
+        product_image = card.find('a').find('img')
         product_image = product_image.get('src')
         product_image = product_image.split('base64,')[-1]
         image.append(product_image)
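
The new selector is broader than the old thumbnail-class lookup, but both
versions raise AttributeError when a card has no matching image (None.find /
None.get). A guarded sketch, assuming a card may render without a thumbnail
(the '-1' placeholder mirrors the rest of these parsers):

    anchor = card.find('a')
    thumb = anchor.find('img') if anchor is not None else None
    if thumb is not None and thumb.get('src'):
        image.append(thumb.get('src').split('base64,')[-1])
    else:
        image.append('-1')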