From db93632843bc3705e5958a393eb3922c3aed7d13 Mon Sep 17 00:00:00 2001
From: Joshua <student.joshuaestrada@gmail.com>
Date: Thu, 13 Jul 2023 19:59:40 -0700
Subject: [PATCH 1/4] updated utilities.py to fix bug with clean_html()

---
 .idea/DW_Pipeline_Test.iml                |   2 +-
 .idea/misc.xml                            |   2 +-
 Forums/Utilities/utilities.py             |   2 +-
 MarketPlaces/Utilities/utilities.py       |   2 +-
 MarketPlaces/ViceCity/crawler_selenium.py |   0
 MarketPlaces/ViceCity/parser.py           | 232 ++++++++++++++++++++++
 6 files changed, 236 insertions(+), 4 deletions(-)
 create mode 100644 MarketPlaces/ViceCity/crawler_selenium.py
 create mode 100644 MarketPlaces/ViceCity/parser.py
diff --git a/.idea/DW_Pipeline_Test.iml b/.idea/DW_Pipeline_Test.iml
index b4b832d..c554b68 100644
--- a/.idea/DW_Pipeline_Test.iml
+++ b/.idea/DW_Pipeline_Test.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="C:\Users\Helium\anaconda3" jdkType="Python SDK" />
+    <orderEntry type="inheritedJdk" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index baf04e9..91bfa41 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="C:\Users\Helium\anaconda3" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="C:\Users\John Wick\anaconda3" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/Forums/Utilities/utilities.py b/Forums/Utilities/utilities.py
index d8ca9eb..2196307 100644
--- a/Forums/Utilities/utilities.py
+++ b/Forums/Utilities/utilities.py
@@ -292,7 +292,7 @@ def cleanHTML(html):
     clean_html = re.sub(r"<embed.*scriptable?>", "", clean_html)
 
     # image and JavaScript
-    clean_html = re.sub(r"<div.*background-image.*?>", "", clean_html)
+    clean_html = re.sub(r"<div[^>]*style=\"[^\"]*background-image.*?>", "", clean_html)
 
     return clean_html
 
diff --git a/MarketPlaces/Utilities/utilities.py b/MarketPlaces/Utilities/utilities.py
index 50bbf89..b1ecba4 100644
--- a/MarketPlaces/Utilities/utilities.py
+++ b/MarketPlaces/Utilities/utilities.py
@@ -315,7 +315,7 @@ def cleanHTML(html):
     clean_html = re.sub(r"<embed.*scriptable?>", "", clean_html)
 
     # image and JavaScript
-    clean_html = re.sub(r"<div.*background-image.*?>", "", clean_html)
+    clean_html = re.sub(r"<div[^>]*style=\"[^\"]*background-image.*?>", "", clean_html)
 
     return clean_html
 
diff --git a/MarketPlaces/ViceCity/crawler_selenium.py b/MarketPlaces/ViceCity/crawler_selenium.py
new file mode 100644
index 0000000..e69de29
diff --git a/MarketPlaces/ViceCity/parser.py b/MarketPlaces/ViceCity/parser.py
new file mode 100644
index 0000000..2508b2d
--- /dev/null
+++ b/MarketPlaces/ViceCity/parser.py
@@ -0,0 +1,232 @@
+__author__ = 'DarkWeb'
+
+# Here, we are importing the auxiliary functions to clean or convert data
+from MarketPlaces.Utilities.utilities import *
+
+# Here, we are importing BeautifulSoup to search through the HTML tree
+from bs4 import BeautifulSoup
+
+
+# This is the method to parse the Description Pages (one page to each Product in the Listing Pages)
+def tor2door_description_parser(soup):
+    # Fields to be parsed
+
+    vendor = "-1"  # 0 *Vendor_Name
+    success = "-1"  # 1 Vendor_Successful_Transactions
+    rating_vendor = "-1"  # 2 Vendor_Rating
+    name = "-1"  # 3 *Product_Name
+    describe = "-1"  # 4 Product_Description
+    CVE = "-1"  # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
+    MS = "-1"  # 6 Product_MS_Classification (Microsoft Security)
+    category = "-1"  # 7 Product_Category
+    views = "-1"  # 8 Product_Number_Of_Views
+    reviews = "-1"  # 9 Product_Number_Of_Reviews
+    rating_item = "-1"  # 10 Product_Rating
+    addDate = "-1"  # 11 Product_AddedDate
+    BTC = "-1"  # 12 Product_BTC_SellingPrice
+    USD = "-1"  # 13 Product_USD_SellingPrice
+    EURO = "-1"  # 14 Product_EURO_SellingPrice
+    sold = "-1"  # 15 Product_QuantitySold
+    left = "-1"  # 16 Product_QuantityLeft
+    shipFrom = "-1"  # 17 Product_ShippedFrom
+    shipTo = "-1"  # 18 Product_ShippedTo
+
+    bae = soup.find('div', {'class': "col-9"})
+
+    # Finding Product Name
+    name = bae.find('h2').text
+    name = name.replace('\n', ' ')
+    name = name.replace(",", "")
+    name = name.strip()
+
+    mb = bae.findAll('div', {"class": "mb-1"})
+
+    # Finding Vendor
+    vendor = mb[0].text
+    vendor = vendor.replace(",", "")
+    vendor = vendor.replace("Sold by:", "")
+    vendor = vendor.strip()
+
+    # # Finding Vendor Rating
+    # full_stars = bae[2].find_all('i', {'class': "fas fa-star"})
+    # half_star = bae[2].find('i', {'class': "fas fa-star-half-alt"})
+    # rating = len(full_stars) + (0.5 if half_star is not None else 0)
+
+    # Finding Quantity Sold and Left
+    temp = mb[4].text.split(',')
+
+    sold = temp[0].replace("sold", "")
+    sold = sold.strip()
+
+    left = temp[1].replace("in stock", "")
+    left = left.strip()
+
+    # Finding USD
+    USD = bae.find('div', {"class": "h3 text-secondary"}).text
+    USD = USD.replace("$", "")
+    USD = USD.strip()
+
+    # Finding BTC
+    temp = bae.find('div', {"class": "small"}).text.split("BTC")
+
+    BTC = temp[0].strip()
+
+    # shipping_info = bae[4].text
+    # if "Digital" not in shipping_info:
+    #     shipping_info = shipping_info.split("  ")
+    #
+    #     # Finding Shipment Information (Origin)
+    #     shipFrom = shipping_info[0].strip()
+    #
+    #     # Finding Shipment Information (Destination)
+    #     shipTo = shipping_info[1].strip()
+
+    # Finding the Product description
+    describe = bae.find('div', {"class": "card border-top-0"}).text
+    describe = describe.replace("\n", " ")
+    describe = describe.replace("\r", " ")
+    describe = describe.strip()
+
+    # Searching for CVE and MS categories
+    cve = soup.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
+    if cve:
+        CVE = " "
+        for idx in cve:
+            CVE += (idx)
+            CVE += "  "
+            CVE = CVE.replace(',', ' ')
+            CVE = CVE.replace('\n', '')
+    ms = soup.findAll(text=re.compile('MS\d{2}-\d{3}'))
+    if ms:
+        MS = " "
+        for im in ms:
+            MS += (im)
+            MS += " "
+            MS = MS.replace(',', ' ')
+            MS = MS.replace('\n', '')
+
+    # Populating the final variable (this should be a list with all fields scraped)
+    row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
+           BTC, USD, EURO, sold, left, shipFrom, shipTo)
+
+    # Sending the results
+    return row
+
+
+# This is the method to parse the Listing Pages
+def tor2door_listing_parser(soup):
+    # Fields to be parsed
+    nm = 0  # *Total_Products (Should be Integer)
+    mktName = "Tor2door"  # 0 *Marketplace_Name
+    vendor = []  # 1 *Vendor y
+    rating_vendor = []  # 2 Vendor_Rating
+    success = []  # 3 Vendor_Successful_Transactions
+    name = []  # 4 *Product_Name y
+    CVE = []  # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
+    MS = []  # 6 Product_MS_Classification (Microsoft Security)
+    category = []  # 7 Product_Category y
+    describe = []  # 8 Product_Description
+    views = []  # 9 Product_Number_Of_Views
+    reviews = []  # 10 Product_Number_Of_Reviews
+    rating_item = []  # 11 Product_Rating
+    addDate = []  # 12 Product_AddDate
+    BTC = []  # 13 Product_BTC_SellingPrice
+    USD = []  # 14 Product_USD_SellingPrice y
+    EURO = []  # 15 Product_EURO_SellingPrice
+    sold = []  # 16 Product_QuantitySold
+    qLeft = []  # 17 Product_QuantityLeft
+    shipFrom = []  # 18 Product_ShippedFrom
+    shipTo = []  # 19 Product_ShippedTo
+    href = []  # 20 Product_Links
+
+    listing = soup.findAll('div', {"class": "card product-card mb-3"})
+
+    # Populating the Number of Products
+    nm = len(listing)
+
+    # Finding Category
+    cat = soup.find("div", {"class": "col-9"})
+    cat = cat.find("h2").text
+    cat = cat.replace("Category: ", "")
+    cat = cat.replace(",", "")
+    cat = cat.strip()
+
+    for card in listing:
+        category.append(cat)
+
+        bae = card.findAll('a')
+
+        # Adding the url to the list of urls
+        link = bae[0].get('href')
+        href.append(link)
+
+        # Finding Product Name
+        product = bae[1].text
+        product = product.replace('\n', ' ')
+        product = product.replace(",", "")
+        product = product.strip()
+        name.append(product)
+
+        # Finding Vendor
+        vendor_name = bae[2].text
+        vendor_name = vendor_name.replace(",", "")
+        vendor_name = vendor_name.strip()
+        vendor.append(vendor_name)
+
+        # Finding USD
+        usd = card.find('div', {"class": "mb-1"}).text
+        usd = usd.replace("$", "")
+        usd = usd.strip()
+        USD.append(usd)
+
+        # Finding Reviews
+        num = card.find("span", {"class": "rate-count"}).text
+        num = num.replace("(", "")
+        num = num.replace("review)", "")
+        num = num.replace("reviews)", "")
+        num = num.strip()
+        reviews.append(num)
+
+        # Searching for CVE and MS categories
+        cve = card.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
+        if not cve:
+            cveValue = "-1"
+        else:
+            cee = " "
+            for idx in cve:
+                cee += (idx)
+                cee += "  "
+                cee = cee.replace(',', ' ')
+                cee = cee.replace('\n', '')
+            cveValue = cee
+        CVE.append(cveValue)
+
+        ms = card.findAll(text=re.compile('MS\d{2}-\d{3}'))
+        if not ms:
+            MSValue = "-1"
+        else:
+            me = " "
+            for im in ms:
+                me += (im)
+                me += " "
+                me = me.replace(',', ' ')
+                me = me.replace('\n', '')
+            MSValue = me
+        MS.append(MSValue)
+
+    # Populate the final variable (this should be a list with all fields scraped)
+    return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
+                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
+
+
+def tor2door_links_parser(soup):
+    # Returning all links that should be visited by the Crawler
+    href = []
+
+    listing = soup.findAll('div', {"class": "card product-card mb-3"})
+
+    for div in listing:
+        link = div.find('a')['href']
+        href.append(link)
+
+    return href
\ No newline at end of file

From bd5a05ffb46afded758c48989bcb8e5e631c17a6 Mon Sep 17 00:00:00 2001
From: Joshua <student.joshuaestrada@gmail.com>
Date: Thu, 13 Jul 2023 20:16:48 -0700
Subject: [PATCH 2/4] finished crawler and parser

---
 MarketPlaces/Initialization/markets_mining.py |   3 +
 MarketPlaces/Initialization/prepare_parser.py |   7 +-
 MarketPlaces/ViceCity/crawler_selenium.py     | 333 ++++++++++++++++++
 MarketPlaces/ViceCity/parser.py               | 166 +++++----
 4 files changed, 435 insertions(+), 74 deletions(-)

diff --git a/MarketPlaces/Initialization/markets_mining.py b/MarketPlaces/Initialization/markets_mining.py
index 664454a..1c533ad 100644
--- a/MarketPlaces/Initialization/markets_mining.py
+++ b/MarketPlaces/Initialization/markets_mining.py
@@ -20,6 +20,7 @@ from MarketPlaces.Apocalypse.crawler_selenium import crawler as crawlerApocalyps
 from MarketPlaces.CityMarket.crawler_selenium import crawler as crawlerCityMarket
 from MarketPlaces.DarkMatter.crawler_selenium import crawler as crawlerDarkMatter
 from MarketPlaces.M00nkeyMarket.crawler_selenium import crawler as crawlerM00nkeyMarket
+from MarketPlaces.ViceCity.crawler_selenium import crawler as crawlerViceCity
 
 import configparser
 import time
@@ -106,6 +107,8 @@ if __name__ == '__main__':
             crawlerDarkMatter()
         elif mkt == "M00nkeyMarket":
             crawlerM00nkeyMarket()
+        elif mkt == "ViceCity":
+            crawlerViceCity()
 
 
 
diff --git a/MarketPlaces/Initialization/prepare_parser.py b/MarketPlaces/Initialization/prepare_parser.py
index 4d5508b..5739f30 100644
--- a/MarketPlaces/Initialization/prepare_parser.py
+++ b/MarketPlaces/Initialization/prepare_parser.py
@@ -10,6 +10,7 @@ from MarketPlaces.Tor2door.parser import *
 from MarketPlaces.Apocalypse.parser import *
 from MarketPlaces.ThiefWorld.parser import *
 from MarketPlaces.AnonymousMarketplace.parser import *
+from MarketPlaces.ViceCity.parser import *
 
 from MarketPlaces.Classifier.classify_product import predict
 
@@ -148,6 +149,8 @@ def new_parse(marketPlace, url, createLog):
                 rmm = thiefWorld_description_parser(soup)
             elif marketPlace =="AnonymousMarketplace":
                 rmm = anonymousMarketplace_description_parser(soup)
+            elif marketPlace == "ViceCity":
+                rmm = vicecity_description_parser(soup)
             
             # key = u"Pr:" + rmm[0].upper()[:desc_lim1] + u" Vendor:" + rmm[13].upper()[:desc_lim2]
             key = u"Url:" + os.path.basename(line2).replace(".html", "")
@@ -200,7 +203,9 @@ def new_parse(marketPlace, url, createLog):
                 elif marketPlace == "ThiefWorld":
                     rw = thiefWorld_listing_parser(soup)
                 elif marketPlace == "AnonymousMarketplace":
-                    rw = anonymousMarketplace_listing_parser(soup)                
+                    rw = anonymousMarketplace_listing_parser(soup)
+                elif marketPlace == "ViceCity":
+                    rw = vicecity_listing_parser(soup)
                 else:
                     parseError = True
 
diff --git a/MarketPlaces/ViceCity/crawler_selenium.py b/MarketPlaces/ViceCity/crawler_selenium.py
index e69de29..0b22082 100644
--- a/MarketPlaces/ViceCity/crawler_selenium.py
+++ b/MarketPlaces/ViceCity/crawler_selenium.py
@@ -0,0 +1,333 @@
+__author__ = 'DarkWeb'
+
+'''
+ViceCity Market Forum Crawler (Selenium)
+'''
+
+from selenium import webdriver
+from selenium.common.exceptions import NoSuchElementException
+from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
+from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
+from selenium.webdriver.firefox.service import Service
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.common.by import By
+
+from PIL import Image
+import urllib.parse as urlparse
+import os, time
+from datetime import date
+import subprocess
+import configparser
+import subprocess
+from bs4 import BeautifulSoup
+from MarketPlaces.Initialization.prepare_parser import new_parse
+from MarketPlaces.ViceCity.parser import vicecity_links_parser
+from MarketPlaces.Utilities.utilities import cleanHTML
+
+config = configparser.ConfigParser()  # holds the settings read from setup.ini (the [TOR] paths are used below)
+config.read('../../setup.ini')  # relative path: resolved against the current working directory
+counter = 1  # next fallback file name handed out by getNameFromURL when a url has no alphanumeric characters
+baseURL = 'http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion/'  # root used to resolve product links
+
+
+# Main routine of this crawler; crawler() at the end of this file calls it.
+# NOTE: the Tor start-up, login, crawl and shutdown steps are commented out below, so only new_parse() runs right now.
+def startCrawling():
+    # opentor()
+    mktName = getMKTName()
+    # driver = getAccess()
+    #
+    # if driver != 'down':
+    #     try:
+    #         login(driver)
+    #         crawlForum(driver)
+    #     except Exception as e:
+    #         print(driver.current_url, e)
+    #     closetor(driver)
+
+    new_parse(mktName, baseURL, True)
+
+
+# Launches the executable configured as [TOR] firefox_binary_path in setup.ini,
+# then blocks until the operator presses ENTER
+def opentor():
+    """Start the Tor browser process, store its process id in the global `pid`, wait for ENTER."""
+    global pid
+    print("Connecting Tor...")
+    tor_process = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
+    pid = tor_process.pid
+    time.sleep(7.5)
+    input('Tor Connected. Press ENTER to continue\n')
+
+
+# Returns the name of the marketplace this crawler handles
+#return: the marketplace name as a string
+def getMKTName():
+    """Return the marketplace identifier 'ViceCity'."""
+    return 'ViceCity'
+
+
+# Returns the root address of the marketplace, the first page getAccess() opens
+#return: the base .onion url as a string
+def getFixedURL():
+    """Return the ViceCity base url, including its trailing slash."""
+    return 'http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion/'
+
+
+# Closes the browser window controlled by the selenium driver
+#@param: current selenium driver
+def closetor(driver):
+    """Close the driver's current window, then pause for three seconds."""
+    # The taskkill calls that would end the Tor process itself are left disabled:
+    # os.system("taskkill /pid " + str(pro.pid))
+    # os.system("taskkill /t /f /im tor.exe")
+    print('Closing Tor...')
+    driver.close()
+    time.sleep(3)
+
+
+# Creates the FireFox 'driver' from the binary, profile and geckodriver paths in setup.ini
+# and configures its 'Profile' to use the Tor proxy and socket
+def createFFDriver():
+    """Return a selenium Firefox driver whose profile uses the SOCKS5 proxy at 127.0.0.1:9150.
+
+    All three paths are read from the [TOR] section of setup.ini.
+    """
+    binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
+    profile = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
+    # Preferences written to the profile: proxy type 1, SOCKS v5 on 127.0.0.1:9150
+    # with DNS resolved through the proxy, and JavaScript switched off.
+    proxy_preferences = (
+        ('network.proxy.type', 1),
+        ("network.proxy.socks_version", 5),
+        ('network.proxy.socks', '127.0.0.1'),
+        ('network.proxy.socks_port', 9150),
+        ('network.proxy.socks_remote_dns', True),
+        ("javascript.enabled", False),
+    )
+    for pref_name, pref_value in proxy_preferences:
+        profile.set_preference(pref_name, pref_value)
+    profile.update_preferences()
+
+    gecko_service = Service(config.get('TOR', 'geckodriver_path'))
+
+    # NOTE(review): firefox_binary / firefox_profile are legacy keyword arguments;
+    # confirm they are still accepted by the selenium version in use.
+    return webdriver.Firefox(
+        firefox_binary=binary,
+        firefox_profile=profile,
+        service=gecko_service,
+    )
+
+# Creates the driver and tries to open the marketplace base url with it
+#return: the selenium driver on success, or the string 'down' when the page cannot be loaded
+def getAccess():
+    """Open getFixedURL() in a new driver; return the driver, or 'down' after closing it on failure."""
+    driver = createFFDriver()
+    try:
+        driver.get(getFixedURL())
+    except Exception:  # not a bare except, so Ctrl-C / SystemExit are no longer reported as 'down'
+        driver.close()
+        return 'down'
+    return driver
+
+
+# Logs in with manual help: the operator solves each captcha in the browser window and confirms with ENTER
+# in the terminal, while the script types the username, password and pin and clicks through the forms
+#@param: current selenium web driver
+def login(driver):
+    # wait (timeout 100) for first captcha page to show up (This Xpath may need to change based on different seed url)
+    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
+        (By.XPATH, "/html/body/div/div/form/div/div[1]")))
+    input("Press Enter once captcha done (dont press done)")
+    # clicks the form button after the captcha has been entered
+    driver.find_element(by=By.XPATH, value='/html/body/div/div/form/button').click()
+
+    # wait for login page to show up
+    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
+        (By.XPATH, '/html/body/div/div/div/form')))
+    # puts username into box -- NOTE(review): the account name is hard-coded in source
+    userBox = driver.find_element(by=By.XPATH, value='//*[@id="username"]')
+    userBox.send_keys('ct1234')
+
+    # waits for second captcha to be entered by the user
+    input("Press Enter once captcha done (dont press continue)")
+    # clicks on continue
+    driver.find_element(by=By.XPATH, value='/html/body/div/div/div/form/input[2]').click()
+
+    # waits for the password form to show (the awaited input is the one clicked after the password is typed)
+    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
+        (By.XPATH, '/html/body/div/div/div/form/div[3]/input')))
+    time.sleep(10) # give time for site to catch up
+    # puts password into box -- NOTE(review): the password is hard-coded in source
+    passBox = driver.find_element(by=By.XPATH, value='/html/body/div/div/div/form/div[2]/input')
+    passBox.send_keys('DementedBed123-')
+    driver.find_element(by=By.XPATH, value='/html/body/div/div/div/form/div[3]/input').click()
+
+    # wait for pin input to show, then type the (hard-coded) pin and submit it
+    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
+        (By.XPATH, '/html/body/div[1]/div/form/span')))
+    pinBox = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div/form/input[1]')
+    pinBox.send_keys('12345')
+    driver.find_element(by=By.XPATH, value='/html/body/div[1]/div/form/input[2]').click()
+
+    # waits for main listing page before crawling to ensure everything goes well
+    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
+        (By.XPATH, '/html/body/div[1]/div/div[2]')))
+
+
+# Saves the cleaned html of a crawled page (UTF-8), making the directory path for html pages if not made
+def savePage(page, url):
+    cleanPage = cleanHTML(page)
+    filePath = getFullPathName(url)
+    os.makedirs(os.path.dirname(filePath), exist_ok=True)
+    with open(filePath, 'wb') as pageFile:  # 'with' closes the handle even when write() raises
+        pageFile.write(cleanPage.encode('utf-8'))
+
+
+# Gets the full path of the page to be saved along with its appropriate file name
+#@param: raw url as crawler crawls through every site
+#return: relative path made of the date folder, 'Description' or 'Listing', and <name>.html
+def getFullPathName(url):
+    """Build the save path for url below the ViceCity HTML_Pages folder, in a MMDDYYYY sub-folder."""
+    fileName = getNameFromURL(url)
+    # Read the clock once so month, day and year cannot come from different days
+    today = date.today()
+    dateFolder = "%02d%02d%04d" % (today.month, today.day, today.year)
+    # Description pages and listing pages go to separate sub-folders
+    pageFolder = r'Description\\' if isDescriptionLink(url) else r'Listing\\'
+    # Separators are reproduced exactly as before (the raw strings keep their doubled backslashes)
+    return r'..\ViceCity\HTML_Pages\\' + dateFolder + r'\\' + pageFolder + fileName + '.html'
+
+
+# Creates the file name from the passed URL by keeping only its alphanumeric characters;
+# when nothing is left, the current value of the global counter is used and the counter advances
+def getNameFromURL(url):
+    """Return the alphanumeric characters of url, or str(counter) when there are none."""
+    global counter
+    cleaned = ''.join(filter(str.isalnum, url))
+    if not cleaned:
+        cleaned, counter = str(counter), counter + 1
+    return cleaned
+
+
+# Returns the list of urls the crawler runs through; each url is a category listing page.
+# Only category 150 is active: the entries for categories 94, 155 and 153 are commented out.
+#return: list of url strings
+def getInterestedLinks():
+    """Return the category listing urls to crawl."""
+    return [
+        # Digital - Fraud Software, Has Hacking and Guides
+        'http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion/?category=150',
+        # Digital - Guides and Tutorials
+        # 'http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion/?category=94',
+
+        # Carding Services
+        # 'http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion/?category=155',
+
+        # Digital - Other (half junk half random stuff like: bots, rats, viruses, and guides)
+        # 'http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion/?category=153',
+    ]
+
+
+# Walks every category url from getInterestedLinks(): saves each listing page, visits the product
+# (description) pages found on it and saves them, then follows the 'Next' pagination link
+#@param: selenium driver
+def crawlForum(driver):
+    """Crawl and save the listing and description pages of the ViceCity market."""
+    print("Crawling the ViceCity Market")
+
+    linksToCrawl = getInterestedLinks()
+    initialTime = time.time()
+
+    count = 0
+    # `link` is reassigned while paging, so the error report names the page that was being loaded
+    for link in linksToCrawl:
+        print('Crawling :', link)
+        try:
+            try:
+                driver.get(link)
+            except Exception:
+                driver.refresh()
+            html = driver.page_source
+            savePage(html, link)
+
+            has_next_page = True
+            while has_next_page:
+                # productPages() parses the listing html and returns the product links on it
+                productLinks = productPages(html)
+                for item in productLinks:
+                    itemURL = urlparse.urljoin(baseURL, str(item))
+                    try:
+                        driver.get(itemURL)
+                    except Exception:
+                        driver.refresh()
+                    time.sleep(2.5)  # to let page catchup
+                    savePage(driver.page_source, item)
+                    driver.back()
+
+                    # Test limiter: only the first product of each listing page is visited.
+                    # Remove this break to crawl every product.
+                    break
+
+                # # comment out
+                # if count == 1:
+                #    count = 0
+                #    break
+
+                try:
+                    pagination = driver.find_element(by=By.CLASS_NAME, value='pagination')
+                    link = pagination.find_element(by=By.LINK_TEXT, value='Next').get_attribute('href')
+                    # get_attribute gives None when the anchor has no href, so test for any falsy value
+                    if not link:
+                        raise NoSuchElementException
+                    try:
+                        driver.get(link)
+                    except Exception:
+                        driver.refresh()
+                    html = driver.page_source
+                    savePage(html, link)
+                    count += 1
+
+                except NoSuchElementException:
+                    has_next_page = False
+
+        except Exception as e:
+            print(link, e)
+
+    # finalTime = time.time()
+    # print finalTime - initialTime
+
+    input("Crawling ViceCity  done sucessfully. Press ENTER to continue\n")
+
+
+# Tells whether a crawled url belongs to a description (product) page
+#@param: url of any url crawled
+#return: true if is a description page, false if not
+def isDescriptionLink(url):
+    """Return True when the substring 'listing' occurs in url."""
+    # a url containing 'listing' is treated as a description page
+    return 'listing' in url
+
+
+# Tells whether a crawled url belongs to a listing (category) page
+#@param: url of any url crawled
+#return: true if is a Listing page, false if not
+def isListingLink(url):
+    """Return True when the substring 'category' occurs in url."""
+    # the urls from getInterestedLinks() all carry a 'category' query parameter
+    return 'category' in url
+
+
+# Calls the parser on a saved listing page to collect the links found on it
+#@param: html source of a page reached from the interested link list ie. getInterestedLinks()
+#return: list of description links that should be crawled through
+def productPages(html):
+    """Parse html with BeautifulSoup ("html.parser") and return vicecity_links_parser's result."""
+    return vicecity_links_parser(BeautifulSoup(html, "html.parser"))
+
+
+def crawler():
+    """Entry point that markets_mining imports as crawlerViceCity; runs startCrawling()."""
+    startCrawling()
diff --git a/MarketPlaces/ViceCity/parser.py b/MarketPlaces/ViceCity/parser.py
index 2508b2d..65d6b8f 100644
--- a/MarketPlaces/ViceCity/parser.py
+++ b/MarketPlaces/ViceCity/parser.py
@@ -8,7 +8,7 @@ from bs4 import BeautifulSoup
 
 
 # This is the method to parse the Description Pages (one page to each Product in the Listing Pages)
-def tor2door_description_parser(soup):
+def vicecity_description_parser(soup):
     # Fields to be parsed
 
     vendor = "-1"  # 0 *Vendor_Name
@@ -31,62 +31,68 @@ def tor2door_description_parser(soup):
     shipFrom = "-1"  # 17 Product_ShippedFrom
     shipTo = "-1"  # 18 Product_ShippedTo
 
-    bae = soup.find('div', {'class': "col-9"})
-
     # Finding Product Name
-    name = bae.find('h2').text
+    name = soup.find('div', {'class': "listing_info"}).find('div', {'class': "listing_right"})
+    name = name.find('span', {'style': "font-size:18px;font-weight: bold;color: #fff"}).text
     name = name.replace('\n', ' ')
     name = name.replace(",", "")
     name = name.strip()
 
-    mb = bae.findAll('div', {"class": "mb-1"})
-
     # Finding Vendor
-    vendor = mb[0].text
-    vendor = vendor.replace(",", "")
-    vendor = vendor.replace("Sold by:", "")
-    vendor = vendor.strip()
+    vendor = soup.find('div', {'class': "listing_info"}).find('div', {'class': "listing_right"}).find('a').text.strip()
 
-    # # Finding Vendor Rating
-    # full_stars = bae[2].find_all('i', {'class': "fas fa-star"})
-    # half_star = bae[2].find('i', {'class': "fas fa-star-half-alt"})
-    # rating = len(full_stars) + (0.5 if half_star is not None else 0)
+    # Finding Vendor Rating
+    rating = soup.find('div', {'class': "listing_info"}).find('div', {'class': "listing_right"}).find('a').get('title')
+    rating = str(re.match(r"\d+%", rating)).strip()
 
     # Finding Quantity Sold and Left
-    temp = mb[4].text.split(',')
+    # temp = mb[4].text.split(',')
+    #
+    # sold = temp[0].replace("sold", "")
+    # sold = sold.strip()
+    #
+    # left = temp[1].replace("in stock", "")
+    # left = left.strip()
 
-    sold = temp[0].replace("sold", "")
-    sold = sold.strip()
+    # Finding Successful Transactions
+    success = soup.find('div', {'class': "listing_info"}).find('div', {'class': "listing_right"}).find('a').get('title')
+    success = str(re.compile(r"\d+(?= sales)", success)).strip()
 
-    left = temp[1].replace("in stock", "")
-    left = left.strip()
+    bae = soup.find('pre')
 
     # Finding USD
-    USD = bae.find('div', {"class": "h3 text-secondary"}).text
-    USD = USD.replace("$", "")
-    USD = USD.strip()
+    USD = bae.find('span').text
+    USD = str(re.compile(r"\$\d+(?:\.\d+)?", USD))
+    USD = USD.replace("$", "").strip()
 
     # Finding BTC
-    temp = bae.find('div', {"class": "small"}).text.split("BTC")
+    BTC = bae.findall('span')
+    BTC = str(re.compile(r"\d+(?:\.\d+)?", BTC[1].text)).strip()
 
-    BTC = temp[0].strip()
+    # Finding the Product Category
+    category = soup.find('div', {'class': "listing_info"}).find('div', {'class': "listing_right"})
+    category = category.find('span', {'style': "font-size:15px;color: #a1a1a1"}).text
+    category = category.replace("Category:", "").strip()
 
-    # shipping_info = bae[4].text
-    # if "Digital" not in shipping_info:
-    #     shipping_info = shipping_info.split("  ")
-    #
-    #     # Finding Shipment Information (Origin)
-    #     shipFrom = shipping_info[0].strip()
-    #
-    #     # Finding Shipment Information (Destination)
-    #     shipTo = shipping_info[1].strip()
+    li = bae.find('span', {'style': "float:right"}).find_all('span')
+
+    # Finding Shipment Information (Origin)
+    shipFrom = li[1].text.strip()
+
+    # Finding Shipment Information (Destination)
+    shipTo = li[-2].text.strip()
 
     # Finding the Product description
-    describe = bae.find('div', {"class": "card border-top-0"}).text
+    describe = soup.find('p', {
+        'style': "width:705px;margin-left:-305px;background-color: #242424;border-radius: 3px;border: 1px solid #373737;padding: 5px;"}).text
     describe = describe.replace("\n", " ")
-    describe = describe.replace("\r", " ")
     describe = describe.strip()
 
+    # Finding the Number of Product Reviews
+    li = soup.find_all('label', {'class': "tc_label threetabs"})
+    review = li[1].text
+    review = str(re.compile(r"\d+", review)).strip()
+
     # Searching for CVE and MS categories
     cve = soup.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
     if cve:
@@ -114,10 +120,10 @@ def tor2door_description_parser(soup):
 
 
 # This is the method to parse the Listing Pages
-def tor2door_listing_parser(soup):
+def vicecity_listing_parser(soup):
     # Fields to be parsed
     nm = 0  # *Total_Products (Should be Integer)
-    mktName = "Tor2door"  # 0 *Marketplace_Name
+    mktName = "ViceCity"  # 0 *Marketplace_Name
     vendor = []  # 1 *Vendor y
     rating_vendor = []  # 2 Vendor_Rating
     success = []  # 3 Vendor_Successful_Transactions
@@ -139,56 +145,69 @@ def tor2door_listing_parser(soup):
     shipTo = []  # 19 Product_ShippedTo
     href = []  # 20 Product_Links
 
-    listing = soup.findAll('div', {"class": "card product-card mb-3"})
+    listing = soup.find('div', {"class": "frontpage"}).findAll('div', {"class": "wLf"})
 
     # Populating the Number of Products
     nm = len(listing)
 
-    # Finding Category
-    cat = soup.find("div", {"class": "col-9"})
-    cat = cat.find("h2").text
-    cat = cat.replace("Category: ", "")
-    cat = cat.replace(",", "")
-    cat = cat.strip()
+    # # Finding Category
+    # cat = soup.find("div", {"class": "col-9"})
+    # cat = cat.find("h2").text
+    # cat = cat.replace("Category: ", "")
+    # cat = cat.replace(",", "")
+    # cat = cat.strip()
 
-    for card in listing:
-        category.append(cat)
+    for a in listing:
+        # category.append(cat)
 
-        bae = card.findAll('a')
+        # bae = card.findAll('a')
 
         # Adding the url to the list of urls
-        link = bae[0].get('href')
+        link = a.find('div', {"class": "wLfLeft"}).find('a', href=True).get('href')
+        link = cleanLink(link)
         href.append(link)
 
-        # Finding Product Name
-        product = bae[1].text
+        # Finding the Product Name
+        product = a.find('div', {"class": "wLfName"}).find('a').text
         product = product.replace('\n', ' ')
         product = product.replace(",", "")
+        product = product.replace("...", "")
         product = product.strip()
         name.append(product)
 
-        # Finding Vendor
-        vendor_name = bae[2].text
+        # Finding the Vendor
+        vendor_name = a.find('div', {"class": "wLfVendor"}).find('a').text
         vendor_name = vendor_name.replace(",", "")
         vendor_name = vendor_name.strip()
         vendor.append(vendor_name)
 
-        # Finding USD
-        usd = card.find('div', {"class": "mb-1"}).text
-        usd = usd.replace("$", "")
-        usd = usd.strip()
-        USD.append(usd)
-
-        # Finding Reviews
-        num = card.find("span", {"class": "rate-count"}).text
-        num = num.replace("(", "")
-        num = num.replace("review)", "")
-        num = num.replace("reviews)", "")
-        num = num.strip()
-        reviews.append(num)
+        # Finding Prices
+        price = a.find('div', {"class": "wLfPrice"}).find_all('span')
+        ud = price[0].text.replace(" USD", " ")
+        # u = ud.replace("$","")
+        u = ud.replace(",", "")
+        u = u.strip()
+        USD.append(u)
+        bc = price[1].text
+        bc = str(re.compile(r"\d+(?:\.\d+)?", bc))
+        BTC.append(bc)
+
+        # # Finding Reviews
+        # num = card.find("span", {"class": "rate-count"}).text
+        # num = num.replace("(", "")
+        # num = num.replace("review)", "")
+        # num = num.replace("reviews)", "")
+        # num = num.strip()
+        # reviews.append(num)
+
+        # Finding Successful Transactions
+        freq = a.find('div', {"class": "wLfVendor"}).find('a').get('title')
+        freq = re.compile(r'\d+(?= sales)', freq)
+        freq = freq.strip()
+        success.append(freq)
 
         # Searching for CVE and MS categories
-        cve = card.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
+        cve = a.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
         if not cve:
             cveValue = "-1"
         else:
@@ -201,7 +220,7 @@ def tor2door_listing_parser(soup):
             cveValue = cee
         CVE.append(cveValue)
 
-        ms = card.findAll(text=re.compile('MS\d{2}-\d{3}'))
+        ms = a.findAll(text=re.compile('MS\d{2}-\d{3}'))
         if not ms:
             MSValue = "-1"
         else:
@@ -219,14 +238,15 @@ def tor2door_listing_parser(soup):
                             reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
 
 
-def tor2door_links_parser(soup):
+def vicecity_links_parser(soup):
     # Returning all links that should be visited by the Crawler
-    href = []
 
-    listing = soup.findAll('div', {"class": "card product-card mb-3"})
+    href = []
+    listing = soup.find('div', {"class": "frontpage"}).findAll('div', {"class": "wLf"})
 
-    for div in listing:
-        link = div.find('a')['href']
+    for a in listing:
+        bae = a.find('div', {"class": "wLfLeft"}).find('a', href=True)
+        link = bae['href']
         href.append(link)
 
-    return href
\ No newline at end of file
+    return href

From abeb2c0df0f87e6d22760d0f89d2591c6948235a Mon Sep 17 00:00:00 2001
From: Joshua <student.joshuaestrada@gmail.com>
Date: Fri, 14 Jul 2023 12:21:36 -0700
Subject: [PATCH 3/4] fixed bugs with crawler not getting all pages and parser
 breaking

---
 MarketPlaces/Initialization/geckodriver.log | 147 ++++++++++++++++++++
 MarketPlaces/Initialization/marketsList.txt |   2 +-
 MarketPlaces/ViceCity/crawler_selenium.py   |  16 +--
 MarketPlaces/ViceCity/parser.py             |  58 ++++++--
 setup.ini                                   |  13 +-
 5 files changed, 206 insertions(+), 30 deletions(-)

diff --git a/MarketPlaces/Initialization/geckodriver.log b/MarketPlaces/Initialization/geckodriver.log
index 8b61c4e..c45f256 100644
--- a/MarketPlaces/Initialization/geckodriver.log
+++ b/MarketPlaces/Initialization/geckodriver.log
@@ -15532,3 +15532,150 @@ DevTools listening on ws://localhost:51081/devtools/browser/ef699bfb-b8a4-403a-a
 1689136181511	RemoteAgent	WARN	TLS certificate errors will be ignored for this session
 JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
 JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+1689305282590	geckodriver	INFO	Listening on 127.0.0.1:57612
+1689305286344	mozrunner::runner	INFO	Running command: "C:\\Users\\John Wick\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "57613" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\JOHNWI~1\\AppData\\Local\\Temp\\rust_mozprofileW1wjHz"
+console.log: "TorSettings: loadFromPrefs()"
+console.log: "TorConnect: init()"
+console.log: "TorConnect: Entering Initial state"
+console.log: "TorConnect: Observed profile-after-change"
+console.log: "TorConnect: Observing topic 'TorProcessExited'"
+console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
+console.log: "TorConnect: Observing topic 'torsettings:ready'"
+console.log: "TorSettings: Observed profile-after-change"
+1689305287006	Marionette	INFO	Marionette enabled
+console.log: "TorConnect: Will load after bootstrap => [about:blank]"
+console.error: "Could not load engine blockchair-onion@search.mozilla.org: Error: Extension is invalid"
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
+DevTools listening on ws://localhost:57613/devtools/browser/9cb995f7-f1d0-45e1-a9ae-0903f91679e2
+1689305288403	Marionette	INFO	Listening on port 57618
+1689305288510	RemoteAgent	WARN	TLS certificate errors will be ignored for this session
+1689305558621	Marionette	WARN	Ignoring event 'DOMContentLoaded' because document has an invalid readyState of 'complete'.
+1689305591430	Marionette	WARN	Ignoring event 'DOMContentLoaded' because document has an invalid readyState of 'complete'.
+1689305927779	Marionette	INFO	Stopped listening on port 57618
+JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
+!!! error running onStopped callback: TypeError: callback is not a function
+1689305927959	RemoteAgent	ERROR	unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]"  nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)"  location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64"  data: no] Stack trace: stop()@TargetObserver.jsm:64
+unwatchForTabs()@TargetList.jsm:70
+unwatchForTargets()@TargetList.jsm:37
+destructor()@TargetList.jsm:109
+stop()@CDP.jsm:104
+close()@RemoteAgent.jsm:138
+1689306152997	geckodriver	INFO	Listening on 127.0.0.1:62728
+1689306156730	mozrunner::runner	INFO	Running command: "C:\\Users\\John Wick\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "62729" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\JOHNWI~1\\AppData\\Local\\Temp\\rust_mozprofileQfWfpc"
+console.log: "TorSettings: loadFromPrefs()"
+console.log: "TorConnect: init()"
+console.log: "TorConnect: Entering Initial state"
+console.log: "TorConnect: Observed profile-after-change"
+console.log: "TorConnect: Observing topic 'TorProcessExited'"
+console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
+console.log: "TorConnect: Observing topic 'torsettings:ready'"
+console.log: "TorSettings: Observed profile-after-change"
+1689306157335	Marionette	INFO	Marionette enabled
+console.log: "TorConnect: Will load after bootstrap => [about:blank]"
+console.error: "Could not load engine blockchair-onion@search.mozilla.org: Error: Extension is invalid"
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
+DevTools listening on ws://localhost:62729/devtools/browser/90212f30-1413-403a-a4d6-85a9ad71de86
+1689306158784	Marionette	INFO	Listening on port 62734
+1689306158827	RemoteAgent	WARN	TLS certificate errors will be ignored for this session
+1689306327168	Marionette	WARN	Ignoring event 'DOMContentLoaded' because document has an invalid readyState of 'complete'.
+1689306352097	Marionette	WARN	Ignoring event 'DOMContentLoaded' because document has an invalid readyState of 'complete'.
+1689306672567	Marionette	INFO	Stopped listening on port 62734
+JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
+!!! error running onStopped callback: TypeError: callback is not a function
+
+###!!! [Parent][MessageChannel] Error: (msgtype=0x390076,name=PContent::Msg_DestroyBrowsingContextGroup) Closed channel: cannot send/recv
+
+
+###!!! [Child][MessageChannel] Error: (msgtype=0x23002E,name=PBrowser::Msg___delete__) Channel closing: too late to send/recv, messages will be lost
+
+
+###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
+
+1689306672742	RemoteAgent	ERROR	unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]"  nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)"  location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64"  data: no] Stack trace: stop()@TargetObserver.jsm:64
+unwatchForTabs()@TargetList.jsm:70
+unwatchForTargets()@TargetList.jsm:37
+destructor()@TargetList.jsm:109
+stop()@CDP.jsm:104
+close()@RemoteAgent.jsm:138
+1689353148352	geckodriver	INFO	Listening on 127.0.0.1:57720
+1689353152386	mozrunner::runner	INFO	Running command: "C:\\Users\\John Wick\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "57721" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\JOHNWI~1\\AppData\\Local\\Temp\\rust_mozprofilebdVBHT"
+console.log: "TorSettings: loadFromPrefs()"
+console.log: "TorConnect: init()"
+console.log: "TorConnect: Entering Initial state"
+console.log: "TorConnect: Observed profile-after-change"
+console.log: "TorConnect: Observing topic 'TorProcessExited'"
+console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
+console.log: "TorConnect: Observing topic 'torsettings:ready'"
+console.log: "TorSettings: Observed profile-after-change"
+1689353153078	Marionette	INFO	Marionette enabled
+console.log: "TorConnect: Will load after bootstrap => [about:blank]"
+console.error: "Could not load engine blockchair-onion@search.mozilla.org: Error: Extension is invalid"
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
+DevTools listening on ws://localhost:57721/devtools/browser/66aa6550-8450-49a2-be19-7728fc52cb65
+1689353154754	Marionette	INFO	Listening on port 57726
+1689353155234	RemoteAgent	WARN	TLS certificate errors will be ignored for this session
+JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\John Wick\AppData\Local\Temp\rust_mozprofilebdVBHT\thumbnails) because it does not exist
+1689353351388	Marionette	WARN	Ignoring event 'DOMContentLoaded' because document has an invalid readyState of 'complete'.
+1689353375169	Marionette	WARN	Ignoring event 'DOMContentLoaded' because document has an invalid readyState of 'complete'.
+1689353609409	Marionette	INFO	Stopped listening on port 57726
+JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
+!!! error running onStopped callback: TypeError: callback is not a function
+1689353609555	RemoteAgent	ERROR	unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]"  nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)"  location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64"  data: no] Stack trace: stop()@TargetObserver.jsm:64
+unwatchForTabs()@TargetList.jsm:70
+unwatchForTargets()@TargetList.jsm:37
+destructor()@TargetList.jsm:109
+stop()@CDP.jsm:104
+close()@RemoteAgent.jsm:138
+1689358893192	geckodriver	INFO	Listening on 127.0.0.1:53304
+1689358897088	mozrunner::runner	INFO	Running command: "C:\\Users\\John Wick\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "53305" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\JOHNWI~1\\AppData\\Local\\Temp\\rust_mozprofile5c9ZQ4"
+console.log: "TorSettings: loadFromPrefs()"
+console.log: "TorConnect: init()"
+console.log: "TorConnect: Entering Initial state"
+console.log: "TorConnect: Observed profile-after-change"
+console.log: "TorConnect: Observing topic 'TorProcessExited'"
+console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
+console.log: "TorConnect: Observing topic 'torsettings:ready'"
+console.log: "TorSettings: Observed profile-after-change"
+1689358897866	Marionette	INFO	Marionette enabled
+console.log: "TorConnect: Will load after bootstrap => [about:blank]"
+console.error: "Could not load engine blockchair-onion@search.mozilla.org: Error: Extension is invalid"
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
+DevTools listening on ws://localhost:53305/devtools/browser/2f57a39e-c4c4-4c89-af0b-cc8d26d8a863
+1689358899540	Marionette	INFO	Listening on port 53310
+1689358899767	RemoteAgent	WARN	TLS certificate errors will be ignored for this session
+JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\John Wick\AppData\Local\Temp\rust_mozprofile5c9ZQ4\thumbnails) because it does not exist
+1689359085260	Marionette	WARN	Ignoring event 'DOMContentLoaded' because document has an invalid readyState of 'complete'.
+1689359112369	Marionette	WARN	Ignoring event 'DOMContentLoaded' because document has an invalid readyState of 'complete'.
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+1689360786237	Marionette	INFO	Stopped listening on port 53310
+JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
+!!! error running onStopped callback: TypeError: callback is not a function
+
+###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
+
+
+###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
+
+1689360786406	RemoteAgent	ERROR	unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]"  nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)"  location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64"  data: no] Stack trace: stop()@TargetObserver.jsm:64
+unwatchForTabs()@TargetList.jsm:70
+unwatchForTargets()@TargetList.jsm:37
+destructor()@TargetList.jsm:109
+stop()@CDP.jsm:104
+close()@RemoteAgent.jsm:138
diff --git a/MarketPlaces/Initialization/marketsList.txt b/MarketPlaces/Initialization/marketsList.txt
index 19c6d10..a427446 100644
--- a/MarketPlaces/Initialization/marketsList.txt
+++ b/MarketPlaces/Initialization/marketsList.txt
@@ -1 +1 @@
-M00nkeyMarket
+ViceCity
diff --git a/MarketPlaces/ViceCity/crawler_selenium.py b/MarketPlaces/ViceCity/crawler_selenium.py
index 0b22082..584a90c 100644
--- a/MarketPlaces/ViceCity/crawler_selenium.py
+++ b/MarketPlaces/ViceCity/crawler_selenium.py
@@ -46,7 +46,7 @@ def startCrawling():
     #         print(driver.current_url, e)
     #     closetor(driver)
 
-    new_parse(mktName, baseURL, True)
+    new_parse(mktName, baseURL, False)
 
 
 # Opens Tor Browser
@@ -189,15 +189,14 @@ def savePage(page, url):
 # Gets the full path of the page to be saved along with its appropriate file name
 #@param: raw url as crawler crawls through every site
 def getFullPathName(url):
+    from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
+
+    mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
     fileName = getNameFromURL(url)
     if isDescriptionLink(url):
-        fullPath = r'..\ViceCity\HTML_Pages\\' + str(
-            "%02d" % date.today().month) + str("%02d" % date.today().day) + str(
-            "%04d" % date.today().year) + r'\\' + r'Description\\' + fileName + '.html'
+        fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
     else:
-        fullPath = r'..\ViceCity\HTML_Pages\\' + str(
-            "%02d" % date.today().month) + str("%02d" % date.today().day) + str(
-            "%04d" % date.today().year) + r'\\' + r'Listing\\' + fileName + '.html'
+        fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
     return fullPath
 
 
@@ -266,10 +265,11 @@ def crawlForum(driver):
                         driver.refresh()
                     time.sleep(2.5) # to let page catchup
                     savePage(driver.page_source, item)
+                    time.sleep(2.5) # so site doesnt crash
                     driver.back()
 
                     #comment out
-                    break
+                    # break
 
                 # # comment out
                 # if count == 1:
diff --git a/MarketPlaces/ViceCity/parser.py b/MarketPlaces/ViceCity/parser.py
index 65d6b8f..eeb97a3 100644
--- a/MarketPlaces/ViceCity/parser.py
+++ b/MarketPlaces/ViceCity/parser.py
@@ -1,5 +1,7 @@
 __author__ = 'DarkWeb'
 
+import re
+
 # Here, we are importing the auxiliary functions to clean or convert data
 from MarketPlaces.Utilities.utilities import *
 
@@ -39,11 +41,16 @@ def vicecity_description_parser(soup):
     name = name.strip()
 
     # Finding Vendor
-    vendor = soup.find('div', {'class': "listing_info"}).find('div', {'class': "listing_right"}).find('a').text.strip()
+    vendor = soup.find('div', {'class': "listing_info"})
+    vendor = vendor.find('div', {'class': "listing_right"})
+    numbers = vendor.find('a').find('span').text
+    vendor = vendor.find('a').text
+    vendor = vendor.replace(numbers, "").strip() # removes numbers at the end of vendor name
 
     # Finding Vendor Rating
     rating = soup.find('div', {'class': "listing_info"}).find('div', {'class': "listing_right"}).find('a').get('title')
-    rating = str(re.match(r"\d+%", rating)).strip()
+    rating = re.search(r"\d+%", rating)
+    rating_vendor = rating.group(0).strip()
 
     # Finding Quantity Sold and Left
     # temp = mb[4].text.split(',')
@@ -56,28 +63,31 @@ def vicecity_description_parser(soup):
 
     # Finding Successful Transactions
     success = soup.find('div', {'class': "listing_info"}).find('div', {'class': "listing_right"}).find('a').get('title')
-    success = str(re.compile(r"\d+(?= sales)", success)).strip()
+    success = re.search(r"\d+(?= sales)", success)
+    success = success.group(0).strip()
+
 
     bae = soup.find('pre')
 
     # Finding USD
     USD = bae.find('span').text
-    USD = str(re.compile(r"\$\d+(?:\.\d+)?", USD))
+    USD = re.search(r"\$\d+(?:\.\d+)?", USD).group(0)
     USD = USD.replace("$", "").strip()
 
     # Finding BTC
-    BTC = bae.findall('span')
-    BTC = str(re.compile(r"\d+(?:\.\d+)?", BTC[1].text)).strip()
+    BTC = bae.find_all('span')
+    BTC = re.search(r"\d+(?:\.\d+)?", BTC[1].text).group(0)
+    BTC = BTC.strip()
 
     # Finding the Product Category
     category = soup.find('div', {'class': "listing_info"}).find('div', {'class': "listing_right"})
     category = category.find('span', {'style': "font-size:15px;color: #a1a1a1"}).text
     category = category.replace("Category:", "").strip()
 
-    li = bae.find('span', {'style': "float:right"}).find_all('span')
+    li = bae.find_all('span')
 
     # Finding Shipment Information (Origin)
-    shipFrom = li[1].text.strip()
+    shipFrom = li[-4].text.strip()
 
     # Finding Shipment Information (Destination)
     shipTo = li[-2].text.strip()
@@ -91,7 +101,11 @@ def vicecity_description_parser(soup):
     # Finding the Number of Product Reviews
     li = soup.find_all('label', {'class': "tc_label threetabs"})
     review = li[1].text
-    review = str(re.compile(r"\d+", review)).strip()
+    review = re.search(r"\d+", review)
+    if review:
+        reviews = review.group(0).strip()
+    else:
+        reviews = '0'
 
     # Searching for CVE and MS categories
     cve = soup.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
@@ -145,7 +159,7 @@ def vicecity_listing_parser(soup):
     shipTo = []  # 19 Product_ShippedTo
     href = []  # 20 Product_Links
 
-    listing = soup.find('div', {"class": "frontpage"}).findAll('div', {"class": "wLf"})
+    listing = soup.findAll('div', {"class": "wLf"}) # should be 30
 
     # Populating the Number of Products
     nm = len(listing)
@@ -177,7 +191,9 @@ def vicecity_listing_parser(soup):
 
         # Finding the Vendor
         vendor_name = a.find('div', {"class": "wLfVendor"}).find('a').text
+        addedNums = a.find('div', {"class": "wLfVendor"}).find('a').find('span').text # finds numbers added at end
         vendor_name = vendor_name.replace(",", "")
+        vendor_name = vendor_name.replace(addedNums, "") # removes numbers added at end
         vendor_name = vendor_name.strip()
         vendor.append(vendor_name)
 
@@ -185,11 +201,12 @@ def vicecity_listing_parser(soup):
         price = a.find('div', {"class": "wLfPrice"}).find_all('span')
         ud = price[0].text.replace(" USD", " ")
         # u = ud.replace("$","")
-        u = ud.replace(",", "")
+        ud = ud.replace(",", "")
+        u = ud.replace(price[1].text, "")
         u = u.strip()
         USD.append(u)
         bc = price[1].text
-        bc = str(re.compile(r"\d+(?:\.\d+)?", bc))
+        bc = re.search(r"\d+(?:\.\d+)?", bc).group(0).strip()
         BTC.append(bc)
 
         # # Finding Reviews
@@ -202,10 +219,23 @@ def vicecity_listing_parser(soup):
 
         # Finding Successful Transactions
         freq = a.find('div', {"class": "wLfVendor"}).find('a').get('title')
-        freq = re.compile(r'\d+(?= sales)', freq)
+        freq = re.search(r'\d+(?= sales)', freq).group(0)
         freq = freq.strip()
         success.append(freq)
 
+        # Finding Ship from and ship to
+        place = a.find('div', {"class": "wLfPrice"})
+        place = place.find('span', {'style': "font-size: 12px;"}).text
+        place = place.split('⟶')
+        varFrom = place[0].strip()
+        varTo = place[1].strip()
+        if varFrom == "WW":
+            varFrom = "Worldwide"
+        if varTo == "WW":
+            varTo = "Worldwide"
+        shipFrom.append(varFrom)
+        shipTo.append(varTo)
+
         # Searching for CVE and MS categories
         cve = a.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
         if not cve:
@@ -242,7 +272,7 @@ def vicecity_links_parser(soup):
     # Returning all links that should be visited by the Crawler
 
     href = []
-    listing = soup.find('div', {"class": "frontpage"}).findAll('div', {"class": "wLf"})
+    listing = soup.findAll('div', {"class": "wLf"})
 
     for a in listing:
         bae = a.find('div', {"class": "wLfLeft"}).find('a', href=True)
diff --git a/setup.ini b/setup.ini
index 641d3f1..c87990a 100644
--- a/setup.ini
+++ b/setup.ini
@@ -1,15 +1,14 @@
-
 [TOR]
-firefox_binary_path = C:\\Users\\Helium\\Desktop\\Tor Browser\\Browser\\firefox.exe
-firefox_profile_path = C:\\Users\\Helium\\Desktop\\Tor Browser\\Browser\\TorBrowser\\Data\\Browser\\profile.default
-geckodriver_path = C:\\Users\\Helium\\PycharmProjects\\dw_pipeline_test\\selenium\\geckodriver.exe
+firefox_binary_path = C:\Users\John Wick\Desktop\Tor Browser\Browser\firefox.exe
+firefox_profile_path = C:\Users\John Wick\Desktop\Tor Browser\Browser\TorBrowser\Data\Browser\profile.default
+geckodriver_path = C:\Users\John Wick\PycharmProjects\dw_pipeline_test\selenium\geckodriver.exe
 
 [Project]
-project_directory = C:\\Users\\Helium\\PycharmProjects\\dw_pipeline_test
-shared_folder = \\VBoxSvr\\Shared
+project_directory = C:\Users\John Wick\PycharmProjects\dw_pipeline_test
+shared_folder = Z:\\VBoxSvr\\VM_Files_ (shared)
 
 [PostgreSQL]
 ip = localhost
 username = postgres
-password = password
+password = postgres
 database = darkweb_markets_forums
\ No newline at end of file

From c50fe116c210663ae9673ed32a3dcc7b74df4898 Mon Sep 17 00:00:00 2001
From: Joshua <student.joshuaestrada@gmail.com>
Date: Mon, 17 Jul 2023 19:39:45 -0700
Subject: [PATCH 4/4] Completed and Tested Vicecity

---
 MarketPlaces/Initialization/geckodriver.log | 67 +++++++++++++++++++++
 MarketPlaces/ViceCity/crawler_selenium.py   | 26 ++++----
 2 files changed, 80 insertions(+), 13 deletions(-)

diff --git a/MarketPlaces/Initialization/geckodriver.log b/MarketPlaces/Initialization/geckodriver.log
index c45f256..3aaedfc 100644
--- a/MarketPlaces/Initialization/geckodriver.log
+++ b/MarketPlaces/Initialization/geckodriver.log
@@ -15679,3 +15679,70 @@ unwatchForTargets()@TargetList.jsm:37
 destructor()@TargetList.jsm:109
 stop()@CDP.jsm:104
 close()@RemoteAgent.jsm:138
+1689645823453	geckodriver	INFO	Listening on 127.0.0.1:62446
+1689645827490	mozrunner::runner	INFO	Running command: "C:\\Users\\John Wick\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "62447" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\JOHNWI~1\\AppData\\Local\\Temp\\rust_mozprofilepVLlk4"
+console.log: "TorSettings: loadFromPrefs()"
+console.log: "TorConnect: init()"
+console.log: "TorConnect: Entering Initial state"
+console.log: "TorConnect: Observed profile-after-change"
+console.log: "TorConnect: Observing topic 'TorProcessExited'"
+console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
+console.log: "TorConnect: Observing topic 'torsettings:ready'"
+console.log: "TorSettings: Observed profile-after-change"
+1689645828210	Marionette	INFO	Marionette enabled
+console.log: "TorConnect: Will load after bootstrap => [about:blank]"
+console.error: "Could not load engine blockchair-onion@search.mozilla.org: Error: Extension is invalid"
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
+DevTools listening on ws://localhost:62447/devtools/browser/a3aaa3d3-8117-40bb-a1c4-fb8dd951c8b4
+1689645829865	Marionette	INFO	Listening on port 62452
+1689645830203	RemoteAgent	WARN	TLS certificate errors will be ignored for this session
+1689645966231	Marionette	INFO	Stopped listening on port 62452
+JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
+JavaScript error: chrome://remote/content/marionette/cert.js, line 55: NS_ERROR_NOT_AVAILABLE: Component returned failure code: 0x80040111 (NS_ERROR_NOT_AVAILABLE) [nsICertOverrideService.setDisableAllSecurityChecksAndLetAttackersInterceptMyData]
+!!! error running onStopped callback: TypeError: callback is not a function
+1689645966414	RemoteAgent	ERROR	unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]"  nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)"  location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64"  data: no] Stack trace: stop()@TargetObserver.jsm:64
+unwatchForTabs()@TargetList.jsm:70
+unwatchForTargets()@TargetList.jsm:37
+destructor()@TargetList.jsm:109
+stop()@CDP.jsm:104
+close()@RemoteAgent.jsm:138
+1689646027040	geckodriver	INFO	Listening on 127.0.0.1:51479
+1689646031030	mozrunner::runner	INFO	Running command: "C:\\Users\\John Wick\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "51480" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\JOHNWI~1\\AppData\\Local\\Temp\\rust_mozprofilea9Xzpl"
+console.log: "TorSettings: loadFromPrefs()"
+console.log: "TorConnect: init()"
+console.log: "TorConnect: Entering Initial state"
+console.log: "TorConnect: Observed profile-after-change"
+console.log: "TorConnect: Observing topic 'TorProcessExited'"
+console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
+console.log: "TorConnect: Observing topic 'torsettings:ready'"
+console.log: "TorSettings: Observed profile-after-change"
+1689646031751	Marionette	INFO	Marionette enabled
+console.log: "TorConnect: Will load after bootstrap => [about:blank]"
+console.error: "Could not load engine blockchair-onion@search.mozilla.org: Error: Extension is invalid"
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
+JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
+DevTools listening on ws://localhost:51480/devtools/browser/8c1f9a42-7805-4208-a9e1-870a0b3b2d6b
+1689646033369	Marionette	INFO	Listening on port 56864
+1689646033727	RemoteAgent	WARN	TLS certificate errors will be ignored for this session
+1689646216510	Marionette	WARN	Ignoring event 'DOMContentLoaded' because document has an invalid readyState of 'complete'.
+1689646242045	Marionette	WARN	Ignoring event 'DOMContentLoaded' because document has an invalid readyState of 'complete'.
+1689647720305	Marionette	INFO	Stopped listening on port 56864
+JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
+JavaScript error: chrome://remote/content/marionette/cert.js, line 55: NS_ERROR_NOT_AVAILABLE: Component returned failure code: 0x80040111 (NS_ERROR_NOT_AVAILABLE) [nsICertOverrideService.setDisableAllSecurityChecksAndLetAttackersInterceptMyData]
+!!! error running onStopped callback: TypeError: callback is not a function
+
+###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
+
+1689647720428	RemoteAgent	ERROR	unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]"  nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)"  location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64"  data: no] Stack trace: stop()@TargetObserver.jsm:64
+unwatchForTabs()@TargetList.jsm:70
+unwatchForTargets()@TargetList.jsm:37
+destructor()@TargetList.jsm:109
+stop()@CDP.jsm:104
+close()@RemoteAgent.jsm:138
diff --git a/MarketPlaces/ViceCity/crawler_selenium.py b/MarketPlaces/ViceCity/crawler_selenium.py
index 584a90c..e49bbda 100644
--- a/MarketPlaces/ViceCity/crawler_selenium.py
+++ b/MarketPlaces/ViceCity/crawler_selenium.py
@@ -34,17 +34,17 @@ baseURL = 'http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion
 # Opens Tor Browser, crawls the website, then parses, then closes tor
 #acts like the main method for the crawler, another function at the end of this code calls this function later
 def startCrawling():
-    # opentor()
+    opentor()
     mktName = getMKTName()
-    # driver = getAccess()
-    #
-    # if driver != 'down':
-    #     try:
-    #         login(driver)
-    #         crawlForum(driver)
-    #     except Exception as e:
-    #         print(driver.current_url, e)
-    #     closetor(driver)
+    driver = getAccess()
+
+    if driver != 'down':
+        try:
+            login(driver)
+            crawlForum(driver)
+        except Exception as e:
+            print(driver.current_url, e)
+        closetor(driver)
 
     new_parse(mktName, baseURL, False)
 
@@ -221,11 +221,11 @@ def getInterestedLinks():
     # Digital - Fraud Software, Has Hacking and Guides
     links.append('http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion/?category=150')
     # Digital - Guides and Tutorials
-    # links.append('http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion/?category=94')
+    links.append('http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion/?category=94')
     # Carding Services
-    # links.append('http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion/?category=155')
+    links.append('http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion/?category=155')
     # Digital - Other (half junk half random stuff like: bots, rats, viruses, and guides)
-    # links.append('http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion/?category=153')
+    links.append('http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion/?category=153')
 
     return links