commit 34af539238 (author unknown, 1 year ago)
59 changed files with 2479 additions and 1170 deletions
  1. .idea/DW_Pipeline_Test.iml (+16, -3)
  2. .idea/misc.xml (+1, -1)
  3. Forums/AbyssForum/crawler_selenium.py (+14, -26)
  4. Forums/Altenens/crawler_selenium.py (+9, -21)
  5. Forums/BestCardingWorld/crawler_selenium.py (+11, -24)
  6. Forums/BestCardingWorld/parser.py (+26, -24)
  7. Forums/Cardingleaks/crawler_selenium.py (+15, -27)
  8. Forums/CryptBB/crawler_selenium.py (+13, -25)
  9. Forums/CryptBB/parser.py (+1, -1)
  10. Forums/DB_Connection/db_connection.py (+0, -1)
  11. Forums/HiddenAnswers/crawler_selenium.py (+13, -25)
  12. Forums/HiddenAnswers/parser.py (+5, -2)
  13. Forums/Initialization/forumsList.txt (+0, -1)
  14. Forums/Initialization/prepare_parser.py (+5, -0)
  15. Forums/Libre/crawler_selenium.py (+14, -26)
  16. Forums/OnniForums/crawler_selenium.py (+12, -25)
  17. Forums/OnniForums/parser.py (+8, -7)
  18. Forums/Procrax/crawler_selenium.py (+13, -25)
  19. MarketPlaces/AnonymousMarketplace/crawler_selenium.py (+8, -26)
  20. MarketPlaces/AnonymousMarketplace/parser.py (+17, -20)
  21. MarketPlaces/Apocalypse/crawler_selenium.py (+14, -20)
  22. MarketPlaces/Apocalypse/parser.py (+23, -8)
  23. MarketPlaces/BlackPyramid/crawler_selenium.py (+3, -18)
  24. MarketPlaces/CityMarket/crawler_selenium.py (+3, -18)
  25. MarketPlaces/CypherMarketplace/crawler_selenium.py (+3, -18)
  26. MarketPlaces/DB_Connection/db_connection.py (+46, -31)
  27. MarketPlaces/DarkBazar/crawler_selenium.py (+262, -0)
  28. MarketPlaces/DarkBazar/parser.py (+289, -0)
  29. MarketPlaces/DarkFox/crawler_selenium.py (+6, -21)
  30. MarketPlaces/DarkMatter/crawler_selenium.py (+6, -23)
  31. MarketPlaces/DarkMatter/parser.py (+106, -134)
  32. MarketPlaces/DarkTor/crawler_selenium.py (+4, -18)
  33. MarketPlaces/DigitalThriftShop/crawler_selenium.py (+3, -17)
  34. MarketPlaces/DigitalThriftShop/parser.py (+19, -6)
  35. MarketPlaces/HiddenMarket/crawler_selenium.py (+7, -21)
  36. MarketPlaces/HiddenMarket/parser.py (+21, -7)
  37. MarketPlaces/Initialization/marketsList.txt (+4, -0)
  38. MarketPlaces/Initialization/markets_mining.py (+6, -0)
  39. MarketPlaces/Initialization/prepare_parser.py (+16, -2)
  40. MarketPlaces/LionMarketplace/crawler_selenium.py (+4, -19)
  41. MarketPlaces/LionMarketplace/parser.py (+120, -153)
  42. MarketPlaces/M00nkeyMarket/crawler_selenium.py (+4, -19)
  43. MarketPlaces/MetaVerseMarket/crawler_selenium.py (+291, -0)
  44. MarketPlaces/MetaVerseMarket/parser.py (+285, -0)
  45. MarketPlaces/MikesGrandStore/crawler_selenium.py (+4, -18)
  46. MarketPlaces/Nexus/crawler_selenium.py (+23, -27)
  47. MarketPlaces/Nexus/parser.py (+66, -46)
  48. MarketPlaces/PabloEscobarMarket/crawler_selenium.py (+256, -0)
  49. MarketPlaces/PabloEscobarMarket/parser.py (+241, -0)
  50. MarketPlaces/RobinhoodMarket/crawler_selenium.py (+11, -25)
  51. MarketPlaces/RobinhoodMarket/parser.py (+35, -14)
  52. MarketPlaces/ThiefWorld/crawler_selenium.py (+5, -20)
  53. MarketPlaces/ThiefWorld/parser.py (+1, -1)
  54. MarketPlaces/Tor2door/crawler_selenium.py (+2, -16)
  55. MarketPlaces/TorBay/crawler_selenium.py (+4, -18)
  56. MarketPlaces/TorMarket/crawler_selenium.py (+10, -27)
  57. MarketPlaces/TorMarket/parser.py (+49, -52)
  58. MarketPlaces/Utilities/utilities.py (+19, -21)
  59. MarketPlaces/ViceCity/crawler_selenium.py (+7, -22)

.idea/DW_Pipeline_Test.iml  (+16, -3)

@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="jdk" jdkName="C:\Users\calsyslab\anaconda3" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyNamespacePackagesService">
@ -10,10 +10,23 @@
<list>
<option value="$MODULE_DIR$/Forums/BestCardingWorld" />
<option value="$MODULE_DIR$/Forums/CryptBB" />
<option value="$MODULE_DIR$/MarketPlaces/DarkFox" />
<option value="$MODULE_DIR$/MarketPlaces/Tor2door" />
<option value="$MODULE_DIR$/Forums/OnniForums" />
<option value="$MODULE_DIR$/MarketPlaces/ThiefWorld" />
<option value="$MODULE_DIR$/MarketPlaces/Apocalypse" />
<option value="$MODULE_DIR$/MarketPlaces/DarkMatter" />
<option value="$MODULE_DIR$/MarketPlaces/DigitalThriftShop" />
<option value="$MODULE_DIR$/MarketPlaces/HiddenMarket" />
<option value="$MODULE_DIR$/MarketPlaces/LionMarketplace" />
<option value="$MODULE_DIR$/MarketPlaces/Nexus" />
<option value="$MODULE_DIR$/MarketPlaces/RobinhoodMarket" />
<option value="$MODULE_DIR$/MarketPlaces/TorBay" />
<option value="$MODULE_DIR$/MarketPlaces/TorMarket" />
<option value="$MODULE_DIR$/MarketPlaces/ViceCity" />
<option value="$MODULE_DIR$/Forums/Altenens" />
<option value="$MODULE_DIR$/Forums/Cardingleaks" />
<option value="$MODULE_DIR$/Forums/HiddenAnswers" />
<option value="$MODULE_DIR$/Forums/Libre" />
<option value="$MODULE_DIR$/Forums/Procrax" />
</list>
</option>
</component>

.idea/misc.xml  (+1, -1)

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="C:\Users\John Wick\anaconda3" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="C:\Users\calsyslab\anaconda3" project-jdk-type="Python SDK" />
</project>

Forums/AbyssForum/crawler_selenium.py  (+14, -26)

@ -30,32 +30,18 @@ baseURL = 'http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion
# Opens Tor Browser, crawls the website
def startCrawling():
# opentor()
forumName = getForumName()
# driver = getAccess()
#
# if driver != 'down':
# try:
# login(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
driver = getAccess()
new_parse(forumName, baseURL, True)
# Opens Tor Browser
def opentor():
from Forums.Initialization.forums_mining import config
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closeDriver(driver)
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
new_parse(forumName, baseURL, True)
# Login using premade account credentials and do login captcha manually
@ -78,7 +64,7 @@ def getFixedURL():
# Closes Tor Browser
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -121,6 +107,8 @@ def createFFDriver():
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
driver.maximize_window()
return driver
def getAccess():
@ -241,14 +229,14 @@ def crawlForum(driver):
driver.back()
# comment out
break
# break
# comment out
if count == 1:
break
try:
link = driver.find_element(by=By.XPATH, value = '/html/body/div[2]/div[2]/div[2]/div[2]/ul/li[9]/a').get_attribute('href')
link = driver.find_element(by=By.XPATH, value='/html/body/div[2]/div[2]/div[2]/div[2]/ul/li[9]/a').get_attribute('href')
if link == "":
raise NoSuchElementException
count += 1
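The same refactor recurs across every forum crawler in this commit: the interactive opentor() step is dropped, closetor() is renamed to closeDriver(), and the previously commented-out crawl flow is re-enabled. Reassembled from the hunks above, the entry point now reads roughly as follows (a sketch of the common shape, not a verbatim copy of any one file):

def startCrawling():
    forumName = getForumName()
    driver = getAccess()

    if driver != 'down':
        try:
            login(driver)
            crawlForum(driver)
        except Exception as e:
            print(driver.current_url, e)
        closeDriver(driver)

    new_parse(forumName, baseURL, True)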


Forums/Altenens/crawler_selenium.py  (+9, -21)

@ -30,7 +30,6 @@ baseURL = 'https://altenens.is/'
# Opens Tor Browser, crawls the website
def startCrawling():
# opentor()
forumName = getForumName()
driver = getAccess()
@ -40,22 +39,9 @@ def startCrawling():
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
closeDriver(driver)
# new_parse(forumName, baseURL, True)
# Opens Tor Browser
def opentor():
from Forums.Initialization.forums_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
new_parse(forumName, baseURL, True)
# Login using premade account credentials and do login captcha manually
@ -93,7 +79,7 @@ def getFixedURL():
# Closes Tor Browser
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -118,8 +104,8 @@ def createFFDriver():
ff_prof.set_preference("privacy.sanitize.sanitizeOnShutdown", True)
ff_prof.set_preference("signon.rememberSignons", False)
ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
ff_prof.set_preference("network.dns.disablePrefetch", True)
ff_prof.set_preference("network.http.sendRefererHeader", 0)
# ff_prof.set_preference("network.dns.disablePrefetch", True)
# ff_prof.set_preference("network.http.sendRefererHeader", 0)
ff_prof.set_preference("permissions.default.image", 3)
ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
@ -136,6 +122,8 @@ def createFFDriver():
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
driver.maximize_window()
return driver
@ -253,7 +241,7 @@ def crawlForum(driver):
driver.back()
# comment out
break
# break
# comment out
if count == 1:
@ -272,7 +260,7 @@ def crawlForum(driver):
print(link, e)
i += 1
input("Crawling Altenens forum done successfully. Press ENTER to continue\n")
print("Crawling the Altenens forum done.")
# Returns 'True' if the link is Topic link, may need to change for every website
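A note on the Firefox preference touched here and in most other crawler files: permissions.default.image moves to 3 (it was 1 or 2 depending on the crawler). A minimal sketch of the setting, with the value semantics as documented for Firefox (the profile object is a placeholder):

from selenium.webdriver.firefox.firefox_profile import FirefoxProfile

ff_prof = FirefoxProfile()
# 1 = load all images, 2 = block all images, 3 = block only third-party images.
# This commit appears to standardize on 3, so images served by the site itself still load.
ff_prof.set_preference("permissions.default.image", 3)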


Forums/BestCardingWorld/crawler_selenium.py  (+11, -24)

@ -27,7 +27,6 @@ baseURL = 'http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling():
# opentor()
forumName = getForumName()
driver = getAccess()
@ -36,25 +35,11 @@ def startCrawling():
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
closeDriver(driver)
new_parse(forumName, baseURL, True)
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from Forums.Initialization.forums_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
# Returns the name of the website
#return: name of site in string type
def getForumName():
@ -71,7 +56,7 @@ def getFixedURL():
# Closes Tor Browser
#@param: current selenium driver
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -98,7 +83,7 @@ def createFFDriver():
ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
ff_prof.set_preference("network.dns.disablePrefetch", True)#might need to turn off
ff_prof.set_preference("network.http.sendRefererHeader", 0)
ff_prof.set_preference("permissions.default.image", 2)
ff_prof.set_preference("permissions.default.image", 3)
ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
@ -114,6 +99,8 @@ def createFFDriver():
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
driver.maximize_window()
return driver
@ -238,8 +225,8 @@ def crawlForum(driver):
try:
nav = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div[2]/div[2]/div[4]/ul')
li = nav.find_element_by_class_name('next')
page = li.find_element_by_tag_name('a').get_attribute('href')
li = nav.find_element(by=By.CLASS_NAME, value='next')
page = li.find_element(by=By.TAG_NAME, value='a').get_attribute('href')
if page == "":
raise NoSuchElementException
counter += 1
@ -252,7 +239,7 @@ def crawlForum(driver):
driver.back()
# comment out
break
# break
# comment out
if count == 1:
@ -260,8 +247,8 @@ def crawlForum(driver):
try:
bar = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div[2]/div[2]/div[3]/ul')
next = bar.find_element_by_class_name('next')
link = next.find_element_by_tag_name('a').get_attribute('href')
next = bar.find_element(by=By.CLASS_NAME, value='next')
link = next.find_element(by=By.TAG_NAME, value='a').get_attribute('href')
if link == "":
raise NoSuchElementException
count += 1
@ -273,7 +260,7 @@ def crawlForum(driver):
print(link, e)
i += 1
input("Crawling BestCardingWorld forum done sucessfully. Press ENTER to continue\n")
print("Crawling the BestCardingWorld forum done.")
# Returns 'True' if the link is a description link
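The pagination lookups above are the Selenium 4 migration in miniature: the find_element_by_* helpers were removed in Selenium 4, so the crawler switches to find_element(by=..., value=...). A side-by-side sketch (nav is the pagination container already located in the hunk):

from selenium.webdriver.common.by import By

# Selenium 3 style (no longer available in Selenium 4):
#   li = nav.find_element_by_class_name('next')
#   page = li.find_element_by_tag_name('a').get_attribute('href')

# Selenium 4 style, as used after this commit:
li = nav.find_element(by=By.CLASS_NAME, value='next')
page = li.find_element(by=By.TAG_NAME, value='a').get_attribute('href')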


Forums/BestCardingWorld/parser.py  (+26, -24)

@ -152,7 +152,7 @@ def bestcardingworld_description_parser(soup):
# Populate the final variable (this should be a list with all fields scraped)
row = (topic, post, user, addDate, feedback, status, reputation, sign, interest)
row = (topic, user, status, reputation, interest, sign, post, feedback, addDate)
# Sending the results
@ -166,15 +166,17 @@ def bestcardingworld_description_parser(soup):
#return: 'row' that contains a variety of lists that each hold info on the listing page
def bestcardingworld_listing_parser(soup):
nm = 0 # this variable should receive the number of topics
topic = [] # 1 all topics
board = "-1" # 2 board name (the previous level of the topic in the Forum categorization tree.
nm = 0 # *this variable should receive the number of topics
forum = "BestCardingWorld" # 0 *forum name
board = "-1" # 1 *board name (the previous level of the topic in the Forum categorization tree.
# For instance: Security/Malware/Tools to hack Facebook. The board here should be Malware)
view = [] # 3 number of views of each topic
post = [] # 4 number of posts of each topic
user = [] # 5 all users of each topic
addDate = [] # 6 when the topic was created (difficult to find)
href = [] # 16 this variable should receive all cleaned urls (we will use this to do the marge between Listing and Description pages)
author = [] # 2 *all authors of each topic
topic = [] # 3 *all topics
views = [] # 4 number of views of each topic
posts = [] # 5 number of posts of each topic
href = [] # 6 this variable should receive all cleaned urls (we will use this to do the marge between
# Listing and Description pages)
addDate = [] # 7 when the topic was created (difficult to find)
# Finding the board (should be just one)
@ -187,7 +189,12 @@ def bestcardingworld_listing_parser(soup):
itopics = soup.find('ul', {"class": "topiclist topics"}).findAll('div',{"class": "list-inner"})
replies = soup.find('ul', {"class": "topiclist topics"}).findAll('dd',{"class": "posts"})
views = soup.find('ul', {"class": "topiclist topics"}).findAll('dd',{"class": "views"})
view = soup.find('ul', {"class": "topiclist topics"}).findAll('dd',{"class": "views"})
# Counting how many topics we have found so far
nm = len(itopics)
index = 0
for itopic in itopics:
@ -213,10 +220,6 @@ def bestcardingworld_listing_parser(soup):
topics = itopic.find('a', {"class": "topictitle"}).text
topic.append(cleanString(topics))
# Counting how many topics we have found so far
nm = len(topic)
# Adding the url to the list of urls
link = itopic.find('a', {"class": "topictitle"}).get('href')
link = cleanLink(link)
@ -224,18 +227,18 @@ def bestcardingworld_listing_parser(soup):
# Finding the author of the topic
ps = itopic.find('div', {"class":"responsive-hide"}).find('a', {"class": "username-coloured"}).text
author = ps.strip()
user.append(cleanString(author))
user = ps.strip()
author.append(cleanString(user))
# Finding the number of replies
posts = replies[index].text.split()[0]
posts = posts.strip()
post.append(cleanString(posts))
post = replies[index].text.split()[0]
post = post.strip()
posts.append(cleanString(post))
# Finding the number of Views
tview = views[index].text.split()[0]
tview = view[index].text.split()[0]
tview = tview.strip()
view.append(cleanString(tview))
views.append(cleanString(tview))
# If no information about when the topic was added, just assign "-1" to the variable
#CryptBB doesn't show when topic was first posted on listing page
@ -245,10 +248,9 @@ def bestcardingworld_listing_parser(soup):
addDate.append(date_time_obj)
#addDate.append("-1")
index += 1
return organizeTopics("BestCardingWorld", nm, topic, board, view, post, user, addDate, href)
return organizeTopics(forum, nm, board, author, topic, views, posts, href, addDate)
#called by the crawler to get description links on a listing page


Forums/Cardingleaks/crawler_selenium.py  (+15, -27)

@ -32,32 +32,18 @@ baseURL = 'https://leaks.ws/'
# Opens Tor Browser, crawls the website
def startCrawling():
# opentor()
forumName = getForumName()
# driver = getAccess()
#
# if driver != 'down':
# try:
# login(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
driver = getAccess()
new_parse(forumName, baseURL, True)
# Opens Tor Browser
def opentor():
from Forums.Initialization.forums_mining import config
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closeDriver(driver)
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
new_parse(forumName, baseURL, True)
# Login using premade account credentials and do login captcha manually
@ -101,7 +87,7 @@ def getFixedURL():
# Closes Tor Browser
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -144,6 +130,8 @@ def createFFDriver():
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
driver.maximize_window()
return driver
@ -159,7 +147,7 @@ def getAccess():
# Saves the crawled html page
def savePage(page, url):
def savePage(driver, page, url):
cleanPage = cleanHTML(driver, page)
filePath = getFullPathName(url)
os.makedirs(os.path.dirname(filePath), exist_ok=True)
@ -242,7 +230,7 @@ def crawlForum(driver):
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, topic + f"page{counter}") # very important
savePage(driver, driver.page_source, topic + f"page{counter}") # very important
# comment out
if counter == 2:
@ -261,7 +249,7 @@ def crawlForum(driver):
driver.back()
# comment out
break
# break
# comment out
if count == 1:
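savePage() now takes the driver as its first argument so that cleanHTML() can post-process the page against the live session, and the call sites change accordingly (savePage(driver, driver.page_source, ...)). The visible part of the new helper, per the hunk above, is:

def savePage(driver, page, url):
    cleanPage = cleanHTML(driver, page)
    filePath = getFullPathName(url)
    os.makedirs(os.path.dirname(filePath), exist_ok=True)
    # ... the write of cleanPage to filePath is unchanged and not shown in the hunk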


Forums/CryptBB/crawler_selenium.py  (+13, -25)

@ -28,32 +28,18 @@ baseURL = 'http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion
# Opens Tor Browser, crawls the website
def startCrawling():
# opentor()
forumName = getForumName()
# driver = getAccess()
#
# if driver != 'down':
# try:
# login(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
driver = getAccess()
new_parse(forumName, baseURL, True)
# Opens Tor Browser
def opentor():
from Forums.Initialization.forums_mining import config
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closeDriver(driver)
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
new_parse(forumName, baseURL, True)
# Login using premade account credentials and do login captcha manually
@ -119,7 +105,7 @@ def getFixedURL():
# Closes Tor Browser
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -162,6 +148,8 @@ def createFFDriver():
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
driver.maximize_window()
return driver
@ -289,7 +277,7 @@ def crawlForum(driver):
driver.back()
# comment out
break
# break
# comment out
if count == 1:


Forums/CryptBB/parser.py  (+1, -1)

@ -124,7 +124,7 @@ def cryptBB_description_parser(soup):
stime = dt.replace('Yesterday,','').strip()
date_time_obj = yesterday+ ', '+stime
date_time_obj = datetime.strptime(date_time_obj,'%m-%d-%Y, %I:%M %p')
elif "hours ago" in dt:
elif "hour ago" in dt or "hours ago" in dt:
day = day.strftime('%m-%d-%Y')
date_time_obj = postarea.find('span', {"class": "post_date"}).find('span')['title']
date_time_obj = datetime.strptime(date_time_obj, '%m-%d-%Y, %I:%M %p')
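The fix above widens the relative-timestamp branch so "1 hour ago" is caught as well as "N hours ago". Pulled together, the date handling in cryptBB_description_parser looks roughly like this (dt is the raw date string and postarea the post's container tag; a sketch, not the full function):

from datetime import datetime, timedelta

yesterday = (datetime.now() - timedelta(days=1)).strftime('%m-%d-%Y')

if "Yesterday" in dt:
    stime = dt.replace('Yesterday,', '').strip()
    date_time_obj = datetime.strptime(yesterday + ', ' + stime, '%m-%d-%Y, %I:%M %p')
elif "hour ago" in dt or "hours ago" in dt:
    # relative times carry the absolute timestamp in the nested span's title attribute
    title = postarea.find('span', {"class": "post_date"}).find('span')['title']
    date_time_obj = datetime.strptime(title, '%m-%d-%Y, %I:%M %p')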


Forums/DB_Connection/db_connection.py  (+0, -1)

@ -2,7 +2,6 @@ __author__ = 'DarkWeb'
import psycopg2
import traceback
import configparser
def connectDataBase():


Forums/HiddenAnswers/crawler_selenium.py  (+13, -25)

@ -30,32 +30,18 @@ baseURL = 'http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion
# Opens Tor Browser, crawls the website
def startCrawling():
# opentor()
forumName = getForumName()
# driver: webdriver.Firefox = getAccess()
#
# if driver != 'down':
# try:
# login(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
driver: webdriver.Firefox = getAccess()
new_parse(forumName, baseURL, True)
# Opens Tor Browser
def opentor():
from Forums.Initialization.forums_mining import config
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closeDriver(driver)
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
new_parse(forumName, baseURL, True)
# Login using premade account credentials and do login captcha manually
@ -78,7 +64,7 @@ def getFixedURL():
# Closes Tor Browser
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -121,6 +107,8 @@ def createFFDriver():
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
driver.maximize_window()
return driver
def getAccess():
@ -235,7 +223,7 @@ def crawlForum(driver: webdriver.Firefox):
driver.back()
# comment out
break
# break
# comment out
if count == 1:


Forums/HiddenAnswers/parser.py  (+5, -2)

@ -127,15 +127,18 @@ def HiddenAnswers_listing_parser(soup: BeautifulSoup):
if date_posted.find("day") > 0:
datetime_obj = datetime.now() - timedelta(days=1)
else:
datetime_obj = datetime.strptime(f"{date_posted} {date.today().year}", "%b %d %Y")
try:
datetime_obj = datetime.strptime(f"{date_posted} {date.today().year}", "%b %d %Y")
except ValueError:
datetime_obj = datetime.strptime(f"{date_posted}", "%b %d, %Y")
addDate.append(datetime_obj)
#this link will be cleaned
listing_href = queries.find("div", {"class": "qa-q-item-title"}).find("a").get("href")
href.append(listing_href)
#need to change this method
nm = len(topic)
return organizeTopics(forum, nm, board, user, topic, view, post, href, addDate)
#need to change this method
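The ValueError fallback added above covers the two date formats the listing page uses; isolated, the logic is (helper name hypothetical):

from datetime import datetime, date

def parse_listing_date(date_posted: str) -> datetime:
    # The listing shows either "Jun 14" (no year, parsed against the current year)
    # or "Jun 14, 2022" (full date); try the first format, fall back on ValueError.
    try:
        return datetime.strptime(f"{date_posted} {date.today().year}", "%b %d %Y")
    except ValueError:
        return datetime.strptime(date_posted, "%b %d, %Y")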


Forums/Initialization/forumsList.txt  (+0, -1)

@ -1,4 +1,3 @@
AbyssForum
Altenens
BestCardingWorld
Cardingleaks


Forums/Initialization/prepare_parser.py  (+5, -0)

@ -12,6 +12,7 @@ from Forums.OnniForums.parser import *
from Forums.Altenens.parser import *
from Forums.Procrax.parser import *
from Forums.Libre.parser import *
from Forums.HiddenAnswers.parser import *
from Forums.Classifier.classify_product import predict
# from DarkWebMining_Sample.Forums.Classifier.classify_product import predict_semi
@ -126,6 +127,8 @@ def parse_listing(forum, listingFile, soup, createLog, logFile):
rw = procrax_listing_parser(soup)
elif forum == "Libre":
rw = libre_listing_parser(soup)
elif forum == "HiddenAnswers":
rw = HiddenAnswers_listing_parser(soup)
else:
print("MISSING CALL TO LISTING PARSER IN PREPARE_PARSER.PY!")
raise Exception
@ -160,6 +163,8 @@ def parse_description(forum, descriptionFile, soup, createLog, logFile):
rmm = procrax_description_parser(soup)
elif forum == "Libre":
rmm = libre_description_parser(soup)
elif forum == "HiddenAnswers":
rmm = HiddenAnswers_description_parser(soup)
else:
print("MISSING CALL TO DESCRIPTION PARSER IN PREPARE_PARSER.PY!")
raise Exception


Forums/Libre/crawler_selenium.py  (+14, -26)

@ -28,32 +28,18 @@ baseURL = 'http://libreeunomyly6ot7kspglmbd5cvlkogib6rozy43r2glatc6rmwauqd.onion
# Opens Tor Browser, crawls the website
def startCrawling():
# opentor()
forumName = getForumName()
# driver = getAccess()
#
# if driver != 'down':
# try:
# login(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
driver = getAccess()
new_parse(forumName, baseURL, True)
# Opens Tor Browser
def opentor():
from Forums.Initialization.forums_mining import config
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closeDriver(driver)
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
new_parse(forumName, baseURL, True)
# Login using premade account credentials and do login captcha manually
@ -101,7 +87,7 @@ def getFixedURL():
# Closes Tor Browser
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -144,6 +130,8 @@ def createFFDriver():
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
driver.maximize_window()
return driver
@ -255,7 +243,7 @@ def crawlForum(driver):
driver.back()
# comment out
break
# break
# comment out
if count == 1:
@ -275,7 +263,7 @@ def crawlForum(driver):
print(link, e)
i += 1
input("Crawling the Libre forum done.")
print("Crawling the Libre forum done.")
# Returns 'True' if the link is Topic link, may need to change for every website


Forums/OnniForums/crawler_selenium.py  (+12, -25)

@ -31,32 +31,18 @@ baseURL = 'http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion
# Opens Tor Browser, crawls the website
def startCrawling():
# opentor()
forumName = getForumName()
# driver = getAccess()
#
# if driver != 'down':
# try:
# login(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
driver = getAccess()
new_parse(forum=forumName, url=baseURL, createLog=True)
# Opens Tor Browser
def opentor():
from Forums.Initialization.forums_mining import config
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closeDriver(driver)
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
new_parse(forum=forumName, url=baseURL, createLog=True)
# Login using premade account credentials and do login captcha manually
@ -96,7 +82,7 @@ def getFixedURL():
# Closes Tor Browser
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -139,6 +125,7 @@ def createFFDriver():
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
driver.maximize_window()
return driver
@ -267,7 +254,7 @@ def crawlForum(driver):
driver.back()
# comment out
break
# break
# comment out
if count == 1:


Forums/OnniForums/parser.py  (+8, -7)

@ -139,12 +139,14 @@ def onniForums_listing_parser(soup: BeautifulSoup):
nm = len(thread_arrays)
for thread in thread_arrays: #getting the information from the posts and sorting them into the arrays defined above
try:
post_subject: str = thread.find("span",{"class": "subject_new"}).text #getting the topic
body = thread.find("span",{"class": "subject_new"})
try:
post_subject: str = body.text #getting the topic
except AttributeError:
post_subject: str = thread.find("span",{"class": "subject_old"}).text
body = thread.find("span",{"class": "subject_old"})
post_subject: str = body.text
post_subject_cleaned = cleanString(post_subject.strip())
topic.append(post_subject_cleaned)
@ -163,9 +165,8 @@ def onniForums_listing_parser(soup: BeautifulSoup):
author = thread.find("span",{"class" : "author smalltext"}).text
author_cleaned = cleanString(author.strip())
user.append(author_cleaned)
reply_anchor = thread.find_all("td", {"align": "center"})[2].find('a')
thread_link = reply_anchor.get('href')
thread_link = body.find('a').get('href')
href.append(thread_link)
return organizeTopics(
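Net effect of the change above: the subject span (subject_new, falling back to subject_old for already-read threads) is looked up once and reused, and the thread link is taken from the anchor inside that same span instead of the third center-aligned cell. A condensed sketch (thread, cleanString, topic, href as in the parser):

body = thread.find("span", {"class": "subject_new"})
try:
    post_subject = body.text
except AttributeError:
    body = thread.find("span", {"class": "subject_old"})
    post_subject = body.text

topic.append(cleanString(post_subject.strip()))
href.append(body.find('a').get('href'))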


Forums/Procrax/crawler_selenium.py  (+13, -25)

@ -32,16 +32,15 @@ FORUM_NAME = 'Procrax'
# Opens Tor Browser, crawls the website
def startCrawling():
# opentor()
# driver = getAccess()
#
# if driver != 'down':
# try:
# login(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
driver = getAccess()
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closeDriver(driver)
new_parse(
forum=FORUM_NAME,
@ -50,19 +49,6 @@ def startCrawling():
)
# Opens Tor Browser
def opentor():
from Forums.Initialization.forums_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
# Login using premade account credentials and do login captcha manually
def login(driver):
WebDriverWait(driver, 50).until(EC.visibility_of_element_located(
@ -97,7 +83,7 @@ def getFixedURL():
# Closes Tor Browser
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -140,6 +126,8 @@ def createFFDriver():
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
driver.maximize_window()
return driver
def getAccess():
@ -257,7 +245,7 @@ def crawlForum(driver):
driver.back()
# comment out
break
# break
# comment out
if count == 1:


MarketPlaces/AnonymousMarketplace/crawler_selenium.py  (+8, -26)

@ -32,7 +32,6 @@ baseURL = 'http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling():
# opentor()
mktName = getMKTName()
driver = getAccess()
@ -42,25 +41,11 @@ def startCrawling():
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
closeDriver(driver)
new_parse(mktName, baseURL, True)
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
# Returns the name of the website
#return: name of site in string type
def getMKTName():
@ -77,7 +62,7 @@ def getFixedURL():
# Closes Tor Browser
#@param: current selenium driver
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -104,7 +89,7 @@ def createFFDriver():
ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
ff_prof.set_preference("network.dns.disablePrefetch", True)
ff_prof.set_preference("network.http.sendRefererHeader", 0)
ff_prof.set_preference("permissions.default.image", 1)
ff_prof.set_preference("permissions.default.image", 3)
ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
@ -146,6 +131,7 @@ def login(driver):
WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
(By.ID, "woocommerce_product_categories-2")))
# Saves the crawled html page, makes the directory path for html pages if not made
def savePage(driver, page, url):
cleanPage = cleanHTML(driver, page)
@ -187,12 +173,8 @@ def getNameFromURL(url):
def getInterestedLinks():
links = []
# # carding
# links.append('http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion/product-category/carding/')
# # hacked paypal
# links.append('http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion/product-category/hacked-paypal-accounts/')
# hacking services
links.append('http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion/product-category/hacking-services/')
# home
links.append('http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion/')
return links
@ -232,7 +214,7 @@ def crawlForum(driver):
driver.back()
# comment out
break
# break
# comment out
if count == 1:
@ -240,7 +222,7 @@ def crawlForum(driver):
#left in in case site changes
try:
link = ""
link = driver.find_element(by=By.LINK_TEXT, value="").get_attribute('href')
if link == "":
raise NoSuchElementException
count += 1


MarketPlaces/AnonymousMarketplace/parser.py  (+17, -20)

@ -41,12 +41,12 @@ def anonymousMarketplace_description_parser(soup: Tag):
describe_output += div.text
describe = cleanString(describe_output.strip())
product_ratings: Tag = soup.find("div", {"class": "star-rating"})
product_ratings: Tag = soup.find("div", {"class": "woocommerce-product-rating"})
product_reviews = product_ratings.find("div", {"class": "woocommerce-product-rating"}).find("strong", {"class": "rating"}).text
product_reviews = product_ratings.find("span", {"class": "rating"}).text
reviews = cleanString(product_reviews.strip())
product_star_rating = product_ratings.find("span", {"class": "rating"}).text
product_star_rating = product_ratings.find("strong", {"class": "rating"}).text
rating_item = cleanString(product_star_rating.strip())
product_price = soup.find("span", {"class": "woocommerce-Price-amount amount"}).text
@ -86,15 +86,16 @@ def anonymousMarketplace_listing_parser(soup: Tag):
shipFrom = [] # 18 Product_ShippedFrom
shipTo = [] # 19 Product_ShippedTo
href = [] # 20 Product_Links
product_list: ResultSet[Tag] = soup.find("ul", {"class": "product_list_widget"}).find_all("li")
woo = soup.find('div', {"class": "woocommerce"})
product_list = woo.find('ul', {"class": "products columns-4"}, recursive=False).find_all('li')
for item in product_list:
item_href = item.find("a").get("href")
item_href = item.find("a", recursive=False).get("href")
href.append(item_href)
item_name = item.find("span", {"class": "product-title"}).text
item_name = item.find("h2").text
name.append(cleanString(item_name.strip()))
item_rating = item.find("div", {"class": "star-rating"}).find("strong", {"class": "rating"}).text
@ -103,14 +104,11 @@ def anonymousMarketplace_listing_parser(soup: Tag):
try:
item_price = item.find("span", {"class": "woocommerce-Price-amount amount"}).text
item_price = item_price.replace("$", "").strip()
USD.append(item_price)
USD.append(cleanNumbers(item_price))
except AttributeError:
USD.append("-1")
vendor.append("Anonymous")
vendor.append("AnonymousMarketplace")
rating_vendor.append("-1")
success.append("-1")
CVE.append("-1")
@ -153,10 +151,6 @@ def anonymousMarketplace_listing_parser(soup: Tag):
shipTo=shipTo,
href=href
)
#called by the crawler to get description links on a listing page
@ -167,10 +161,13 @@ def anonymous_links_parser(soup):
# Returning all links that should be visited by the Crawler
href = []
listing = soup.find('ul', {"class": "product_list_widget"}).findAll('li')
woo = soup.find('div', {"class": "woocommerce"})
listing = woo.find('ul', {"class": "products columns-4"}, recursive=False).find_all('li')
for a in listing:
bae = a.find('a', href=True)
bae = a.find('a', href=True, recursive=False)
link = bae['href']
href.append(link)
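Both the listing parser and the links parser now walk the standard WooCommerce product grid instead of the product_list_widget sidebar. A sketch of the traversal, with the selectors taken from the hunks above (html is a placeholder for a saved listing page):

from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'html.parser')
woo = soup.find('div', {"class": "woocommerce"})
products = woo.find('ul', {"class": "products columns-4"}, recursive=False).find_all('li')

for item in products:
    link = item.find('a', recursive=False).get('href')    # product page URL
    title = item.find('h2').text.strip()                   # product name
    price_tag = item.find('span', {"class": "woocommerce-Price-amount amount"})
    price = price_tag.text.replace('$', '').strip() if price_tag else '-1'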


MarketPlaces/Apocalypse/crawler_selenium.py  (+14, -20)

@ -32,7 +32,6 @@ baseURL = 'http://apocam5hnoqskkmhr325nivjuh5phbmmggadxgcjabzzirap5iklkxad.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling():
# opentor()
mktName = getMKTName()
driver = getAccess()
@ -47,20 +46,6 @@ def startCrawling():
new_parse(mktName, baseURL, True)
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
# Returns the name of the website
#return: name of site in string type
def getMKTName():
@ -104,7 +89,7 @@ def createFFDriver():
ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
ff_prof.set_preference("network.dns.disablePrefetch", True)
ff_prof.set_preference("network.http.sendRefererHeader", 0)
ff_prof.set_preference("permissions.default.image", 1)##
ff_prof.set_preference("permissions.default.image", 3)
ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
@ -162,6 +147,7 @@ def login(driver):
WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
(By.XPATH, "/html/body/div[1]/div[2]/div[1]/div[1]/a[13]")))
# Saves the crawled html page, makes the directory path for html pages if not made
def savePage(driver, page, url):
cleanPage = cleanHTML(driver, page)
@ -203,8 +189,12 @@ def getNameFromURL(url):
def getInterestedLinks():
links = []
# # Hacking Services
# links.append('http://apocam5hnoqskkmhr325nivjuh5phbmmggadxgcjabzzirap5iklkxad.onion/subcategory/19')
# # Digital Goods
# links.append('http://apocam5hnoqskkmhr325nivjuh5phbmmggadxgcjabzzirap5iklkxad.onion/category/74')
# # Fraud
# links.append('http://apocam5hnoqskkmhr325nivjuh5phbmmggadxgcjabzzirap5iklkxad.onion/category/75')
# # Services
# links.append('http://apocam5hnoqskkmhr325nivjuh5phbmmggadxgcjabzzirap5iklkxad.onion/category/76')
# software and malware
links.append('http://apocam5hnoqskkmhr325nivjuh5phbmmggadxgcjabzzirap5iklkxad.onion/subcategory/30')
@ -243,7 +233,11 @@ def crawlForum(driver):
except:
driver.refresh()
savePage(driver, driver.page_source, item)
driver.back()
# driver.back()
try:
driver.get(link)
except:
driver.refresh()
# comment out
# break
@ -282,7 +276,7 @@ def isDescriptionLink(url):
#@param: url of any url crawled
#return: true if is a Listing page, false if not
def isListingLink(url):
if 'subcategory' in url:
if 'category' in url:
return True
return False


MarketPlaces/Apocalypse/parser.py  (+23, -8)

@ -30,7 +30,9 @@ def apocalypse_description_parser(soup: Tag):
left = "-1" # 16 Product_QuantityLeft
shipFrom = "-1" # 17 Product_ShippedFrom
shipTo = "-1" # 18 Product_ShippedTo
image = "-1" # 19 Product_Image
vendor_image = "-1" # 20 Vendor_Image
content: Tag = soup.find("div", {'id': "article_page"})
product_name = content.find("p", {"class": "list-group-item text-center mb-0 box"}).text
@ -38,7 +40,11 @@ def apocalypse_description_parser(soup: Tag):
product_description = content.find("pre").text
describe = cleanString(product_description.strip())
# Finding Product Image
image = soup.find('div', {'class': 'col-md-7 text-center'}).find('img')
image = image.get('src').split('base64,')[-1]
product_reviews_list: Tag = content.find("table", {"class": "table product_reviews"}) \
.find_all("li")
@ -72,7 +78,7 @@ def apocalypse_description_parser(soup: Tag):
# Populating the final variable (this should be a list with all fields scraped)
row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
BTC, USD, EURO, sold, left, shipFrom, shipTo)
BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
# Sending the results
return row
@ -103,15 +109,21 @@ def apocalypse_listing_parser(soup: Tag):
vendor = [] # 18 Vendor
rating = [] # 19 Vendor_Rating
success = [] # 20 Vendor_Successful_Transactions
href = [] # 23 Product_Links (Urls)
image = [] # 20 Product_Image
image_vendor = [] # 21 Vendor_Image
href = [] # 22 Product_Links
listings: ResultSet[Tag] = soup.find("div", {"class": "col-lg-9 my-4"}).find_all("div", {"class": "col-lg-4 col-md-6 mb-1"})
for prod in listings:
product_name = prod.find('h5', {"class": "art_title"}).text
name.append(cleanString(product_name.strip()))
# Finding Product Image
product_image = prod.find('img', {'class': 'customHeight'})
product_image = product_image.get('src').split('base64,')[-1]
image.append(product_image)
CVE.append("-1")
MS.append("-1")
@ -124,6 +136,7 @@ def apocalypse_listing_parser(soup: Tag):
EURO.append("-1")
shipTo.append("-1")
success.append("-1")
image_vendor.append("-1")
product_price = prod.find("span", {"class": "priceP"}).text
USD.append(cleanString(product_price.strip()))
@ -161,7 +174,7 @@ def apocalypse_listing_parser(soup: Tag):
rating.append(cleanString(product_vendor_rating.strip()))
except Exception as e:
raise e
product_href = prod.find('a').get('href')
href.append(product_href)
@ -190,7 +203,9 @@ def apocalypse_listing_parser(soup: Tag):
qLeft=qLeft,
shipFrom=shipFrom,
shipTo=shipTo,
href=href
href=href,
image=image,
image_vendor=image_vendor
)
#called by the crawler to get description links on a listing page
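The new image fields store the raw base64 payload of the inline data URI rather than a URL; isolated, the extraction added above amounts to (helper name hypothetical, prod is one listing tile as a bs4 Tag):

def extract_base64_image(prod):
    img = prod.find('img', {'class': 'customHeight'})
    if img is None:
        return '-1'
    # src looks like "data:image/png;base64,<payload>"; keep only the payload
    return img.get('src').split('base64,')[-1]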


MarketPlaces/BlackPyramid/crawler_selenium.py  (+3, -18)

@ -33,7 +33,6 @@ baseURL = 'http://blackpyoc3gbnrlvxqvvytd3kxqj7pd226i2gvfyhysj24ne2snkmnyd.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling():
# opentor()
mktName = getMKTName()
driver = getAccess()
@ -43,25 +42,11 @@ def startCrawling():
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
closeDriver(driver)
new_parse(mktName, baseURL, True)
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
# Returns the name of the website
#return: name of site in string type
def getMKTName():
@ -78,7 +63,7 @@ def getFixedURL():
# Closes Tor Browser
#@param: current selenium driver
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -105,7 +90,7 @@ def createFFDriver():
ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
ff_prof.set_preference("network.dns.disablePrefetch", True)
ff_prof.set_preference("network.http.sendRefererHeader", 0)
ff_prof.set_preference("permissions.default.image", 2)
ff_prof.set_preference("permissions.default.image", 3)
ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")


MarketPlaces/CityMarket/crawler_selenium.py  (+3, -18)

@ -33,7 +33,6 @@ baseURL = 'http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling():
# opentor()
mktName = getMKTName()
driver = getAccess()
@ -43,25 +42,11 @@ def startCrawling():
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
closeDriver(driver)
new_parse(mktName, baseURL, True)
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
# Returns the name of the website
#return: name of site in string type
def getMKTName():
@ -78,7 +63,7 @@ def getFixedURL():
# Closes Tor Browser
#@param: current selenium driver
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -105,7 +90,7 @@ def createFFDriver():
ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
ff_prof.set_preference("network.dns.disablePrefetch", True)
ff_prof.set_preference("network.http.sendRefererHeader", 0)
ff_prof.set_preference("permissions.default.image", 1)##
ff_prof.set_preference("permissions.default.image", 3)
ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")


MarketPlaces/CypherMarketplace/crawler_selenium.py  (+3, -18)

@ -32,7 +32,6 @@ baseURL = 'http://6c5qa2ke2esh6ake6u6yoxjungz2czbbl7hqxl75v5k37frtzhxuk7ad.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling():
# opentor()
mktName = getMKTName()
driver = getAccess()
@ -42,25 +41,11 @@ def startCrawling():
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
closeDriver(driver)
new_parse(mktName, baseURL, True)
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
# Returns the name of the website
#return: name of site in string type
def getMKTName():
@ -77,7 +62,7 @@ def getFixedURL():
# Closes Tor Browser
#@param: current selenium driver
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -104,7 +89,7 @@ def createFFDriver():
ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
ff_prof.set_preference("network.dns.disablePrefetch", True)
ff_prof.set_preference("network.http.sendRefererHeader", 0)
ff_prof.set_preference("permissions.default.image", 2)
ff_prof.set_preference("permissions.default.image", 3)
ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")


MarketPlaces/DB_Connection/db_connection.py  (+46, -31)

@ -3,6 +3,7 @@ __author__ = 'DarkWeb'
import psycopg2
import traceback
import configparser
from MarketPlaces.Utilities.utilities import *
def connectDataBase():
@ -146,7 +147,7 @@ def create_marketPlace(cur, row, url):
sql = "Insert into marketplaces (market_id, name_market, url_market, dateinserted_market) " \
"Values (%s, %s, %s, %s)"
recset = [marketId, row[0], url, row[21]]
recset = [marketId, row[0], url, row[23]]
cur.execute(sql, recset)
@ -165,13 +166,15 @@ def create_vendor(cur, row, marketId):
if newVendor:
sql = "Insert into vendors (vendor_id, market_id, name_vendor, rating_vendor, successfultransactions_vendor, dateinserted_vendor) Values (%s, %s, %s, %s, %s, %s)"
sql = "Insert into vendors (vendor_id, market_id, name_vendor, rating_vendor, successfultransactions_vendor, image_vendor, dateinserted_vendor) " \
"Values (%s, %s, %s, %s, %s, %s, %s)"
recset = [vendorId, marketId,
row[1],
row[2] if row[2] != '-1' else None,
row[3] if row[3] != '-1' else None,
row[21]]
row[21] if row[21] != '-1' else None,
row[23]]
cur.execute(sql, recset)
@ -183,24 +186,30 @@ def create_vendor(cur, row, marketId):
recset = cur.fetchall()
# decode_decrypt_image_in_base64(recset[0][5])
if (str(recset[0][3]) != str(row[2] if row[2] != '-1' else None) or # there was a change in the vendor information
str(recset[0][4]) != str(row[3] if row[3] != '-1' else None)):
str(recset[0][4]) != str(row[3] if row[3] != '-1' else None) or
str(recset[0][5]) != str(row[21] if row[21] != '-1' else None)):
sql = "Insert into vendors_history (vendor_id, market_id, name_vendor, rating_vendor, successfultransactions_vendor, dateinserted_vendor) Values (%s, %s, %s, %s, %s, %s)"
sql = "Insert into vendors_history (vendor_id, market_id, name_vendor, rating_vendor, successfultransactions_vendor, image_vendor, dateinserted_vendor) " \
"Values (%s, %s, %s, %s, %s, %s, %s)"
recset = [vendorId, marketId,
recset[0][2],
recset[0][3],
recset[0][4],
recset[0][5]]
recset[0][5],
recset[0][6]]
cur.execute(sql, recset)
sql = "Update vendors set rating_vendor = %(rating_vendor)s, successfultransactions_vendor = %(successfultransactions_vendor)s, " \
"dateinserted_vendor = %(dateinserted_vendor)s where vendor_id = %(vendorId)s"
"image_vendor = %(image_vendor)s, dateinserted_vendor = %(dateinserted_vendor)s where vendor_id = %(vendorId)s"
cur.execute(sql, {'rating_vendor': row[2] if row[2] != '-1' else None,
'successfultransactions_vendor': row[3] if row[3] != '-1' else None,
'dateinserted_vendor': row[21],
'image_vendor': row[21] if row[21] != '-1' else None,
'dateinserted_vendor': row[23],
'vendorId': vendorId})
return vendorId
@ -220,9 +229,9 @@ def create_items(cur, row, marketId, vendorId):
sql = "Insert into items (item_id, market_id, vendor_id, name_item, description_item, cve_item, ms_item, category_item, " \
"views_item, reviews_item, rating_item, dateadded_item, btc_item, usd_item, euro_item, quantitysold_item, " \
"quantityleft_item, shippedfrom_item, shippedto_item, href_item, lastseen_item, dateinserted_item, " \
"quantityleft_item, shippedfrom_item, shippedto_item, lastseen_item, image_item, href_item, dateinserted_item, " \
"classification_item) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, " \
"%s, %s, %s, %s)"
"%s, %s, %s, %s, %s)"
recset = [itemId, marketId, vendorId,
row[4],
@ -241,10 +250,11 @@ def create_items(cur, row, marketId, vendorId):
row[17] if row[17] != '-1' else None,
row[18] if row[18] != '-1' else None,
row[19] if row[19] != '-1' else None,
row[23],
row[20] if row[20] != '-1' else None,
row[21],
row[21],
row[22]]
row[22] if row[22] != '-1' else None,
row[23],
row[24]]
cur.execute(sql, recset)
@ -256,19 +266,22 @@ def create_items(cur, row, marketId, vendorId):
recset = cur.fetchall()
#decode_decrypt_image_in_base64(recset[0][20])
if (str(recset[0][4]) != str(row[5] if row[5] != '-1' else None) or str(recset[0][5]) != str(row[6] if row[6] != '-1' else None) or
str(recset[0][6]) != str(row[7] if row[7] != '-1' else None) or str(recset[0][7]) != str(row[8] if row[8] != '-1' else None) or
str(recset[0][8]) != str(row[9] if row[9] != '-1' else None) or str(recset[0][9]) != str(row[10] if row[10] != '-1' else None) or
str(recset[0][10]) != str(row[11] if row[11] != '-1' else None) or str(recset[0][11]) != str(row[12] if row[12] != '-1' else None) or
str(recset[0][12]) != str(row[13] if row[13] != '-1' else None) or str(recset[0][13]) != str(row[14] if row[14] != '-1' else None) or
str(recset[0][14]) != str(row[15] if row[15] != '-1' else None) or str(recset[0][15]) != str(row[16] if row[16] != '-1' else None) or
str(recset[0][16]) != str(row[17] if row[17] != '-1' else None) or str(recset[0][17]) != str(row[18] if row[18] != '-1' else None)):
str(recset[0][16]) != str(row[17] if row[17] != '-1' else None) or str(recset[0][17]) != str(row[18] if row[18] != '-1' else None) or
str(recset[0][18]) != str(row[19] if row[19] != '-1' else None) or str(recset[0][20]) != str(row[20] if row[20] != '-1' else None)):
sql = "Insert into items_history (item_id, market_id, vendor_id, name_item, description_item, cve_item, ms_item, category_item, " \
"views_item, reviews_item, rating_item, dateadded_item, btc_item, usd_item, euro_item, quantitysold_item, " \
"quantityleft_item, shippedfrom_item, shippedto_item, href_item, lastseen_item, dateinserted_item, " \
"quantityleft_item, shippedfrom_item, shippedto_item, lastseen_item, image_item, href_item, dateinserted_item, " \
"classification_item) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, " \
"%s, %s, %s, %s)"
"%s, %s, %s, %s, %s)"
recset = [itemId, marketId, vendorId,
recset[0][3],
@ -290,7 +303,8 @@ def create_items(cur, row, marketId, vendorId):
recset[0][19],
recset[0][20],
recset[0][21],
recset[0][22]]
recset[0][22],
recset[0][23]]
cur.execute(sql, recset)
@ -299,7 +313,7 @@ def create_items(cur, row, marketId, vendorId):
"rating_item = %(rating_item)s, dateadded_item = %(dateadded_item)s, btc_item = %(btc_item)s, " \
"usd_item = %(usd_item)s, euro_item = %(euro_item)s, quantitysold_item = %(quantitysold_item)s, " \
"quantityleft_item = %(quantityleft_item)s, shippedfrom_item = %(shippedfrom_item)s, shippedto_item = %(shippedto_item)s, " \
"lastseen_item = %(lastseen_item)s, dateinserted_item = %(dateinserted_item)s where item_id = %(itemId)s"
"lastseen_item = %(lastseen_item)s, image_item = %(image_item)s, dateinserted_item = %(dateinserted_item)s where item_id = %(itemId)s"
cur.execute(sql, {'description_item': row[5] if row[5] != '-1' else None,
'cve_item': row[6] if row[6] != '-1' else None,
@ -316,8 +330,9 @@ def create_items(cur, row, marketId, vendorId):
'quantityleft_item': row[17] if row[17] != '-1' else None,
'shippedfrom_item': row[18] if row[18] != '-1' else None,
'shippedto_item': row[19] if row[19] != '-1' else None,
'dateinserted_item': row[21],
'lastseen_item': row[21],
'dateinserted_item': row[23],
'lastseen_item': row[23],
'image_item': row[20],
'itemId': itemId})
@ -325,7 +340,7 @@ def create_items(cur, row, marketId, vendorId):
sql = "Update items set lastseen_item = %(lastseen_item)s where item_id = %(itemId)s"
cur.execute(sql, {'lastseen_item': row[21],
cur.execute(sql, {'lastseen_item': row[23],
'itemId': itemId})
return itemId
@ -344,8 +359,8 @@ def create_database(cur, con):
sql = "create table vendors(vendor_id integer not null, market_id integer not null, name_vendor character " \
"varying(255) not null, rating_vendor character varying(255), successfultransactions_vendor integer " \
"null, dateinserted_vendor timestamp(6) with time zone not null, constraint vendors_pk primary key (" \
"vendor_id), constraint vendors_market_id_fkey foreign key (market_id) references marketplaces (" \
"null, image_vendor character varying(1000000) null, dateinserted_vendor timestamp(6) with time zone not null, " \
"constraint vendors_pk primary key (vendor_id), constraint vendors_market_id_fkey foreign key (market_id) references marketplaces (" \
"market_id))"
cur.execute(sql)
@ -354,8 +369,8 @@ def create_database(cur, con):
sql = "create table vendors_history(vendor_id integer not null, market_id integer not null, name_vendor " \
"character varying(255) not null, rating_vendor character varying(255), successfultransactions_vendor " \
"integer null, dateinserted_vendor timestamp(6) with time zone not null, constraint vendors_history_pk " \
"primary key (vendor_id, dateinserted_vendor), constraint vendors_history_vendor_id_fkey foreign key (" \
"integer null, image_vendor character varying(1000000) null, dateinserted_vendor timestamp(6) with time zone not null, " \
"constraint vendors_history_pk primary key (vendor_id, dateinserted_vendor), constraint vendors_history_vendor_id_fkey foreign key (" \
"vendor_id) references vendors (vendor_id), constraint vendors_history_market_id_fkey foreign key (" \
"market_id) references marketplaces (market_id))"
cur.execute(sql)
@ -367,9 +382,9 @@ def create_database(cur, con):
"character varying(25) null, btc_item character varying(255) null, usd_item character varying(255) " \
"null, euro_item character varying(255) null, quantitysold_item integer null, quantityleft_item " \
"character varying(255) null, shippedfrom_item character varying(255) null, shippedto_item character " \
"varying(255) null, href_item character varying(255) not null, lastseen_item timestamp(6) with time zone " \
"not null, dateinserted_item timestamp(6) with time zone not null, classification_item double " \
"precision not null, constraint items_pk primary key (item_id), constraint " \
"varying(255) null, lastseen_item timestamp(6) with time zone not null, image_item character varying(1000000) null, " \
"href_item character varying(255) not null, dateinserted_item timestamp(6) with time zone not null, " \
"classification_item double precision not null, constraint items_pk primary key (item_id), constraint " \
"items_market_id_fkey foreign key (market_id) references marketplaces (market_id),constraint " \
"items_vendor_id_fkey foreign key (vendor_id) references vendors (vendor_id))"
cur.execute(sql)
@ -384,9 +399,9 @@ def create_database(cur, con):
"character varying(25) null, btc_item character varying(255) null, usd_item character varying(255) " \
"null, euro_item character varying(255) null, quantitysold_item integer null, quantityleft_item " \
"character varying(255) null, shippedfrom_item character varying(255) null, shippedto_item character " \
"varying(255) null, href_item character varying(255) not null, lastseen_item timestamp(6) with time zone " \
"not null, dateinserted_item timestamp(6) with time zone not null, classification_item double " \
"precision not null, constraint items_history_pk primary key (item_id, dateinserted_item), " \
"varying(255) null, lastseen_item timestamp(6) with time zone not null, image_item character varying(1000000) null, " \
"href_item character varying(255) not null, dateinserted_item timestamp(6) with time zone not null, " \
"classification_item double precision not null, constraint items_history_pk primary key (item_id, dateinserted_item), " \
"constraint items_history_market_id_fkey foreign key (market_id) references marketplaces (market_id), " \
"constraint items_history_vendor_id_fkey foreign key (vendor_id) references vendors (vendor_id), " \
"constraint items_history_item_id_fkey foreign key (item_id) references items (item_id))"


+ 262
- 0
MarketPlaces/DarkBazar/crawler_selenium.py

@ -0,0 +1,262 @@
__author__ = 'DarkWeb'
'''
DarkBazar Marketplace Crawler (Selenium)
'''
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from PIL import Image
import urllib.parse as urlparse
import os, re, time
from datetime import date
import subprocess
import configparser
from bs4 import BeautifulSoup
from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.DarkBazar.parser import darkbazar_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML
counter = 1
baseURL = 'http://jw5e5sdywqupaqgtt43uq5ysfqpd2vzefl65s2fcjlj4qfxivynv6bqd.onion/'
def startCrawling():
mktName = getMKTName()
driver = getAccess()
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closeDriver(driver)
new_parse(mktName, baseURL, True)
# Returns the name of the website
def getMKTName():
name = 'DarkBazar'
return name
# Return the base link of the website
def getFixedURL():
url = 'http://jw5e5sdywqupaqgtt43uq5ysfqpd2vzefl65s2fcjlj4qfxivynv6bqd.onion/'
return url
# Closes Tor Browser
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
print('Closing Tor...')
driver.close()
time.sleep(3)
return
# Creates the Firefox 'driver' and configures its 'Profile'
# to use the Tor proxy and socket
def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
ff_prof.set_preference("places.history.enabled", False)
ff_prof.set_preference("privacy.clearOnShutdown.offlineApps", True)
ff_prof.set_preference("privacy.clearOnShutdown.passwords", True)
ff_prof.set_preference("privacy.clearOnShutdown.siteSettings", True)
ff_prof.set_preference("privacy.sanitize.sanitizeOnShutdown", True)
ff_prof.set_preference("signon.rememberSignons", False)
ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
# ff_prof.set_preference("network.dns.disablePrefetch", True)
# ff_prof.set_preference("network.http.sendRefererHeader", 0)
ff_prof.set_preference("permissions.default.image", 3)
ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
ff_prof.set_preference('network.proxy.type', 1)
ff_prof.set_preference("network.proxy.socks_version", 5)
ff_prof.set_preference('network.proxy.socks', '127.0.0.1')
ff_prof.set_preference('network.proxy.socks_port', 9150)
ff_prof.set_preference('network.proxy.socks_remote_dns', True)
ff_prof.set_preference("javascript.enabled", False)
ff_prof.update_preferences()
service = Service(config.get('TOR', 'geckodriver_path'))
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
driver.maximize_window()
return driver
# The driver 'gets' the url, attempting to reach the site; if it cannot be accessed, returns 'down'
def getAccess():
url = getFixedURL()
driver = createFFDriver()
try:
driver.get(url)
return driver
except:
driver.close()
return 'down'
def login(driver):
input("Press ENTER when CAPTCHA is complete and login page has loaded\n")
# entering username and password into input boxes
usernameBox = driver.find_element(by=By.XPATH, value='//input[@name="username"]')
# Username here
usernameBox.send_keys('aliciamykeys')
passwordBox = driver.find_element(by=By.XPATH, value='//input[@name="password"]')
# Password here
passwordBox.send_keys('aliciawherearemykey$')
# session time
session_select = Select(driver.find_element(by=By.XPATH, value='/html/body/main/div/div/div/div/div/form/div[4]/div/div[2]/select'))
session_select.select_by_visible_text('Session 60min')
input("Press ENTER when CAPTCHA is completed and you exit the newsletter\n")
# wait for the listing page to show up (this XPath may need to change for a different seed url)
WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
(By.XPATH, '//*[@id="submit"]')))
def savePage(driver, page, url):
cleanPage = cleanHTML(driver, page)
filePath = getFullPathName(url)
os.makedirs(os.path.dirname(filePath), exist_ok=True)
open(filePath, 'wb').write(cleanPage.encode('utf-8'))
return
def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url)
if isDescriptionLink(url):
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else:
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath
def getMKTName() -> str:
name = 'DarkBazar'
return name
def getNameFromURL(url):
global counter
name = ''.join(e for e in url if e.isalnum())
if name == '':
name = str(counter)
counter = counter + 1
return name
def getInterestedLinks():
links = []
# # Digital Goods
# links.append('http://jw5e5sdywqupaqgtt43uq5ysfqpd2vzefl65s2fcjlj4qfxivynv6bqd.onion/cat.php?category=4')
# Services
links.append('http://jw5e5sdywqupaqgtt43uq5ysfqpd2vzefl65s2fcjlj4qfxivynv6bqd.onion/cat.php?category=5')
return links
def crawlForum(driver):
print("Crawling the DarkBazar market")
linksToCrawl = getInterestedLinks()
i = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
has_next_page = True
count = 0
while has_next_page:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(driver, html, link)
list = productPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver, driver.page_source, item)
driver.back()
# comment out
# break
# comment out
if count == 1:
break
try:
link = driver.find_element(by=By.XPATH, value='//a[contains(text(), "Next")]').get_attribute('href')
if link == "":
raise NoSuchElementException
count += 1
except NoSuchElementException:
has_next_page = False
except Exception as e:
print(link, e)
i += 1
print("Crawling the DarkBazar market done.")
# Returns 'True' if the link is a description (product) page link; may need to change for every website
def isDescriptionLink(url):
if 'item' in url:
return True
return False
# Returns 'True' if the link is a listing page link; may need to change for every website
def isListingLink(url):
if 'category=' in url:
return True
return False
def productPages(html):
soup = BeautifulSoup(html, "html.parser")
return darkbazar_links_parser(soup)
def crawler():
startCrawling()

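To exercise just this new crawler outside of markets_mining, a minimal sketch (assumes the TOR / geckodriver paths in the project configuration are already set, since createFFDriver reads them at run time):
from MarketPlaces.DarkBazar.crawler_selenium import crawler

if __name__ == "__main__":
    crawler()  # runs startCrawling(): Tor-proxied Firefox, manual CAPTCHA/login, crawl, then new_parse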
+ 289
- 0
MarketPlaces/DarkBazar/parser.py

@ -0,0 +1,289 @@
__author__ = 'DarkWeb'
# Here, we are importing the auxiliary functions to clean or convert data
from MarketPlaces.Utilities.utilities import *
# Here, we are importing BeautifulSoup to search through the HTML tree
from bs4 import BeautifulSoup
# Parses description pages: takes the html of a description page as a soup object and extracts the info it needs
# The scraped info is stored in separate fields, which are returned after being organized
# @param: soup object of a description page's html
# return: 'row', a tuple holding the info scraped from the description page
def darkbazar_description_parser(soup):
# Fields to be parsed
vendor = "-1" # 0 *Vendor_Name
success = "-1" # 1 Vendor_Successful_Transactions
rating_vendor = "-1" # 2 Vendor_Rating
name = "-1" # 3 *Product_Name
describe = "-1" # 4 Product_Description
CVE = "-1" # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
MS = "-1" # 6 Product_MS_Classification (Microsoft Security)
category = "-1" # 7 Product_Category
views = "-1" # 8 Product_Number_Of_Views
reviews = "-1" # 9 Product_Number_Of_Reviews
rating_item = "-1" # 10 Product_Rating
addDate = "-1" # 11 Product_AddedDate
BTC = "-1" # 12 Product_BTC_SellingPrice
USD = "-1" # 13 Product_USD_SellingPrice
EURO = "-1" # 14 Product_EURO_SellingPrice
sold = "-1" # 15 Product_QuantitySold
left = "-1" # 16 Product_QuantityLeft
shipFrom = "-1" # 17 Product_ShippedFrom
shipTo = "-1" # 18 Product_ShippedTo
image = "-1" # 19 Product_Image
vendor_image = "-1" # 20 Vendor_Image
# Finding Product Name
divmb = soup.findAll('div', {'class': "mb-1"})
name = divmb[0].text
name = name.replace('\n', ' ')
name = name.replace(",", "")
name = name.strip()
# Finding Vendor
vendor = divmb[1].find('a').text.strip()
# Finding Vendor Rating
temp = soup.find('div', {'class': ""}).text
temp = temp.split('(')
rating = temp[0].replace("Vendor's Review : ", "")
rating = rating.replace("%", "")
rating_vendor = rating.strip()
# Finding the Product Rating and Number of Product Reviews
reviews = temp[2].replace(" review)", "")
reviews = reviews.strip()
temp = temp[1].split(")")
rating = temp[1].replace("Product Review : ", "")
rating = rating.replace("%", "")
rating_item = rating.strip()
# Finding Prices
USD = soup.find('div', {'class': "h3 text-primary"}).text.strip()
# Finding the Product Category
pmb = soup.findAll('p', {'class': "mb-1"})
category = pmb[-1].text
category = category.replace("Category: ", "").strip()
# Finding the Product Quantity Available
left = divmb[-1].text
left = left.split(",", 1)[1]
left = left.replace("in stock", "")
left = left.strip()
# Finding Number Sold
sold = divmb[-1].text
sold = sold.split(",", 1)[0]
sold = sold.replace("sold", "")
sold = sold.strip()
# Finding Shipment Information (Origin)
shipFrom = pmb[0].text
shipFrom = shipFrom.replace("Ships from: ", "").strip()
# Finding Shipment Information (Destination)
shipTo = pmb[1].text
shipTo = shipTo.replace("Ships to: ", "").strip()
# Finding the Product description
cardbody = soup.findAll('div', {'class': "card-body"})
describe = cardbody[1].text.strip()
# Finding Product Image
image = soup.find('div', {'class': 'product-primary'}).find('img')
image = image.get('src')
image = image.split('base64,')[-1]
# Searching for CVE and MS categories
cve = soup.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
if cve:
CVE = " "
for idx in cve:
CVE += (idx)
CVE += " "
CVE = CVE.replace(',', ' ')
CVE = CVE.replace('\n', '')
ms = soup.findAll(text=re.compile('MS\d{2}-\d{3}'))
if ms:
MS = " "
for im in ms:
MS += (im)
MS += " "
MS = MS.replace(',', ' ')
MS = MS.replace('\n', '')
# Populating the final variable (this should be a list with all fields scraped)
row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
# Sending the results
return row
# Parses listing pages: takes the html of a listing page as a soup object and extracts the info it needs
# The scraped info is stored in separate lists, which are returned after being organized
# @param: soup object of a listing page's html
# return: 'row' that contains a variety of lists, each holding info on the products of the listing page
def darkbazar_listing_parser(soup):
# Fields to be parsed
nm = 0 # *Total_Products (Should be Integer)
mktName = "DarkBazar" # 0 *Marketplace_Name
vendor = [] # 1 *Vendor y
rating_vendor = [] # 2 Vendor_Rating
success = [] # 3 Vendor_Successful_Transactions
name = [] # 4 *Product_Name y
CVE = [] # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) dont worry about this
MS = [] # 6 Product_MS_Classification (Microsoft Security) dont worry about this
category = [] # 7 Product_Category y
describe = [] # 8 Product_Description
views = [] # 9 Product_Number_Of_Views
reviews = [] # 10 Product_Number_Of_Reviews
rating_item = [] # 11 Product_Rating
addDate = [] # 12 Product_AddDate
BTC = [] # 13 Product_BTC_SellingPrice
USD = [] # 14 Product_USD_SellingPrice y
EURO = [] # 15 Product_EURO_SellingPrice
sold = [] # 16 Product_QuantitySold
qLeft = [] # 17 Product_QuantityLeft
shipFrom = [] # 18 Product_ShippedFrom
shipTo = [] # 19 Product_ShippedTo
image = [] # 20 Product_Image
image_vendor = [] # 21 Vendor_Image
href = [] # 22 Product_Links
listing = soup.findAll('div', {"id": "itembox"})
# Populating the Number of Products
nm = len(listing)
for a in listing:
bae = a.findAll('a', href=True)
lb = a.findAll('div', {"id": "littlebox"})
# Adding the url to the list of urls
link = bae[0].get('href')
link = cleanLink(link)
href.append(link)
# Finding the Product
product = lb[1].find('a').text
product = product.replace('\n', ' ')
product = product.replace(",", "")
product = product.replace("...", "")
product = product.strip()
name.append(product)
# Finding Product Image
product_image = a.find('img')
product_image = product_image.get('src')
product_image = product_image.split('base64,')[-1]
image.append(product_image)
# Finding Prices
price = lb[-1].find('div', {"class": "mb-1"}).text
price = price.replace("$","")
price = price.strip()
USD.append(price)
# Finding the Vendor
vendor_name = lb[-1].find("a").text
vendor_name = vendor_name.replace(",", "")
vendor_name = vendor_name.strip()
vendor.append(vendor_name)
image_vendor.append("-1")
# Finding the Category
cat = lb[-1].find("span").text
cat = cat.replace("class:", "")
cat = cat.strip()
category.append(cat)
span = lb[0].findAll("span")
# Finding Number of Views
num = span[0].text
num = num.replace("views:", "")
num = num.strip()
views.append(num)
# Finding Number Sold
num = span[2].text
num = num.replace("Sold:", "")
num = num.strip()
sold.append(num)
# Finding Quantity Left
quant = span[1].text
quant = quant.replace("stock:", "")
quant = quant.strip()
qLeft.append(quant)
# add shipping information
ship = lb[2].findAll('small')[1].findAll('span')[1].text.split("->")
shipFrom.append(ship[0].replace("Ship from ", "").strip())
shipTo.append(ship[1].replace("to ", "").strip())
# Searching for CVE and MS categories
cve = a.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
if not cve:
cveValue = "-1"
else:
cee = " "
for idx in cve:
cee += (idx)
cee += " "
cee = cee.replace(',', ' ')
cee = cee.replace('\n', '')
cveValue = cee
CVE.append(cveValue)
ms = a.findAll(text=re.compile('MS\d{2}-\d{3}'))
if not ms:
MSValue = "-1"
else:
me = " "
for im in ms:
me += (im)
me += " "
me = me.replace(',', ' ')
me = me.replace('\n', '')
MSValue = me
MS.append(MSValue)
# Populate the final variable (this should be a list with all fields scraped)
return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
# Called by the crawler to get the description links on a listing page
# @param: soup object of the correct html page (a listing page)
# return: list of description links from a listing page
def darkbazar_links_parser(soup):
# Returning all links that should be visited by the Crawler
href = []
listing = soup.findAll('div', {"id": "itembox"})
# for a in listing:
# bae = a.find('a', {"class": "text-info"}, href=True)
# link = bae['href']
# href.append(link)
for a in listing:
bae = a.findAll('a', href=True)
# Adding the url to the list of urls
link = bae[0].get('href')
href.append(link)
return href

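A quick offline check of the new parsers against a saved listing page; the file name below is hypothetical, the real pages are written by savePage under the shared HTML_Pages folder:
from bs4 import BeautifulSoup
from MarketPlaces.DarkBazar.parser import darkbazar_links_parser, darkbazar_listing_parser

with open("listing_sample.html", "r", encoding="utf-8") as f:
    soup = BeautifulSoup(f.read(), "html.parser")

print(darkbazar_links_parser(soup))    # description-page hrefs found on the listing
rows = darkbazar_listing_parser(soup)  # organized per-product fields for the DB layer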
+ 6
- 21
MarketPlaces/DarkFox/crawler_selenium.py

@ -30,7 +30,6 @@ baseURL = 'http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling():
# opentor()
mktName = getMKTName()
driver = getAccess()
@ -40,25 +39,11 @@ def startCrawling():
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
closeDriver(driver)
new_parse(mktName, baseURL, True)
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
# Returns the name of the website
#return: name of site in string type
def getMKTName():
@ -81,7 +66,7 @@ def getFixedURL():
# Closes Tor Browser
#@param: current selenium driver
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -108,10 +93,10 @@ def createFFDriver():
# ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
# ff_prof.set_preference("network.dns.disablePrefetch", True)
# ff_prof.set_preference("network.http.sendRefererHeader", 0)
# ff_prof.set_preference("permissions.default.image", 2)
# ff_prof.set_preference("browser.download.folderList", 2)
# ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
# ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
ff_prof.set_preference("permissions.default.image", 3)
ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
ff_prof.set_preference('network.proxy.type', 1)
ff_prof.set_preference("network.proxy.socks_version", 5)
ff_prof.set_preference('network.proxy.socks', '127.0.0.1')

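For reference, Firefox's permissions.default.image preference takes 1 = load all images, 2 = block all images, 3 = block only third-party images. Standardizing this preference to 3 across the crawlers lets each market's own product thumbnails load, which the parsers now need in order to capture image_item and image_vendor, while still skipping third-party content. A minimal sketch of the relevant lines (profile path omitted; the crawlers load it from the project config):
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile

ff_prof = FirefoxProfile()
# 1 = allow all images, 2 = block all, 3 = block third-party images only
ff_prof.set_preference("permissions.default.image", 3)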

+ 6
- 23
MarketPlaces/DarkMatter/crawler_selenium.py

@ -32,7 +32,6 @@ baseURL = 'http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling():
# opentor()
mktName = getMKTName()
driver = getAccess()
@ -42,25 +41,11 @@ def startCrawling():
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
closeDriver(driver)
new_parse(mktName, baseURL, True)
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
# Returns the name of the website
#return: name of site in string type
def getMKTName():
@ -77,7 +62,7 @@ def getFixedURL():
# Closes Tor Browser
#@param: current selenium driver
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -94,7 +79,6 @@ def createFFDriver():
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
ff_prof.set_preference("places.history.enabled", False)
ff_prof.set_preference("privacy.clearOnShutdown.offlineApps", True)
@ -105,7 +89,7 @@ def createFFDriver():
ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
#ff_prof.set_preference("network.dns.disablePrefetch", True)#connection issue
#ff_prof.set_preference("network.http.sendRefererHeader", 0)#connection issue
ff_prof.set_preference("permissions.default.image", 1)
ff_prof.set_preference("permissions.default.image", 3)
ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
@ -229,17 +213,16 @@ def crawlForum(driver):
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
try:
time.sleep(1.5) # to keep from detecting click speed
time.sleep(3) # to keep from detecting click speed
driver.get(itemURL)
except:
driver.refresh()
savePage(driver, driver.page_source, item)
time.sleep(1.5)
time.sleep(3) # to keep from detecting click speed
driver.back()
# to keep from detecting click speed
# comment out
break
# break
# comment out
if count == 1:


+ 106
- 134
MarketPlaces/DarkMatter/parser.py

@ -34,36 +34,29 @@ def darkmatter_description_parser(soup):
left = "-1" # 16 Product_QuantityLeft
shipFrom = "-1" # 17 Product_ShippedFrom
shipTo = "-1" # 18 Product_ShippedTo
image = "-1" # 19 Product_Image
vendor_image = "-1" # 20 Vendor_Image
# 0 *Vendor_Name
try:
temp = soup.find('table', {'class', 'vtable'})
temp = temp.findAll('tr')
temp2 = temp[3].find('a').text
name = cleanString(temp2.strip())
vendor = cleanString(temp2.strip())
except:
try:
temp = soup.find('table', {'class', 'vtable'})
temp = temp.findAll('tr')
temp2 = temp[4].find('a').text
name = cleanString(temp2.strip())
except:
print("vendor")
temp = soup.find('table', {'class', 'vtable'})
temp = temp.findAll('tr')
temp2 = temp[4].find('a').text
vendor = cleanString(temp2.strip())
# product name
try:
name = soup.find('div', {'class', 'title-h2'}).text
name = cleanString(name.strip())
except:
print("name")
name = soup.find('div', {'class', 'title-h2'}).text
name = cleanString(name.strip())
#product description
try:
temp = soup.find('pre', {'class', 'description'}).text
temp = temp.replace('\n', ' ')
describe = cleanString(temp.strip())
except:
print("description")
temp = soup.find('pre', {'class', 'description'}).text
temp = temp.replace('\n', ' ')
describe = cleanString(temp.strip())
#product category
try:
@ -75,48 +68,42 @@ def darkmatter_description_parser(soup):
temp2 = temp[4].find('a').text
category = cleanString(temp2.strip())
except:
try:
temp = soup.find('table', {'class', 'vtable'})
temp = temp.findAll('tr')
temp2 = temp[5].find('th').text
temp2 = cleanString(temp2.strip)
if (temp2 == "Category"):
temp2 = temp[5].find('a').text
category = cleanString(temp2.strip())
except:
print('category')
# usd
try:
temp = soup.find('table', {'class', 'vtable'})
temp = temp.findAll('tr')
temp2 = temp[1].find('td').text
temp2 = temp2.replace(' USD', '')
USD = cleanString(temp2)
except:
print('USD')
# 15 Product_QuantitySold
try:
temp = soup.find('table', {'class', 'vtable'})
temp = temp.findAll('tr')
temp2 = temp[5].find('th').text
temp2 = cleanString(temp2)
temp3 = temp[6].find('th').text
temp3 = cleanString(temp3)
if (temp2 == "Sold"):
temp2 = temp[5].find('td').text
sold = cleanString(temp2.strip())
elif (temp3 == "Sold"):
temp2 = temp[6].find('td').text
sold = cleanString(temp2.strip())
except:
print('sold')
temp2 = cleanString(temp2.strip())
if (temp2 == "Category"):
temp2 = temp[5].find('a').text
category = cleanString(temp2.strip())
# usd
temp = soup.find('table', {'class', 'vtable'})
temp = temp.findAll('tr')
temp2 = temp[1].find('td').text
temp2 = temp2.replace(' USD', '')
USD = cleanString(temp2)
# 15 Product_QuantitySold
temp = soup.find('table', {'class', 'vtable'})
temp = temp.findAll('tr')
temp2 = temp[5].find('th').text
temp2 = cleanString(temp2)
temp3 = temp[6].find('th').text
temp3 = cleanString(temp3)
if (temp2 == "Sold"):
temp2 = temp[5].find('td').text
sold = cleanString(temp2.strip())
elif (temp3 == "Sold"):
temp2 = temp[6].find('td').text
sold = cleanString(temp2.strip())
# Finding Product Image
image = soup.find('td', {"class": "vtop"}).find('img').get('src')
image = image.split('base64,')[-1]
# Populating the final variable (this should be a list with all fields scraped)
row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
BTC, USD, EURO, sold, left, shipFrom, shipTo)
BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
# Sending the results
return row
@ -129,34 +116,36 @@ def darkmatter_description_parser(soup):
def darkmatter_listing_parser(soup):
# Fields to be parsed
nm = 0 # Total_Products (Should be Integer)
mktName = "DarkMatter" # 0 Marketplace_Name
name = [] # 1 Product_Name
CVE = [] # 2 Product_CVE_Classification (Common Vulnerabilities and Exposures)
MS = [] # 3 Product_MS_Classification (Microsoft Security)
category = [] # 4 Product_Category
describe = [] # 5 Product_Description
escrow = [] # 6 Vendor_Warranty
views = [] # 7 Product_Number_Of_Views
reviews = [] # 8 Product_Number_Of_Reviews
addDate = [] # 9 Product_AddDate
rating_item = [] # 11 Product_Rating
lastSeen = [] # 10 Product_LastViewDate
BTC = [] # 11 Product_BTC_SellingPrice
USD = [] # 12 Product_USD_SellingPrice
EURO = [] # 13 Product_EURO_SellingPrice
sold = [] # 14 Product_QuantitySold
qLeft =[] # 15 Product_QuantityLeft
shipFrom = [] # 16 Product_ShippedFrom
shipTo = [] # 17 Product_ShippedTo
vendor = [] # 18 Vendor
rating = [] # 19 Vendor_Rating
success = [] # 20 Vendor_Successful_Transactions
href = [] # 23 Product_Links (Urls)
nm = 0 # *Total_Products (Should be Integer)
mktName = "DarkMatter" # 0 *Marketplace_Name
vendor = [] # 1 *Vendor y
rating = [] # 2 Vendor_Rating
success = [] # 3 Vendor_Successful_Transactions
name = [] # 4 *Product_Name y
CVE = [] # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
MS = [] # 6 Product_MS_Classification (Microsoft Security)
category = [] # 7 Product_Category y
describe = [] # 8 Product_Description
views = [] # 9 Product_Number_Of_Views
reviews = [] # 10 Product_Number_Of_Reviews
rating_item = [] # 11 Product_Rating
addDate = [] # 12 Product_AddDate
BTC = [] # 13 Product_BTC_SellingPrice
USD = [] # 14 Product_USD_SellingPrice y
EURO = [] # 15 Product_EURO_SellingPrice
sold = [] # 16 Product_QuantitySold
qLeft =[] # 17 Product_QuantityLeft
shipFrom = [] # 18 Product_ShippedFrom
shipTo = [] # 19 Product_ShippedTo
image = [] # 20 Product_Image
image_vendor = [] # 21 Vendor_Image
href = [] # 22 Product_Links
names = soup.find('div', {"class": "content"}).findAll('td', {"class": "lefted", "colspan": "3"})
left = soup.find('div', {"class": "content"}).findAll('table', {"class": "vtable"})
right = soup.find('div', {"class": "content"}).findAll('td', {"class": "vtop centered"})
images = soup.find('div', {"class": "content"}).findAll('td', {"class": "vcentered"})
# vtop centered
count = 0
@ -165,18 +154,15 @@ def darkmatter_listing_parser(soup):
for a in names:
# product name
try:
temp = a.find('a').text
if ("pcs x " in temp):
index = temp.index("pcs x ")
result = temp[index + len("pcs x "):]
name.append(cleanString(result))
elif("pks x " in temp):
index = temp.index("pks x ")
result = temp[index + len("pks x "):]
name.append(cleanString(temp))
except Exception as e:
print("product name", e)
temp = a.find('a').text
if ("pcs x " in temp):
index = temp.index("pcs x ")
result = temp[index + len("pcs x "):]
name.append(cleanString(result))
elif("pks x " in temp):
index = temp.index("pks x ")
result = temp[index + len("pks x "):]
name.append(cleanString(result))
CVE.append("-1")
MS.append("-1")
@ -186,74 +172,60 @@ def darkmatter_listing_parser(soup):
length_2 = len(temp2) - 1
# category
try:
temp = temp2[1].find('td').text
category.append(cleanString(temp.strip()))
except:
print('category')
temp = temp2[1].find('td').text
category.append(cleanString(temp.strip()))
describe.append("-1")
escrow.append("-1")
#escrow.append("-1")
views.append("-1")
reviews.append("-1")
addDate.append("-1")
lastSeen.append("-1")
#lastSeen.append("-1")
BTC.append("-1")
image_vendor.append("-1")
# usd
try:
temp3 = right[count*2].find('span').text
temp = temp3.replace(' USD', '')
USD.append(cleanString(temp))
except:
print('USD')
temp3 = right[count*2].find('span').text
temp = temp3.replace(' USD', '')
USD.append(cleanString(temp))
EURO.append("-1")
# 14 Product_QuantitySold
try:
temp3 = temp2[length_2].find('th').text
temp3 = cleanString(temp3)
if (temp3 == "Sold:"):
temp = temp2[length_2].find('td').text
sold.append(cleanString(temp.strip()))
else:
sold.append("-1")
except Exception as e:
temp3 = temp2[length_2].find('th').text
temp3 = cleanString(temp3)
if (temp3 == "Sold:"):
temp = temp2[length_2].find('td').text
sold.append(cleanString(temp.strip()))
else:
sold.append("-1")
print('sold', e)
qLeft.append("-1")
shipFrom.append("-1")
# ship to
try:
temp3 = temp2[length_2].find('th').text
temp3 = cleanString(temp3)
if (temp3 == "Ship To:"):
temp = temp2[length_2].find('td').text
shipTo.append(cleanString(temp.strip()))
else:
shipTo.append("-1")
except Exception as e:
temp3 = temp2[length_2].find('th').text
temp3 = cleanString(temp3)
if (temp3 == "Ship To:"):
temp = temp2[length_2].find('td').text
shipTo.append(cleanString(temp.strip()))
else:
shipTo.append("-1")
print('shopto')
# vendor
try:
temp = temp2[0].find('a').text
vendor.append(cleanString(temp.strip()))
except:
print('vendor')
temp = temp2[0].find('a').text
vendor.append(cleanString(temp.strip()))
# add product rating (stars)
rating.append("-1")
success.append("-1")
try:
temp = a.find('a').get('href')
href.append(temp)
except:
print('href')
temp = a.find('a').get('href')
href.append(temp)
# Finding Product Image
product_image = images[count*2].find('img').get('src')
product_image = product_image.split('base64,')[-1]
image.append(product_image)
count += 1
@ -261,7 +233,7 @@ def darkmatter_listing_parser(soup):
# Populate the final variable (this should be a list with all fields scraped)
return organizeProducts(mktName, nm, vendor, rating, success, name, CVE, MS, category, describe, views,
reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
#called by the crawler to get description links on a listing page

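All of the new image fields store only the payload that follows 'base64,' in the page's inline data URI. A small sketch (field contents assumed) of writing such a payload back out as an image file:
import base64

def save_embedded_image(b64_payload: str, out_path: str) -> None:
    # b64_payload is what image_item / image_vendor hold after split('base64,')[-1]
    with open(out_path, "wb") as f:
        f.write(base64.b64decode(b64_payload))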

+ 4
- 18
MarketPlaces/DarkTor/crawler_selenium.py

@ -31,7 +31,6 @@ baseURL = 'http://zuauw53dukqdmll5p3fld26ns2gepcyfmbofobjczdni6ecmkoitnfid.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling():
# opentor()
mktName = getMKTName()
driver = getAccess()
@ -41,25 +40,11 @@ def startCrawling():
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
closeDriver(driver)
new_parse(mktName, baseURL, True)
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
# Returns the name of the website
#return: name of site in string type
def getMKTName():
@ -76,7 +61,7 @@ def getFixedURL():
# Closes Tor Browser
#@param: current selenium driver
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -103,7 +88,7 @@ def createFFDriver():
ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
ff_prof.set_preference("network.dns.disablePrefetch", True)
ff_prof.set_preference("network.http.sendRefererHeader", 0)
ff_prof.set_preference("permissions.default.image", 2)
ff_prof.set_preference("permissions.default.image", 3)
ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
@ -145,6 +130,7 @@ def login(driver):
WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
(By.XPATH, "/html/body/div[1]/div/div/div[2]/main/div/div/section[5]/div/div[1]/div")))
# Saves the crawled html page, makes the directory path for html pages if not made
def savePage(driver, page, url):
cleanPage = cleanHTML(driver, page)


+ 3
- 17
MarketPlaces/DigitalThriftShop/crawler_selenium.py

@ -32,7 +32,6 @@ baseURL = 'http://kw4zlnfhxje7top26u57iosg55i7dzuljjcyswo2clgc3mdliviswwyd.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling():
# opentor()
mktName = getMKTName()
driver = getAccess()
@ -42,24 +41,11 @@ def startCrawling():
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
closeDriver(driver)
new_parse(mktName, baseURL, True)
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
# Returns the name of the website
#return: name of site in string type
def getMKTName():
@ -76,7 +62,7 @@ def getFixedURL():
# Closes Tor Browser
#@param: current selenium driver
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -235,7 +221,7 @@ def crawlForum(driver):
driver.back()
# comment out
break
# break
# comment out
if count == 1:


+ 19
- 6
MarketPlaces/DigitalThriftShop/parser.py

@ -34,7 +34,8 @@ def digitalThriftShop_description_parser(soup: Tag):
left = "-1" # 16 Product_QuantityLeft
shipFrom = "-1" # 17 Product_ShippedFrom
shipTo = "-1" # 18 Product_ShippedTo
image = "-1" # 19 Product_Image
vendor_image = "-1" # 20 Vendor_Image
product_name = soup.find("h1", {"class": "product_title entry-title"}).text
@ -42,7 +43,11 @@ def digitalThriftShop_description_parser(soup: Tag):
product_description = soup.find("div", {"id": "tab-description"}).find("p").text
describe = cleanString(product_description.strip())
# Finding Product Image
image = soup.find('div', {'class': 'woocommerce-product-gallery__image'}).find('img')
image = image.get('src').split('base64,')[-1]
product_category = soup.find("span", {"class": "posted_in"}).find("a").text
category = cleanString(product_category.strip())
@ -64,7 +69,7 @@ def digitalThriftShop_description_parser(soup: Tag):
# Populating the final variable (this should be a list with all fields scraped)
row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
BTC, USD, EURO, sold, left, shipFrom, shipTo)
BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
# Sending the results
return row
@ -98,7 +103,9 @@ def digitalThriftShop_listing_parser(soup: Tag):
qLeft =[] # 17 Product_QuantityLeft
shipFrom = [] # 18 Product_ShippedFrom
shipTo = [] # 19 Product_ShippedTo
href = [] # 20 Product_Links
image = [] # 20 Product_Image
image_vendor = [] # 21 Vendor_Image
href = [] # 22 Product_Links
product_category = soup.find("h1", {"class": "woocommerce-products-header__title page-title"}).text
@ -108,12 +115,17 @@ def digitalThriftShop_listing_parser(soup: Tag):
for product in products_list:
nm += 1
vendor.append("-1")
vendor.append(mktName)
rating_vendor.append("-1")
success.append("-1")
product_name = product.find("h2", {"class": "woocommerce-loop-product__title"}).text
name.append(cleanString(product_name.strip()))
# Finding Product Image
product_image = product.find('img', {'class': 'attachment-woocommerce_thumbnail size-woocommerce_thumbnail'})
product_image = product_image.get('src').split('base64,')[-1]
image.append(product_image)
CVE.append("-1")
MS.append("-1")
@ -121,6 +133,7 @@ def digitalThriftShop_listing_parser(soup: Tag):
describe.append("-1")
views.append("-1")
reviews.append("-1")
image_vendor.append("-1")
try:
product_rating = product.find("div", {"class": "star-rating"}).find("strong", {"class": "rating"}).text
@ -146,7 +159,7 @@ def digitalThriftShop_listing_parser(soup: Tag):
# Populate the final variable (this should be a list with all fields scraped)
return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
#called by the crawler to get description links on a listing page


+ 7
- 21
MarketPlaces/HiddenMarket/crawler_selenium.py

@ -29,7 +29,6 @@ baseURL = 'http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion
# Opens Tor Browser, crawls the website
def startCrawling():
# opentor()
marketName = getMKTName()
driver = getAccess()
@ -39,24 +38,11 @@ def startCrawling():
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
closeDriver(driver)
new_parse(marketName, baseURL, True)
# Opens Tor Browser
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
# Login using premade account credentials and do login captcha manually
def login(driver):
# wait for login page
@ -118,7 +104,7 @@ def getFixedURL():
# Closes Tor Browser
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -145,10 +131,10 @@ def createFFDriver():
# ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
# ff_prof.set_preference("network.dns.disablePrefetch", True)
# ff_prof.set_preference("network.http.sendRefererHeader", 0)
# ff_prof.set_preference("permissions.default.image", 3)
# ff_prof.set_preference("browser.download.folderList", 2)
# ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
# ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
ff_prof.set_preference("permissions.default.image", 3)
ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
ff_prof.set_preference('network.proxy.type', 1)
ff_prof.set_preference("network.proxy.socks_version", 5)
ff_prof.set_preference('network.proxy.socks', '127.0.0.1')
@ -277,7 +263,7 @@ def crawlForum(driver):
driver.back()
# comment out
break
# break
# comment out
if count == 1:


+ 21
- 7
MarketPlaces/HiddenMarket/parser.py

@ -30,6 +30,8 @@ def hiddenmarket_description_parser(soup):
left = "-1" # 16 Product_QuantityLeft
shipFrom = "-1" # 17 Product_ShippedFrom
shipTo = "-1" # 18 Product_ShippedTo
image = "-1" # 19 Product_Image
vendor_image = "-1" # 20 Vendor_Image
bae = soup.find('div', {'class': "main"})
@ -84,6 +86,10 @@ def hiddenmarket_description_parser(soup):
describe = describe.replace("-", " ")
describe = describe.strip()
# Finding Product Image
image = soup.find('div', {"class": "thumbnails"}).find('img', {"class": "bigthumbnail"})
image = image.get('src').split('base64,')[-1]
# Finding the Product Category
category = mb[-4].text
category = category.replace("Category:", "")
@ -115,7 +121,7 @@ def hiddenmarket_description_parser(soup):
# Populating the final variable (this should be a list with all fields scraped)
row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
BTC, USD, EURO, sold, left, shipFrom, shipTo)
BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
# Sending the results
return row
@ -145,7 +151,9 @@ def hiddenmarket_listing_parser(soup):
qLeft = [] # 17 Product_QuantityLeft
shipFrom = [] # 18 Product_ShippedFrom
shipTo = [] # 19 Product_ShippedTo
href = [] # 20 Product_Links
image = [] # 20 Product_Image
image_vendor = [] # 21 Vendor_Image
href = [] # 22 Product_Links
listing = soup.findAll('div', {"class": "item"})
@ -153,12 +161,13 @@ def hiddenmarket_listing_parser(soup):
nm = len(listing)
# Finding Category
# cat = soup.find("div", {'class': "heading"}).text
# cat = cat.replace(",", "")
# cat = cat.strip()
cat = soup.find("div", {'class': "heading"}).text
cat = cat.replace(",", "")
cat = cat.strip()
for card in listing:
# category.append(cat)
category.append(cat)
# Adding the url to the list of urls
@ -175,12 +184,17 @@ def hiddenmarket_listing_parser(soup):
product = product.strip()
name.append(product)
# Finding Product Image
image.append("-1")
# Finding Vendor
vendor_name = card.text
vendor_name = vendor_name.replace(",", "")
vendor_name = vendor_name.strip()
vendor.append(vendor_name)
image_vendor.append("-1")
# Finding USD
usd = card.next_sibling.find('div', {"class": "buttons"}).find('div', {'class': "price"}).text
usd = usd.replace("USD", "")
@ -262,7 +276,7 @@ def hiddenmarket_listing_parser(soup):
# Populate the final variable (this should be a list with all fields scraped)
return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
def hiddenmarket_links_parser(soup):


+ 4
- 0
MarketPlaces/Initialization/marketsList.txt

@ -1,8 +1,12 @@
Apocalypse
DarkBazar
DarkMatter
DigitalThriftShop
HiddenMarket
LionMarketplace
Nexus
Robinhood
ThiefWorld
TorBay
TorMarket
ViceCity

+ 6
- 0
MarketPlaces/Initialization/markets_mining.py

@ -24,6 +24,8 @@ from MarketPlaces.HiddenMarket.crawler_selenium import crawler as crawlerHiddenM
from MarketPlaces.RobinhoodMarket.crawler_selenium import crawler as crawlerRobinhoodMarket
from MarketPlaces.Nexus.crawler_selenium import crawler as crawlerNexus
from MarketPlaces.CypherMarketplace.crawler_selenium import crawler as crawlerCypher
from MarketPlaces.DarkBazar.crawler_selenium import crawler as crawlerDarkBazar
from MarketPlaces.PabloEscobarMarket.crawler_selenium import crawler as crawlerPabloEscobar
import configparser
import os
@ -137,5 +139,9 @@ if __name__ == '__main__':
crawlerNexus()
elif mkt == "CypherMarketplace":
crawlerCypher()
elif mkt == "DarkBazar":
crawlerDarkBazar()
elif mkt == "PabloEscobarMarket":
crawlerPabloEscobar()
print("\nScraping process completed!")

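The two new elif branches plug DarkBazar and PabloEscobarMarket into the existing dispatch: each entry in MarketPlaces/Initialization/marketsList.txt is matched to its crawler and run in turn. A condensed sketch of that flow (the real loop also handles configuration and logging, elided here):
from MarketPlaces.DarkBazar.crawler_selenium import crawler as crawlerDarkBazar
from MarketPlaces.PabloEscobarMarket.crawler_selenium import crawler as crawlerPabloEscobar

with open("MarketPlaces/Initialization/marketsList.txt") as f:
    for mkt in (line.strip() for line in f if line.strip()):
        if mkt == "DarkBazar":
            crawlerDarkBazar()
        elif mkt == "PabloEscobarMarket":
            crawlerPabloEscobar()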
+ 16
- 2
MarketPlaces/Initialization/prepare_parser.py

@ -1,4 +1,4 @@
__author__ = 'Helium'
__author__ = 'DarkWeb'
import glob
import os
@ -21,6 +21,8 @@ from MarketPlaces.HiddenMarket.parser import *
from MarketPlaces.RobinhoodMarket.parser import *
from MarketPlaces.Nexus.parser import *
from MarketPlaces.MikesGrandStore.parser import *
from MarketPlaces.DarkBazar.parser import *
from MarketPlaces.PabloEscobarMarket.parser import *
from MarketPlaces.Classifier.classify_product import predict
@ -72,6 +74,10 @@ def mergePages(rmm, rec):
rec[18] = rmm[17]
if rec[19] == "-1": # shippedto_item
rec[19] = rmm[18]
if rmm[19] != "-1": # image
rec[20] = rmm[19]
if rmm[20] != "-1": # image_vendor
rec[21] = rmm[20]
return rec
@ -148,6 +154,10 @@ def parse_listing(marketPlace, listingFile, soup, createLog, logFile):
rw = nexus_listing_parser(soup)
elif marketPlace == "MikesGrandStore":
rw = mikesGrandStore_listing_parser(soup)
elif marketPlace == "DarkBazar":
rw = darkbazar_listing_parser(soup)
elif marketPlace == "PabloEscobarMarket":
rw = pabloescobarmarket_listing_parser(soup)
else:
print("MISSING CALL TO LISTING PARSER IN PREPARE_PARSER.PY!")
raise Exception
@ -199,6 +209,10 @@ def parse_description(marketPlace, descriptionFile, soup, createLog, logFile):
rmm = nexus_description_parser(soup)
elif marketPlace == "MikesGrandStore":
rmm = mikesGrandStore_description_parser(soup)
elif marketPlace == "DarkBazar":
rmm = darkbazar_description_parser(soup)
elif marketPlace == "PabloEscobarMarket":
rmm = pabloescobarmarket_description_parser(soup)
else:
print("MISSING CALL TO DESCRIPTION PARSER IN PREPARE_PARSER.PY!")
raise Exception
@ -318,7 +332,7 @@ def new_parse(marketPlace, url, createLog):
rec = rec.split(',')
descriptionPattern = cleanLink(rec[20]) + ".html"
descriptionPattern = cleanLink(rec[22]) + ".html"
# Reading the associated description Html Pages
descriptions = glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Description", descriptionPattern))


+ 4
- 19
MarketPlaces/LionMarketplace/crawler_selenium.py

@ -31,7 +31,6 @@ baseURL = 'http://lionznqc2hg2wsp5vgruqait4cpknihwlje6hkjyi52lcl5ivyf7bcad.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling():
# opentor()
mktName = getMKTName()
driver = getAccess()
@ -41,25 +40,11 @@ def startCrawling():
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
closeDriver(driver)
new_parse(mktName, baseURL, True)
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
# Returns the name of the website
#return: name of site in string type
def getMKTName():
@ -76,7 +61,7 @@ def getFixedURL():
# Closes Tor Browser
#@param: current selenium driver
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -103,7 +88,7 @@ def createFFDriver():
ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
ff_prof.set_preference("network.dns.disablePrefetch", True)
ff_prof.set_preference("network.http.sendRefererHeader", 0)
ff_prof.set_preference("permissions.default.image", 1)
ff_prof.set_preference("permissions.default.image", 3)
ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
@ -234,7 +219,7 @@ def crawlForum(driver):
driver.back()
# comment out
break
# break
# comment out
if count == 1:


+ 120
- 153
MarketPlaces/LionMarketplace/parser.py

@ -12,37 +12,37 @@ from bs4 import BeautifulSoup
#@param: soup object looking at html page of description page
#return: 'row' that contains a variety of lists that each hold info on the description page
def lionmarketplace_description_parser(soup):
# Fields to be parsed
vendor = "-1" # 0 *Vendor_Name
success = "-1" # 1 Vendor_Successful_Transactions
rating_vendor = "-1" # 2 Vendor_Rating
name = "-1" # 3 *Product_Name
describe = "-1" # 4 Product_Description
CVE = "-1" # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) dont worry about that much
MS = "-1" # 6 Product_MS_Classification (Microsoft Security) dont worry about that much
category = "-1" # 7 Product_Category
views = "-1" # 8 Product_Number_Of_Views
reviews = "-1" # 9 Product_Number_Of_Reviews
rating_item = "-1" # 10 Product_Rating
addDate = "-1" # 11 Product_AddedDate
BTC = "-1" # 12 Product_BTC_SellingPrice
USD = "-1" # 13 Product_USD_SellingPrice
EURO = "-1" # 14 Product_EURO_SellingPrice
sold = "-1" # 15 Product_QuantitySold
left = "-1" # 16 Product_QuantityLeft
shipFrom = "-1" # 17 Product_ShippedFrom
shipTo = "-1" # 18 Product_ShippedTo
vendor = "-1" # 0 *Vendor_Name
success = "-1" # 1 Vendor_Successful_Transactions
rating_vendor = "-1" # 2 Vendor_Rating
name = "-1" # 3 *Product_Name
describe = "-1" # 4 Product_Description
CVE = "-1" # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
MS = "-1" # 6 Product_MS_Classification (Microsoft Security)
category = "-1" # 7 Product_Category
views = "-1" # 8 Product_Number_Of_Views
reviews = "-1" # 9 Product_Number_Of_Reviews
rating_item = "-1" # 10 Product_Rating
addDate = "-1" # 11 Product_AddedDate
BTC = "-1" # 12 Product_BTC_SellingPrice
USD = "-1" # 13 Product_USD_SellingPrice
EURO = "-1" # 14 Product_EURO_SellingPrice
sold = "-1" # 15 Product_QuantitySold
left = "-1" # 16 Product_QuantityLeft
shipFrom = "-1" # 17 Product_ShippedFrom
shipTo = "-1" # 18 Product_ShippedTo
image = "-1" # 19 Product_Image
vendor_image = "-1" # 20 Vendor_Image
# vendor name
try:
temp = soup.find('div', {'class': 'btn-group'}).find('a').text
vendor = (cleanString(temp.strip()))
except:
print('vendor')
vendor = "-1"
temp = soup.find('div', {'class': 'btn-group'}).find('a').text
vendor = (cleanString(temp.strip()))
# table with info
table = soup.find('table', {'class', 'table border-0 text-left table-borderless'})
table = soup.find('table')
rows = table.findAll('tr')
# successful transaction
@ -51,37 +51,25 @@ def lionmarketplace_description_parser(soup):
# vendor rating 5
rating_vendor = '-1'
# product name
try:
temp = soup.find('div', {'class', 'row'}).find('h2').text
name = (cleanString(temp.strip()))
except:
name = '-1'
print('product name')
temp = soup.find('div', {'class', 'row'}).find('h2').text
name = (cleanString(temp.strip()))
# product description
try:
temp = soup.find('div', {'class': "mt-4"}).findAll('p')
temp = temp[1].text
if "\n" in temp:
temp = temp.replace("\n", " ")
temp = temp.replace("\r", " ")
describe = cleanString(temp.strip())
except:
describe="-1"
print('describe')
temp = soup.find('div', {'class': "mt-4"}).find(text=True, recursive=False)
describe = cleanString(temp.strip())
# Finding Product Image
image = soup.find('div', {'id': 'slide-1'}).find('img')
image = image.get('src')
image = image.split('base64,')[-1]
CVE = "-1" # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) dont worry about that much
MS = "-1" # 6 Product_MS_Classification (Microsoft Security) dont worry about that much
# product category
try:
temp = rows[1].find('strong').text
category = cleanString(temp.strip())
except:
category = "-1"
print('category')
temp = rows[1].find('strong').text
category = cleanString(temp.strip())
# product number of views
views = "-1"
@ -92,54 +80,38 @@ def lionmarketplace_description_parser(soup):
BTC = "-1"
# USD selling price
try:
temp = rows[2].find('strong').text
if " $" in temp:
temp = temp.replace(" $", "")
elif "$" in temp:
temp = temp.replace("$", "")
USD = cleanString((temp.strip()))
except:
try:
temp = soup.find('li').find('strong').text
if " $" in temp:
temp = temp.replace(" $", "")
elif "$" in temp:
temp = temp.replace("$", "")
USD = cleanString((temp.strip()))
except:
print("USD")
temp = rows[2].find('strong').text
if " $" in temp:
temp = temp.replace(" $", "")
elif "$" in temp:
temp = temp.replace("$", "")
USD = cleanString((temp.strip()))
EURO = "-1" # 14 Product_EURO_SellingPrice
# product sold
try:
if (len(rows) <= 5):
temp = rows[4].find('td').text
string = cleanString(temp)
if (string == 'Left/Sold'):
temp = rows[4].findAll('td')
temp = temp[1].findAll('span')
# left
temp2 = temp[1].text
temp3 = temp[1].text
if(" items" in temp2):
temp2 = temp2.replace(" items", "")
if(" items" in temp3):
temp3 = temp3.replace(" items", "")
sold = (cleanString(temp2.strip()))
left = cleanString(temp3.strip())
else:
sold = '-1'
left = "-1"
if (len(rows) <= 5):
temp = rows[4].find('td').text
string = cleanString(temp)
if (string == 'Left/Sold'):
temp = rows[4].findAll('td')
temp = temp[1].findAll('span')
# left
temp2 = temp[1].text
temp3 = temp[1].text
if(" items" in temp2):
temp2 = temp2.replace(" items", "")
if(" items" in temp3):
temp3 = temp3.replace(" items", "")
sold = (cleanString(temp2.strip()))
left = cleanString(temp3.strip())
else:
sold = '-1'
left = "-1"
except:
print("success")
else:
sold = '-1'
left = "-1"
@ -148,7 +120,7 @@ def lionmarketplace_description_parser(soup):
# Populating the final variable (this should be a list with all fields scraped)
row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
BTC, USD, EURO, sold, left, shipFrom, shipTo)
BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
# Sending the results
return row
@ -159,45 +131,49 @@ def lionmarketplace_description_parser(soup):
#@param: soup object looking at html page of listing page
#return: 'row' that contains a variety of lists that each hold info on the listing page
def lionmarketplace_listing_parser(soup):
# Fields to be parsed
nm = 0 # *Total_Products (Should be Integer)
mktName = "M00nkeyMarket" # 0 *Marketplace_Name
vendor = [] # 1 *Vendor y
rating_vendor = [] # 2 Vendor_Rating
success = [] # 3 Vendor_Successful_Transactions
name = [] # 4 *Product_Name y
CVE = [] # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) dont worry about this
MS = [] # 6 Product_MS_Classification (Microsoft Security) dont worry about this
category = [] # 7 Product_Category y
describe = [] # 8 Product_Description
views = [] # 9 Product_Number_Of_Views
reviews = [] # 10 Product_Number_Of_Reviews
rating_item = [] # 11 Product_Rating
addDate = [] # 12 Product_AddDate
BTC = [] # 13 Product_BTC_SellingPrice
USD = [] # 14 Product_USD_SellingPrice y
EURO = [] # 15 Product_EURO_SellingPrice
sold = [] # 16 Product_QuantitySold
qLeft = [] # 17 Product_QuantityLeft
shipFrom = [] # 18 Product_ShippedFrom
shipTo = [] # 19 Product_ShippedTo
href = [] # 20 Product_Links
listing = soup.findAll('div', {"class": "card-body"})
nm = 0 # *Total_Products (Should be Integer)
mktName = "LionMarketplace" # 0 *Marketplace_Name
vendor = [] # 1 *Vendor y
rating_vendor = [] # 2 Vendor_Rating
success = [] # 3 Vendor_Successful_Transactions
name = [] # 4 *Product_Name y
CVE = [] # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
MS = [] # 6 Product_MS_Classification (Microsoft Security)
category = [] # 7 Product_Category y
describe = [] # 8 Product_Description
views = [] # 9 Product_Number_Of_Views
reviews = [] # 10 Product_Number_Of_Reviews
rating_item = [] # 11 Product_Rating
addDate = [] # 12 Product_AddDate
BTC = [] # 13 Product_BTC_SellingPrice
USD = [] # 14 Product_USD_SellingPrice y
EURO = [] # 15 Product_EURO_SellingPrice
sold = [] # 16 Product_QuantitySold
qLeft =[] # 17 Product_QuantityLeft
shipFrom = [] # 18 Product_ShippedFrom
shipTo = [] # 19 Product_ShippedTo
image = [] # 20 Product_Image
image_vendor = [] # 21 Vendor_Image
href = [] # 22 Product_Links
listings = soup.findAll('div', {"class": "col-md-4 my-md-0 my-2 col-12"})
# Populating the Number of Products
nm = len(listing)
nm = len(listings)
for a in listing:
for listing in listings:
a = listing.find('div', {"class": "card-body"})
row = a.findAll('p')
# vendor
try:
temp = row[3].text
vendor.append(cleanString(temp.strip()))
except:
vendor.append("-1")
print('vendor')
temp = row[3].text
temp = temp.replace("Vendor:", "")
vendor.append(cleanString(temp.strip()))
image_vendor.append("-1")
# vendor rating
rating_vendor.append("-1")
@ -206,25 +182,22 @@ def lionmarketplace_listing_parser(soup):
success.append("-1")
# product name
try:
temp = a.find('a').text
name.append(cleanString(temp.strip()))
except:
name.append("-1")
print('product name')
temp = a.find('a').text
name.append(cleanString(temp.strip()))
# Finding Product Image
product_image = listing.find('img', {'class': 'card-img-top rounded'})
product_image = product_image.get('src')
product_image = product_image.split('base64,')[-1]
image.append(product_image)
CVE.append('-1')
MS.append('-1')
# product category
try:
temp = row[2].text
if "Category: " in temp:
temp = temp.replace("Category: ", "")
category.append(cleanString(temp.strip()))
except:
print("Error in product category")
temp = row[2].text
temp = temp.replace("Category: ", "")
category.append(cleanString(temp.strip()))
describe.append('-1')
@ -238,14 +211,10 @@ def lionmarketplace_listing_parser(soup):
BTC.append('-1')
# USD
try:
temp = row[0].find('strong').text
if ' $' in temp:
temp = temp.replace(" $", "")
USD.append(cleanString(temp.strip())) # 14 Product_USD_SellingPrice
except:
print("USD")
USD.append("-1")
temp = row[0].find('strong').text
if ' $' in temp:
temp = temp.replace(" $", "")
USD.append(cleanString(temp.strip())) # 14 Product_USD_SellingPrice
EURO.append("-1") # 15 Product_EURO_SellingPrice
@ -257,15 +226,12 @@ def lionmarketplace_listing_parser(soup):
shipTo.append('-1') # 19 Product_ShippedTo
# href
try:
temp = a.find('a').get('href')
href.append(temp)
except:
print('product name')
temp = a.find('a').get('href')
href.append(temp)
# Populate the final variable (this should be a list with all fields scraped)
return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
#called by the crawler to get description links on a listing page
@ -276,9 +242,10 @@ def lionmarketplace_links_parser(soup):
# Returning all links that should be visited by the Crawler
href = []
listing = soup.findAll('div', {"class": "container d-flex justify-content-center"})
listings = soup.findAll('div', {"class": "col-md-4 my-md-0 my-2 col-12"})
for a in listing:
for listing in listings:
a = listing.find('div', {"class": "card-body"})
bae = a.find('a', href=True)
link = bae['href']
href.append(link)
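For reference, here is a minimal sketch of the per-card extraction pattern the reworked LionMarketplace listing parser relies on. The HTML snippet is illustrative rather than captured from the live market, and the real parser additionally routes every string through cleanString from MarketPlaces.Utilities.utilities before appending it.

from bs4 import BeautifulSoup

# Illustrative markup mirroring the structure the parser expects: one wrapper
# per product, a card-body inside it, <p> tags holding price/category/vendor,
# and the product link in the first <a>.
sample = """
<div class="col-md-4 my-md-0 my-2 col-12">
  <div class="card-body">
    <a href="/product/123">Example listing</a>
    <p><strong>10 $</strong></p>
    <p>short blurb</p>
    <p>Category: Hacking</p>
    <p>Vendor: exampleVendor</p>
  </div>
</div>
"""

soup = BeautifulSoup(sample, "html.parser")
for wrapper in soup.findAll('div', {"class": "col-md-4 my-md-0 my-2 col-12"}):
    card = wrapper.find('div', {"class": "card-body"})
    rows = card.findAll('p')
    name = card.find('a').text.strip()
    href = card.find('a').get('href')
    usd = rows[0].find('strong').text.replace(" $", "").strip()
    category = rows[2].text.replace("Category: ", "").strip()
    vendor = rows[3].text.replace("Vendor:", "").strip()
    print(name, href, usd, category, vendor)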


+ 4
- 19
MarketPlaces/M00nkeyMarket/crawler_selenium.py

@ -33,7 +33,6 @@ MARKET_NAME = 'M00nkeyMarket'
# Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling():
# opentor()
driver = getAccess()
if driver != 'down':
@ -42,25 +41,11 @@ def startCrawling():
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
closeDriver(driver)
new_parse(MARKET_NAME, BASE_URL, True)
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
# Returns the name of the website
#return: name of site in string type
# def getMKTName():
@ -77,7 +62,7 @@ def opentor():
# Closes Tor Browser
#@param: current selenium driver
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -104,7 +89,7 @@ def createFFDriver():
ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
ff_prof.set_preference("network.dns.disablePrefetch", True)
ff_prof.set_preference("network.http.sendRefererHeader", 0)
ff_prof.set_preference("permissions.default.image", 1)
ff_prof.set_preference("permissions.default.image", 3)
ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
@ -159,7 +144,7 @@ def login(driver):
# wait for the listing page to show up (this XPath may need to change based on the seed url)
WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
(By.XPATH, "/html/body/div/div[1]/div/div/div[2]/div[3]/div")))
(By.XPATH, "/html/body/div/div[2]/div/div/div/div/div/div[1]/a/img")))
# Saves the crawled html page, makes the directory path for html pages if not made
def savePage(driver, page, url):


+ 291
- 0
MarketPlaces/MetaVerseMarket/crawler_selenium.py

@ -0,0 +1,291 @@
__author__ = 'Helium'
'''
MetaVerseMarket Marketplace Crawler (Selenium)
not complete
need to go through multiple pages...
'''
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from PIL import Image
import urllib.parse as urlparse
import os, re, time
from datetime import date
import subprocess
import configparser
from bs4 import BeautifulSoup
from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.MetaVerseMarket.parser import metaversemarket_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML
counter = 1
baseURL = 'http://mdbvvcfwl3fpckiraucv7gio57yoslnhfjxzpoihf4fgdkdd7bwyv7id.onion/login'
# Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling():
mktName = getMKTName()
driver = getAccess()
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closeDriver(driver)
new_parse(mktName, baseURL, True)
# Returns the name of the website
#return: name of site in string type
def getMKTName():
name = 'MetaVerseMarket'
return name
# Return the base link of the website
#return: url of base site in string type
def getFixedURL():
url = 'http://mdbvvcfwl3fpckiraucv7gio57yoslnhfjxzpoihf4fgdkdd7bwyv7id.onion/login'
return url
# Closes Tor Browser
#@param: current selenium driver
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
print('Closing Tor...')
driver.close()
time.sleep(3)
return
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
ff_prof.set_preference("places.history.enabled", False)
ff_prof.set_preference("privacy.clearOnShutdown.offlineApps", True)
ff_prof.set_preference("privacy.clearOnShutdown.passwords", True)
ff_prof.set_preference("privacy.clearOnShutdown.siteSettings", True)
ff_prof.set_preference("privacy.sanitize.sanitizeOnShutdown", True)
ff_prof.set_preference("signon.rememberSignons", False)
ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
ff_prof.set_preference("network.dns.disablePrefetch", True)
ff_prof.set_preference("network.http.sendRefererHeader", 0)
ff_prof.set_preference("permissions.default.image", 3)
ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
ff_prof.set_preference('network.proxy.type', 1)
ff_prof.set_preference("network.proxy.socks_version", 5)
ff_prof.set_preference('network.proxy.socks', '127.0.0.1')
ff_prof.set_preference('network.proxy.socks_port', 9150)
ff_prof.set_preference('network.proxy.socks_remote_dns', True)
ff_prof.set_preference("javascript.enabled", False)
ff_prof.update_preferences()
service = Service(config.get('TOR', 'geckodriver_path'))
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
driver.maximize_window()
return driver
#the driver 'gets' the url, attempting to get on the site, if it can't access return 'down'
#return: return the selenium driver or string 'down'
def getAccess():
url = getFixedURL()
driver = createFFDriver()
try:
driver.get(url)
return driver
except:
driver.close()
return 'down'
# Login using premade account credentials; the CAPTCHA and newsletter pop-up are handled manually in the browser,
# then the crawler waits for the listing page's search box to confirm the login went through
#@param: current selenium web driver
def login(driver):
# entering username and password into input boxes
usernameBox = driver.find_element(by=By.XPATH, value='//*[@id="username"]')
# Username here
usernameBox.send_keys('metotomoto')
passwordBox = driver.find_element(by=By.XPATH, value='//*[@id="password"]')
# Password here
passwordBox.send_keys('lionking_kumba1ya')
input("Press ENTER when CAPTCHA is completed and you exit the newsletter\n")
# wait for the listing page to show up (this XPath may need to change based on the seed url)
WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
(By.XPATH, '//*[@id="searchq"]')))
# Saves the crawled html page, makes the directory path for html pages if not made
def savePage(driver, page, url):
cleanPage = cleanHTML(driver, page)
filePath = getFullPathName(url)
os.makedirs(os.path.dirname(filePath), exist_ok=True)
open(filePath, 'wb').write(cleanPage.encode('utf-8'))
return
# Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url of the page being crawled
def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url)
if isDescriptionLink(url):
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else:
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath
# Creates the file name from the passed URL; falls back to a numbered name when the cleaned URL is empty
#@param: raw url of the page being crawled
def getNameFromURL(url):
global counter
name = ''.join(e for e in url if e.isalnum())
if (name == ''):
name = str(counter)
counter = counter + 1
return name
# returns the list of urls of interest; the crawler runs through this list
#for MetaVerseMarket these are product categories: hacking, hosting, and hacking guides and tutorials
def getInterestedLinks():
links = []
# hacking
links.append('http://mdbvvcfwl3fpckiraucv7gio57yoslnhfjxzpoihf4fgdkdd7bwyv7id.onion/products/hacking')
# hosting
links.append('http://mdbvvcfwl3fpckiraucv7gio57yoslnhfjxzpoihf4fgdkdd7bwyv7id.onion/products/hosting')
# hacking guides and tutorials
links.append('http://mdbvvcfwl3fpckiraucv7gio57yoslnhfjxzpoihf4fgdkdd7bwyv7id.onion/products/hacking-guides-and-tutorials')
return links
# iterates through the links of interest; each link is opened and crawled through
#both listing and description pages are saved here
#@param: selenium driver
def crawlForum(driver):
print("Crawling the MetaVerse market")
linksToCrawl = getInterestedLinks()
i = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
has_next_page = True
count = 0
while has_next_page:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(driver, html, link)
list = productPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver, driver.page_source, item)
driver.back()
# comment out
break
# comment out
if count == 1:
break
try:
link = driver.find_element(by=By.XPATH, value='//a[@class="page-link-next"]').get_attribute('href')
if link == "":
raise NoSuchElementException
count += 1
except NoSuchElementException:
has_next_page = False
except Exception as e:
print(link, e)
i += 1
print("Crawling the MetaVerse market done.")
# Returns 'True' if the link is a description link
#@param: any url crawled
#return: true if it is a description page, false if not
def isDescriptionLink(url):
if 'PR' in url:
return True
return False
# Returns True if the link is a listingPage link
#@param: any url crawled
#return: true if it is a listing page, false if not
def isListingLink(url):
if 'products' in url:
return True
return False
# calls the parser to collect description links; the html is the page source of a page from the interested-links list
#@param: html of a listing page reached via getInterestedLinks()
#return: list of description links that should be crawled through
def productPages(html):
soup = BeautifulSoup(html, "html.parser")
return metaversemarket_links_parser(soup)
# Drop links that "signout"
# def isSignOut(url):
# #absURL = urlparse.urljoin(url.base_url, url.url)
# if 'signout' in url.lower() or 'logout' in url.lower():
# return True
#
# return False
def crawler():
startCrawling()
# print("Crawling and Parsing MetaVerseMarket .... DONE!")
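Pagination in crawlForum hinges on the 'page-link-next' anchor; below is a small sketch of that lookup done with BeautifulSoup on invented markup (the live crawler performs the equivalent lookup through Selenium's find_element and stops when the element is missing or its href is empty).

from bs4 import BeautifulSoup

page = '<a class="page-link-next" href="/products/hacking?page=2">Next</a>'
soup = BeautifulSoup(page, "html.parser")

nxt = soup.find('a', {'class': 'page-link-next'})
# crawlForum stops paginating when the anchor is absent or carries an empty href
has_next_page = bool(nxt and nxt.get('href'))
print(has_next_page, nxt.get('href') if nxt else None)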

+ 285
- 0
MarketPlaces/MetaVerseMarket/parser.py

@ -0,0 +1,285 @@
__author__ = 'DarkWeb'
# Here, we are importing the auxiliary functions to clean or convert data
from MarketPlaces.Utilities.utilities import *
# Here, we are importing BeautifulSoup to search through the HTML tree
from bs4 import BeautifulSoup
# parses description pages, so takes html pages of description pages using soup object, and parses it for info it needs
# stores info it needs in different lists, these lists are returned after being organized
# @param: soup object looking at html page of description page
# return: 'row' that contains a variety of lists that each hold info on the description page
def darkfox_description_parser(soup):
# Fields to be parsed
name = "-1" # 0 Product_Name
describe = "-1" # 1 Product_Description
lastSeen = "-1" # 2 Product_LastViewDate
CVE = "-1" # 4 Product_CVE_Classification (Common Vulnerabilities and Exposures)
MS = "-1" # 5 Product_MS_Classification (Microsoft Security)
review = "-1" # 6 Product_Number_Of_Reviews
category = "-1" # 7 Product_Category
shipFrom = "-1" # 8 Product_ShippedFrom
shipTo = "-1" # 9 Product_ShippedTo
left = "-1" # 10 Product_QuantityLeft
escrow = "-1" # 11 Vendor_Warranty
terms = "-1" # 12 Vendor_TermsAndConditions
vendor = "-1" # 13 Vendor_Name
sold = "-1" # 14 Product_QuantitySold
addDate = "-1" # 15 Product_AddedDate
BTC = "-1" # 18 Product_BTC_SellingPrice
USD = "-1" # 19 Product_USD_SellingPrice
rating = "-1" # 20 Vendor_Rating
success = "-1" # 21 Vendor_Successful_Transactions
EURO = "-1" # 22 Product_EURO_SellingPrice
# Finding Product Name
name = soup.find('h1').text
name = name.replace('\n', ' ')
name = name.replace(",", "")
name = name.strip()
# Finding Vendor
vendor = soup.find('h3').find('a').text.strip()
# Finding Vendor Rating
rating = soup.find('span', {'class': "tag is-dark"}).text.strip()
# Finding Successful Transactions
success = soup.find('h3').text
success = success.replace("Vendor: ", "")
success = success.replace(vendor, "")
success = success.replace("(", "")
success = success.replace(")", "")
success = success.strip()
bae = soup.find('div', {'class': "box"}).find_all('ul')
# Finding Prices
USD = bae[1].find('strong').text.strip()
li = bae[2].find_all('li')
# Finding Escrow
escrow = li[0].find('span', {'class': "tag is-dark"}).text.strip()
# Finding the Product Category
category = li[1].find('span', {'class': "tag is-dark"}).text.strip()
# Finding the Product Quantity Available
left = li[3].find('span', {'class': "tag is-dark"}).text.strip()
# Finding Number Sold
sold = li[4].find('span', {'class': "tag is-dark"}).text.strip()
li = bae[3].find_all('li')
# Finding Shipment Information (Origin)
if "Ships from:" in li[-2].text:
shipFrom = li[-2].text
shipFrom = shipFrom.replace("Ships from: ", "")
# shipFrom = shipFrom.replace(",", "")
shipFrom = shipFrom.strip()
# Finding Shipment Information (Destination)
shipTo = li[-1].find('div', {'title': "List of countries is scrollable"}).text
shipTo = shipTo.replace("Ships to: ", "")
shipTo = shipTo.strip()
if "certain countries" in shipTo:
countries = ""
tags = li[-1].find_all('span', {'class': "tag"})
for tag in tags:
country = tag.text.strip()
countries += country + ", "
shipTo = countries.strip(", ")
# Finding the Product description
describe = soup.find('div', {'class': "pre-line"}).text
describe = describe.replace("\n", " ")
describe = describe.strip()
'''# Finding the Number of Product Reviews
tag = soup.findAll(text=re.compile('Reviews'))
for index in tag:
reviews = index
par = reviews.find('(')
if par >=0:
reviews = reviews.replace("Reviews (","")
reviews = reviews.replace(")","")
reviews = reviews.split(",")
review = str(abs(int(reviews[0])) + abs(int(reviews[1])))
else :
review = "-1"'''
# Searching for CVE and MS categories
cve = soup.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
if cve:
CVE = " "
for idx in cve:
CVE += (idx)
CVE += " "
CVE = CVE.replace(',', ' ')
CVE = CVE.replace('\n', '')
ms = soup.findAll(text=re.compile('MS\d{2}-\d{3}'))
if ms:
MS = " "
for im in ms:
MS += (im)
MS += " "
MS = MS.replace(',', ' ')
MS = MS.replace('\n', '')
# Populating the final variable (this should be a list with all fields scraped)
row = (name, describe, lastSeen, CVE, MS, review, category, shipFrom, shipTo, left, escrow, terms, vendor,
sold, addDate, BTC, USD, rating, success, EURO)
# Sending the results
return row
# parses listing pages, so takes html pages of listing pages using soup object, and parses it for info it needs
# stores info it needs in different lists, these lists are returned after being organized
# @param: soup object looking at html page of listing page
# return: 'row' that contains a variety of lists that each hold info on the listing page
def darkfox_listing_parser(soup):
# Fields to be parsed
nm = 0 # Total_Products (Should be Integer)
mktName = "DarkFox" # 0 Marketplace_Name
name = [] # 1 Product_Name
CVE = [] # 2 Product_CVE_Classification (Common Vulnerabilities and Exposures)
MS = [] # 3 Product_MS_Classification (Microsoft Security)
category = [] # 4 Product_Category
describe = [] # 5 Product_Description
escrow = [] # 6 Vendor_Warranty
views = [] # 7 Product_Number_Of_Views
reviews = [] # 8 Product_Number_Of_Reviews
addDate = [] # 9 Product_AddDate
lastSeen = [] # 10 Product_LastViewDate
BTC = [] # 11 Product_BTC_SellingPrice
USD = [] # 12 Product_USD_SellingPrice
EURO = [] # 13 Product_EURO_SellingPrice
sold = [] # 14 Product_QuantitySold
qLeft = [] # 15 Product_QuantityLeft
shipFrom = [] # 16 Product_ShippedFrom
shipTo = [] # 17 Product_ShippedTo
vendor = [] # 18 Vendor
rating = [] # 19 Vendor_Rating
success = [] # 20 Vendor_Successful_Transactions
href = [] # 23 Product_Links (Urls)
listing = soup.findAll('div', {"class": "card"})
# Populating the Number of Products
nm = len(listing)
for a in listing:
bae = a.findAll('a', href=True)
# Adding the url to the list of urls
link = bae[0].get('href')
link = cleanLink(link)
href.append(link)
# Finding the Product
product = bae[1].find('p').text
product = product.replace('\n', ' ')
product = product.replace(",", "")
product = product.replace("...", "")
product = product.strip()
name.append(product)
bae = a.find('div', {'class': "media-content"}).find('div').find_all('div')
if len(bae) >= 5:
# Finding Prices
price = bae[0].text
ud = price.replace(" USD", " ")
# u = ud.replace("$","")
u = ud.replace(",", "")
u = u.strip()
USD.append(u)
# bc = (prc[1]).strip(' BTC')
# BTC.append(bc)
# Finding the Vendor
vendor_name = bae[1].find('a').text
vendor_name = vendor_name.replace(",", "")
vendor_name = vendor_name.strip()
vendor.append(vendor_name)
# Finding the Category
cat = bae[2].find('small').text
cat = cat.replace("Category: ", "")
cat = cat.replace(",", "")
cat = cat.strip()
category.append(cat)
# Finding Number Sold and Quantity Left
num = bae[3].text
num = num.replace("Sold: ", "")
num = num.strip()
sold.append(num)
quant = bae[4].find('small').text
quant = quant.replace("In stock: ", "")
quant = quant.strip()
qLeft.append(quant)
# Finding Successful Transactions
freq = bae[1].text
freq = freq.replace(vendor_name, "")
freq = re.sub(r'Vendor Level \d+', "", freq)
freq = freq.replace("(", "")
freq = freq.replace(")", "")
freq = freq.strip()
success.append(freq)
# Searching for CVE and MS categories
cve = a.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
if not cve:
cveValue = "-1"
else:
cee = " "
for idx in cve:
cee += (idx)
cee += " "
cee = cee.replace(',', ' ')
cee = cee.replace('\n', '')
cveValue = cee
CVE.append(cveValue)
ms = a.findAll(text=re.compile('MS\d{2}-\d{3}'))
if not ms:
MSValue = "-1"
else:
me = " "
for im in ms:
me += (im)
me += " "
me = me.replace(',', ' ')
me = me.replace('\n', '')
MSValue = me
MS.append(MSValue)
# Populate the final variable (this should be a list with all fields scraped)
return organizeProducts(mktName, nm, name, CVE, MS, category, describe, escrow, views, reviews, addDate, lastSeen,
BTC, USD, EURO, qLeft, shipFrom, shipTo, vendor, rating, success, sold, href)
# called by the crawler to get description links on a listing page
# @param: beautifulsoup object that is using the correct html page (listing page)
# return: list of description links from a listing page
def metaversemarket_links_parser(soup):
# Returning all links that should be visited by the Crawler
href = []
listing = soup.findAll('div', {"class": "col-12 p-0"})
for a in listing:
bae = a.find('a', href=True)
link = bae['href']
href.append(link)
return href
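Both parsers above harvest CVE and MS identifiers with the same findAll(text=re.compile(...)) pattern; the short sketch below shows that step on invented strings. Note that each hit is the full text node containing the identifier, which is exactly what the original parsers concatenate into the CVE and MS fields.

import re
from bs4 import BeautifulSoup

sample = "<div><p>PoC for CVE-2021-4034</p><p>also covers MS17-010</p></div>"
soup = BeautifulSoup(sample, "html.parser")

cve_hits = soup.findAll(text=re.compile(r'CVE-\d{4}-\d{4}'))
ms_hits = soup.findAll(text=re.compile(r'MS\d{2}-\d{3}'))

# mirror the parsers: join every matching text node into one field, or -1 if none
CVE = " ".join(t.strip() for t in cve_hits) if cve_hits else "-1"
MS = " ".join(t.strip() for t in ms_hits) if ms_hits else "-1"
print(CVE)  # PoC for CVE-2021-4034
print(MS)   # also covers MS17-010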

+ 4
- 18
MarketPlaces/MikesGrandStore/crawler_selenium.py

@ -31,7 +31,6 @@ baseURL = 'http://4yx2akutmkhwfgzlpdxiah7cknurw6vlddlq24fxa3r3ebophwgpvhyd.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling():
# opentor()
mktName = getMKTName()
driver = getAccess()
@ -41,25 +40,11 @@ def startCrawling():
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
closeDriver(driver)
new_parse(mktName, baseURL, True)
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
# Returns the name of the website
#return: name of site in string type
def getMKTName():
@ -76,7 +61,7 @@ def getFixedURL():
# Closes Tor Browser
#@param: current selenium driver
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -103,7 +88,7 @@ def createFFDriver():
ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
ff_prof.set_preference("network.dns.disablePrefetch", True)
ff_prof.set_preference("network.http.sendRefererHeader", 0)
ff_prof.set_preference("permissions.default.image", 1)
ff_prof.set_preference("permissions.default.image", 3)
ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
@ -145,6 +130,7 @@ def login(driver):
WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
(By.XPATH, "/html/body/div[1]/header/div/div[3]/div/div/ul/li[6]/a")))
# Saves the crawled html page, makes the directory path for html pages if not made
def savePage(driver, page, url):
cleanPage = cleanHTML(driver, page)


+ 23
- 27
MarketPlaces/Nexus/crawler_selenium.py

@ -31,7 +31,6 @@ baseURL = 'http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling():
# opentor()
mktName = getMKTName()
driver = getAccess()
@ -40,22 +39,10 @@ def startCrawling():
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
closeDriver(driver)
new_parse(mktName, baseURL, True)
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
# Returns the name of the website
#return: name of site in string type
@ -73,7 +60,7 @@ def getFixedURL():
# Closes Tor Browser
#@param: current selenium driver
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -100,7 +87,7 @@ def createFFDriver():
ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
ff_prof.set_preference("network.dns.disablePrefetch", True)
ff_prof.set_preference("network.http.sendRefererHeader", 0)
ff_prof.set_preference("permissions.default.image", 2)
ff_prof.set_preference("permissions.default.image", 3)
ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
@ -133,6 +120,7 @@ def getAccess():
driver.close()
return 'down'
def savePage(driver, page, url):
cleanPage = cleanHTML(driver, page)
filePath = getFullPathName(url)
@ -173,16 +161,24 @@ def getNameFromURL(url):
def getInterestedLinks():
links = []
# Bot nets
links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/malware/botnets/')
# # Rats
# links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/malware/rats/')
# # Ransomware
# links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/malware/ransomware/')
# # Other Malware
# links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/malware/outros-malware/')
# # Hacking Tools & Scripting
# links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/hacking-spam/ferramentas-de-hacking-scripts/')
# malware
links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/malware/')
# # hacking-spam
# links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/hacking-spam/')
# # hacking services
# links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/servicos/hacking/')
# # programming services
# links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/servicos/programacao/')
# # remote admin services
# links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/servicos/administracao-remota/')
# # hacking guides
# links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/guias-tutoriais/guia-de-hacking/')
# # malware guides
# links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/guias-tutoriais/guia-de-malware/')
# # fraud guides
# links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/guias-tutoriais/guia-de-fraudes/')
# # fraud software
# links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/fraudes/software-de-fraude/')
return links
@ -222,7 +218,7 @@ def crawlForum(driver):
driver.back()
# comment out
break
# break
# comment out
if count == 1:


+ 66
- 46
MarketPlaces/Nexus/parser.py

@ -15,25 +15,28 @@ import re
def nexus_description_parser(soup):
# Fields to be parsed
vendor = "-1" # 0 *Vendor_Name
success = "-1" # 1 Vendor_Successful_Transactions
rating_vendor = "-1" # 2 Vendor_Rating
name = "-1" # 3 *Product_Name
describe = "-1" # 4 Product_Description
CVE = "-1" # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) dont worry about that much
MS = "-1" # 6 Product_MS_Classification (Microsoft Security) dont worry about that much
category = "-1" # 7 Product_Category
views = "-1" # 8 Product_Number_Of_Views
reviews = "-1" # 9 Product_Number_Of_Reviews
rating_item = "-1" # 10 Product_Rating
addDate = "-1" # 11 Product_AddedDate
BTC = "-1" # 12 Product_BTC_SellingPrice
USD = "-1" # 13 Product_USD_SellingPrice
EURO = "-1" # 14 Product_EURO_SellingPrice
sold = "-1" # 15 Product_QuantitySold
left = "-1" # 16 Product_QuantityLeft
shipFrom = "-1" # 17 Product_ShippedFrom
shipTo = "-1" # 18 Product_ShippedTo
vendor = "-1" # 0 *Vendor_Name
success = "-1" # 1 Vendor_Successful_Transactions
rating_vendor = "-1" # 2 Vendor_Rating
name = "-1" # 3 *Product_Name
describe = "-1" # 4 Product_Description
CVE = "-1" # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
MS = "-1" # 6 Product_MS_Classification (Microsoft Security)
category = "-1" # 7 Product_Category
views = "-1" # 8 Product_Number_Of_Views
reviews = "-1" # 9 Product_Number_Of_Reviews
rating_item = "-1" # 10 Product_Rating
addDate = "-1" # 11 Product_AddedDate
BTC = "-1" # 12 Product_BTC_SellingPrice
USD = "-1" # 13 Product_USD_SellingPrice
EURO = "-1" # 14 Product_EURO_SellingPrice
sold = "-1" # 15 Product_QuantitySold
left = "-1" # 16 Product_QuantityLeft
shipFrom = "-1" # 17 Product_ShippedFrom
shipTo = "-1" # 18 Product_ShippedTo
image = "-1" # 19 Product_Image
vendor_image = "-1" # 20 Vendor_Image
#finding the name of the product
@ -48,6 +51,10 @@ def nexus_description_parser(soup):
else:
describe = cleanString(description_div.text.strip())
# Finding Product Image
image = soup.find('div', {'class': 'woocommerce-product-gallery__image'}).find('img')
image = image.get('src')
image = image.split('base64,')[-1]
#find the category of the product
name_of_category = soup.find("span", {"class": "posted_in"}).find("a").text
@ -64,7 +71,7 @@ def nexus_description_parser(soup):
# Populating the final variable (this should be a list with all fields scraped)
row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
BTC, USD, EURO, sold, left, shipFrom, shipTo)
BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
# Sending the results
@ -78,28 +85,31 @@ def nexus_description_parser(soup):
def nexus_listing_parser(soup):
# Fields to be parsed
nm = 0 # *Total_Products (Should be Integer)
mktName = "Nexus" # 0 *Marketplace_Name
vendor = [] # 1 *Vendor y
rating_vendor = [] # 2 Vendor_Rating
success = [] # 3 Vendor_Successful_Transactions
name = [] # 4 *Product_Name y
CVE = [] # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
MS = [] # 6 Product_MS_Classification (Microsoft Security)
category = [] # 7 Product_Category y
describe = [] # 8 Product_Description
views = [] # 9 Product_Number_Of_Views
reviews = [] # 10 Product_Number_Of_Reviews
rating_item = [] # 11 Product_Rating
addDate = [] # 12 Product_AddDate
BTC = [] # 13 Product_BTC_SellingPrice
USD = [] # 14 Product_USD_SellingPrice y
EURO = [] # 15 Product_EURO_SellingPrice
sold = [] # 16 Product_QuantitySold
qLeft = [] # 17 Product_QuantityLeft
shipFrom = [] # 18 Product_ShippedFrom
shipTo = [] # 19 Product_ShippedTo
href = [] # 20 Product_Links
nm = 0 # *Total_Products (Should be Integer)
mktName = "Nexus" # 0 *Marketplace_Name
vendor = [] # 1 *Vendor y
rating_vendor = [] # 2 Vendor_Rating
success = [] # 3 Vendor_Successful_Transactions
name = [] # 4 *Product_Name y
CVE = [] # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
MS = [] # 6 Product_MS_Classification (Microsoft Security)
category = [] # 7 Product_Category y
describe = [] # 8 Product_Description
views = [] # 9 Product_Number_Of_Views
reviews = [] # 10 Product_Number_Of_Reviews
rating_item = [] # 11 Product_Rating
addDate = [] # 12 Product_AddDate
BTC = [] # 13 Product_BTC_SellingPrice
USD = [] # 14 Product_USD_SellingPrice y
EURO = [] # 15 Product_EURO_SellingPrice
sold = [] # 16 Product_QuantitySold
qLeft = [] # 17 Product_QuantityLeft
shipFrom = [] # 18 Product_ShippedFrom
shipTo = [] # 19 Product_ShippedTo
image = [] # 20 Product_Image
image_vendor = [] # 21 Vendor_Image
href = [] # 22 Product_Links
products_list = soup.find_all('li')
nm = 0
for product in products_list:
@ -117,10 +127,18 @@ def nexus_listing_parser(soup):
print("I can't find the link")
raise e
# Finding Product Image
product_image = product.find('a', {'class': 'woocommerce-loop-image-link woocommerce-LoopProduct-link woocommerce-loop-product__link'}).find('img')
product_image = product_image.get('src')
product_image = product_image.split('base64,')[-1]
image.append(product_image)
BTC.append("-1")
#everything else appends a -1
rating_vendor.append("-1")
USD.append("-1")
vendor.append("-1")
vendor.append(mktName)
success.append("-1")
CVE.append("-1")
MS.append("-1")
@ -129,12 +147,12 @@ def nexus_listing_parser(soup):
views.append("-1")
reviews.append("-1")
addDate.append("-1")
BTC.append("-1")
EURO.append("-1")
sold.append("-1")
qLeft.append("-1")
shipFrom.append("-1")
shipTo.append("-1")
image_vendor.append("-1")
# print("Done! moving onto the next product!")
# print(len(shipTo))
nm += 1
@ -145,7 +163,7 @@ def nexus_listing_parser(soup):
# Populate the final variable (this should be a list with all fields scraped)
return organizeProducts(
marketplace = "Nexus",
marketplace = mktName,
nm = nm,
vendor = vendor,
rating_vendor = rating_vendor,
@ -166,7 +184,9 @@ def nexus_listing_parser(soup):
qLeft = qLeft,
shipFrom = shipFrom,
shipTo = shipTo,
href = href
href = href,
image = image,
image_vendor = image_vendor
)
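The image handling added here (and in the other updated parsers) assumes the market serves thumbnails as inline data URIs; the sketch below shows what the split on 'base64,' keeps. The data URI is a stand-in value, not one taken from the site.

from bs4 import BeautifulSoup

sample = '<img class="thumb" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUg==">'
img = BeautifulSoup(sample, "html.parser").find('img')

src = img.get('src')
# keep only the base64 payload; if src is a plain URL the split is a no-op
payload = src.split('base64,')[-1]
print(payload)   # iVBORw0KGgoAAAANSUhEUg==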


+ 256
- 0
MarketPlaces/PabloEscobarMarket/crawler_selenium.py

@ -0,0 +1,256 @@
__author__ = 'DarkWeb'
'''
PabloEscobarMarket Marketplace Crawler (Selenium)
'''
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from PIL import Image
import urllib.parse as urlparse
import os, re, time
import subprocess
from bs4 import BeautifulSoup
from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.PabloEscobarMarket.parser import pabloescobarmarket_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML
counter = 1
baseURL = 'http://niejmptjzwhlfywruoab4pbuxg7kp2mtcr4c6mgpeykju5matewg36yd.onion/'
# Opens Tor Browser, crawls the website
def startCrawling():
mktName = getMKTName()
driver = getAccess()
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closeDriver(driver)
new_parse(mktName, baseURL, True)
# Login using premade account credentials and do login captcha manually
def login(driver):
input("Press ENTER when CAPTCHA is complete and login page has loaded\n")
# entering username and password into input boxes
usernameBox = driver.find_element(by=By.XPATH, value='//*[@id="username"]')
# Username here
usernameBox.send_keys('snorlaxrights')
passwordBox = driver.find_element(by=By.XPATH, value='//*[@id="inputPassword3"]')
# Password here
passwordBox.send_keys('$noringAllday')
input("Press ENTER when CAPTCHA is completed\n")
# wait for the listing page to show up (this XPath may need to change based on the seed url)
# wait up to 50 sec until the element with id="collapse3" is visible, then continue
WebDriverWait(driver, 50).until(EC.visibility_of_element_located(
(By.XPATH, '//*[@id="collapse3"]')))
# Returns the name of the website
def getMKTName() -> str:
name = 'PabloEscobarMarket'
return name
# Return the link of the website
def getFixedURL():
url = 'http://niejmptjzwhlfywruoab4pbuxg7kp2mtcr4c6mgpeykju5matewg36yd.onion/'
return url
# Closes Tor Browser
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
print('Closing Tor...')
driver.close() #close tab
time.sleep(3)
return
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
ff_prof.set_preference("places.history.enabled", False)
ff_prof.set_preference("privacy.clearOnShutdown.offlineApps", True)
ff_prof.set_preference("privacy.clearOnShutdown.passwords", True)
ff_prof.set_preference("privacy.clearOnShutdown.siteSettings", True)
ff_prof.set_preference("privacy.sanitize.sanitizeOnShutdown", True)
ff_prof.set_preference("signon.rememberSignons", False)
ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
ff_prof.set_preference("network.dns.disablePrefetch", True)
ff_prof.set_preference("network.http.sendRefererHeader", 0)
ff_prof.set_preference("permissions.default.image", 3)
ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
ff_prof.set_preference('network.proxy.type', 1)
ff_prof.set_preference("network.proxy.socks_version", 5)
ff_prof.set_preference('network.proxy.socks', '127.0.0.1')
ff_prof.set_preference('network.proxy.socks_port', 9150)
ff_prof.set_preference('network.proxy.socks_remote_dns', True)
ff_prof.set_preference("javascript.enabled", True)
ff_prof.update_preferences()
service = Service(config.get('TOR', 'geckodriver_path'))
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
driver.maximize_window()
return driver
def getAccess():
url = getFixedURL()
driver = createFFDriver()
try:
driver.get(url)
return driver
except:
driver.close()
return 'down'
# Saves the crawled html page
def savePage(driver, page, url):
cleanPage = cleanHTML(driver, page)
filePath = getFullPathName(url)
os.makedirs(os.path.dirname(filePath), exist_ok=True)
open(filePath, 'wb').write(cleanPage.encode('utf-8'))
return
# Gets the full path of the page to be saved along with its appropriate file name
def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url)
if isDescriptionLink(url):
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else:
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath
# Creates the file name from passed URL
def getNameFromURL(url):
global counter
name = ''.join(e for e in url if e.isalnum())
if name == '':
name = str(counter)
counter = counter + 1
return name
# FIX
def getInterestedLinks():
links = []
# # hire hacker
# links.append('http://niejmptjzwhlfywruoab4pbuxg7kp2mtcr4c6mgpeykju5matewg36yd.onion/?sub_id=36')
# hacker
links.append('http://niejmptjzwhlfywruoab4pbuxg7kp2mtcr4c6mgpeykju5matewg36yd.onion/?sub_id=34')
return links
def crawlForum(driver):
print("Crawling the PabloEscobarMarket market")
linksToCrawl = getInterestedLinks()
i = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
has_next_page = True
count = 0
while has_next_page:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(driver, html, link)
list = productPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver, driver.page_source, item)
driver.back()
# comment out
# break
# comment out
if count == 1:
break
try:
link = driver.find_element(by=By.XPATH, value='//a[@rel="next"]').get_attribute('href')
if link == "":
raise NoSuchElementException
count += 1
except NoSuchElementException:
has_next_page = False
except Exception as e:
print(link, e)
i += 1
print("Crawling the PabloEscobarMarket market done.")
# Returns 'True' if the link is a description link; may need to change for every website
def isDescriptionLink(url):
if 'single_product' in url:
return True
return False
# Returns True if the link is a listing page link; may need to change for every website
def isListingLink(url):
if 'sub_id' in url:
return True
return False
# calling the parser to define the links
def productPages(html):
soup = BeautifulSoup(html, "html.parser")
return pabloescobarmarket_links_parser(soup)
def crawler():
startCrawling()
# print("Crawling and Parsing PabloEscobarMarket .... DONE!")
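A quick check of the URL classification this crawler uses to route saved pages: the listing URL below comes from getInterestedLinks, while the description URL is a hypothetical example of the 'single_product' pattern that isDescriptionLink tests for.

# standalone re-statement of the two helpers, for illustration only
def isDescriptionLink(url):
    return 'single_product' in url

def isListingLink(url):
    return 'sub_id' in url

listing = 'http://niejmptjzwhlfywruoab4pbuxg7kp2mtcr4c6mgpeykju5matewg36yd.onion/?sub_id=34'
product = 'http://niejmptjzwhlfywruoab4pbuxg7kp2mtcr4c6mgpeykju5matewg36yd.onion/single_product?id=1'  # hypothetical

print(isListingLink(listing), isDescriptionLink(listing))    # True False
print(isListingLink(product), isDescriptionLink(product))    # False True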

+ 241
- 0
MarketPlaces/PabloEscobarMarket/parser.py

@ -0,0 +1,241 @@
__author__ = 'DarkWeb'
# Here, we are importing the auxiliary functions to clean or convert data
from MarketPlaces.Utilities.utilities import *
# Here, we are importing BeautifulSoup to search through the HTML tree
from bs4 import BeautifulSoup
# parses description pages, so takes html pages of description pages using soup object, and parses it for info it needs
# stores info it needs in different lists, these lists are returned after being organized
# @param: soup object looking at html page of description page
# return: 'row' that contains a variety of lists that each hold info on the description page
def pabloescobarmarket_description_parser(soup):
# Fields to be parsed
name = "-1" # 0 Product_Name
describe = "-1" # 1 Product_Description
lastSeen = "-1" # 2 Product_LastViewDate
CVE = "-1" # 4 Product_CVE_Classification (Common Vulnerabilities and Exposures)
MS = "-1" # 5 Product_MS_Classification (Microsoft Security)
review = "-1" # 6 Product_Number_Of_Reviews
category = "-1" # 7 Product_Category
shipFrom = "-1" # 8 Product_ShippedFrom
shipTo = "-1" # 9 Product_ShippedTo
left = "-1" # 10 Product_QuantityLeft
escrow = "-1" # 11 Vendor_Warranty
terms = "-1" # 12 Vendor_TermsAndConditions
vendor = "-1" # 13 Vendor_Name
sold = "-1" # 14 Product_QuantitySold
addDate = "-1" # 15 Product_AddedDate
BTC = "-1" # 18 Product_BTC_SellingPrice
USD = "-1" # 19 Product_USD_SellingPrice
rating = "-1" # 20 Vendor_Rating
success = "-1" # 21 Vendor_Successful_Transactions
EURO = "-1" # 22 Product_EURO_SellingPrice
# Finding Product Name
# NA
divmd7 = soup.find('div', {'class': "col-md-7"})
ptag = soup.findAll('p')
# Finding Vendor
vendor = divmd7.find('a').text.strip()
# Finding Vendor Rating
# NA
# Finding Successful Transactions
success = soup.find('span', {'class': "badge-primary"}).text.strip()
# Finding Prices
USD = soup.find('span', {'class': "total"}).text.strip()
BTC = soup.find('div', {'class': "text-center"}).text.strip()
# Finding Escrow
escrow = ptag[-1].text.strip()
# Finding the Product Category
category = ptag[-2].text.strip()
# Finding the Product Quantity Available
# NA
# Finding Number Sold
# NA
# Finding Shipment Information (Origin)
# NA
# Finding Shipment Information (Destination)
# NA
# Finding the Product description
describe = soup.find('div', {'class': "text-white"}).text
describe = describe.replace("\n", " ")
describe = describe.strip()
'''# Finding the Number of Product Reviews
tag = soup.findAll(text=re.compile('Reviews'))
for index in tag:
reviews = index
par = reviews.find('(')
if par >=0:
reviews = reviews.replace("Reviews (","")
reviews = reviews.replace(")","")
reviews = reviews.split(",")
review = str(abs(int(reviews[0])) + abs(int(reviews[1])))
else :
review = "-1"'''
# Searching for CVE and MS categories
cve = soup.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
if cve:
CVE = " "
for idx in cve:
CVE += (idx)
CVE += " "
CVE = CVE.replace(',', ' ')
CVE = CVE.replace('\n', '')
ms = soup.findAll(text=re.compile('MS\d{2}-\d{3}'))
if ms:
MS = " "
for im in ms:
MS += (im)
MS += " "
MS = MS.replace(',', ' ')
MS = MS.replace('\n', '')
# Populating the final variable (this should be a list with all fields scraped)
row = (name, describe, lastSeen, CVE, MS, review, category, shipFrom, shipTo, left, escrow, terms, vendor,
sold, addDate, BTC, USD, rating, success, EURO)
# Sending the results
return row
# parses listing pages, so takes html pages of listing pages using soup object, and parses it for info it needs
# stores info it needs in different lists, these lists are returned after being organized
# @param: soup object looking at html page of listing page
# return: 'row' that contains a variety of lists that each hold info on the listing page
def pabloescobarmarket_listing_parser(soup):
# Fields to be parsed
nm = 0 # Total_Products (Should be Integer)
mktName = "PabloEscobarMarket" # 0 Marketplace_Name
name = [] # 1 Product_Name
CVE = [] # 2 Product_CVE_Classification (Common Vulnerabilities and Exposures)
MS = [] # 3 Product_MS_Classification (Microsoft Security)
category = [] # 4 Product_Category
describe = [] # 5 Product_Description
escrow = [] # 6 Vendor_Warranty
views = [] # 7 Product_Number_Of_Views
reviews = [] # 8 Product_Number_Of_Reviews
addDate = [] # 9 Product_AddDate
lastSeen = [] # 10 Product_LastViewDate
BTC = [] # 11 Product_BTC_SellingPrice
USD = [] # 12 Product_USD_SellingPrice
EURO = [] # 13 Product_EURO_SellingPrice
sold = [] # 14 Product_QuantitySold
qLeft = [] # 15 Product_QuantityLeft
shipFrom = [] # 16 Product_ShippedFrom
shipTo = [] # 17 Product_ShippedTo
vendor = [] # 18 Vendor
rating = [] # 19 Vendor_Rating
success = [] # 20 Vendor_Successful_Transactions
href = [] # 23 Product_Links (Urls)
listing = soup.findAll('div', {"class": "p-4"})
# Populating the Number of Products
nm = len(listing)
for a in listing:
bae = a.findAll('a', href=True)
# Adding the url to the list of urls
link = bae[0].get('href')
link = cleanLink(link)
href.append(link)
# Finding the Product
product = a.find('h4').text
product = product.replace('\n', ' ')
product = product.replace(",", "")
product = product.replace("...", "")
product = product.strip()
name.append(product)
# Finding Prices
price = a.find('div', {"class": "price"}).text
tempUSD = price.split("~")[0]
tempUSD = tempUSD.replace("$", "")
tempUSD = tempUSD.strip()
USD.append(tempUSD)
tempBTC = price.split("~")[1]
tempBTC = tempBTC.replace("BTC", "")
tempBTC = tempBTC.strip()
BTC.append(tempBTC)
# Finding the Vendor
#NA
# Finding the Category
# NA
# Finding Number Sold and Quantity Left
# NA
# Finding Successful Transactions
# NA
# Searching for CVE and MS categories
cve = a.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
if not cve:
cveValue = "-1"
else:
cee = " "
for idx in cve:
cee += (idx)
cee += " "
cee = cee.replace(',', ' ')
cee = cee.replace('\n', '')
cveValue = cee
CVE.append(cveValue)
ms = a.findAll(text=re.compile('MS\d{2}-\d{3}'))
if not ms:
MSValue = "-1"
else:
me = " "
for im in ms:
me += (im)
me += " "
me = me.replace(',', ' ')
me = me.replace('\n', '')
MSValue = me
MS.append(MSValue)
# Populate the final variable (this should be a list with all fields scraped)
return organizeProducts(mktName, nm, name, CVE, MS, category, describe, escrow, views, reviews, addDate, lastSeen,
BTC, USD, EURO, qLeft, shipFrom, shipTo, vendor, rating, success, sold, href)
# called by the crawler to get description links on a listing page
# @param: beautifulsoup object that is using the correct html page (listing page)
# return: list of description links from a listing page FIX
def pabloescobarmarket_links_parser(soup):
# Returning all links that should be visited by the Crawler
href = []
listing = soup.findAll('div', {"class": "p-4"})
for a in listing:
bae = a.find('a', href=True)
link = bae['href']
href.append(link)
return href
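The listing parser splits the combined price string on '~' to separate the fiat and BTC figures; a minimal sketch of that step, using an invented price string in the '$USD ~ BTC' format the code assumes:

price = "$120.00 ~ 0.0045 BTC"   # assumed format: USD first, BTC after '~'

tempUSD = price.split("~")[0].replace("$", "").strip()
tempBTC = price.split("~")[1].replace("BTC", "").strip()

print(tempUSD)  # 120.00
print(tempBTC)  # 0.0045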

+ 11
- 25
MarketPlaces/RobinhoodMarket/crawler_selenium.py

@ -29,9 +29,6 @@ baseURL = 'http://ilr3qzubfnx33vbhal7l5coo4ftqlkv2tboph4ujog5crz6m5ua2b2ad.onion
# Opens Tor Browser, crawls the website
def startCrawling():
# Opening Tor beforehand gives a "Tor exited during startup" error
# opentor()
marketName = getMKTName()
driver = getAccess()
@ -45,24 +42,11 @@ def startCrawling():
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
closeDriver(driver)
new_parse(marketName, baseURL, True)
# Opens Tor Browser
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
# Login is not needed in Robinhood
def login(driver):
pass
@ -82,7 +66,7 @@ def getFixedURL():
# Closes Tor Browser
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -178,8 +162,8 @@ def getInterestedLinks():
# Hacking
links.append('http://ilr3qzubfnx33vbhal7l5coo4ftqlkv2tboph4ujog5crz6m5ua2b2ad.onion/product-category/hacking/')
# # Other Software
# links.append('http://ilr3qzubfnx33vbhal7l5coo4ftqlkv2tboph4ujog5crz6m5ua2b2ad.onion/product-category/other-software/')
# Other Software
links.append('http://ilr3qzubfnx33vbhal7l5coo4ftqlkv2tboph4ujog5crz6m5ua2b2ad.onion/product-category/other-software/')
return links
@ -207,7 +191,7 @@ def crawlForum(driver):
savePage(driver, html, link)
list = productPages(html)
for item in list:
for c, item in enumerate(list):
itemURL = urlparse.urljoin(baseURL, str(item))
try:
@ -218,11 +202,12 @@ def crawlForum(driver):
driver.back()
# comment out
break
# if c == 4:
# break
# comment out
if count == 1:
break
# if count == 1:
# break
# go to next page of market
try:
@ -266,5 +251,6 @@ def crawler():
startCrawling()
# print("Crawling and Parsing BestCardingWorld .... DONE!")
if __name__ == '__main__':
startCrawling()
startCrawling()

+ 35
- 14
MarketPlaces/RobinhoodMarket/parser.py

@ -39,6 +39,8 @@ def Robinhood_description_parser(soup):
left = "-1" # 16 Product_QuantityLeft
shipFrom = "-1" # 17 Product_ShippedFrom
shipTo = "-1" # 18 Product_ShippedTo
image = "-1" # 19 Product_Image
vendor_image = "-1" # 20 Vendor_Image
# Finding Product Name
name = soup.find('h1').text
@ -48,16 +50,17 @@ def Robinhood_description_parser(soup):
# Finding description
desc = ''
primary = soup.find('div', {'id': 'primary'})
product = primary.findAll('div')[1]
commerce = product.findAll('div', recursive=False)[2]
descDiv = commerce.findAll('div')[0]
# descDiv = soup.find('div', {'class': 'woocommerce-Tabs-panel woocommerce-Tabs-panel--description panel entry-content wc-tab'})
descText = descDiv.findAll('p')
for para in descText:
desc = desc + para.text
describe = desc
tab = soup.find('div', {"id": "tab-description"})
for p in tab.findAll('p'):
desc += p.text
if desc == '':
desc = soup.find('div', {"class": "woocommerce-product-details__short-description"}).text
describe = cleanString(desc.strip())
# Finding Product Image
image = soup.find('div', {'class': 'woocommerce-product-gallery__image'}).find('img')
image = image.get('src')
image = image.split('base64,')[-1]
# Finding Vendor
vendor = soup.find('a', {'class': 'wcfm_dashboard_item_title'}).text
@ -65,6 +68,11 @@ def Robinhood_description_parser(soup):
vendor = vendor.replace("Sold by:", "")
vendor = vendor.strip()
# Finding Vendor Image
vendor_image = soup.find('div', {'class': 'wcfmmp_sold_by_container_left'}).find('img')
vendor_image = vendor_image.get('src')
vendor_image = vendor_image.split('base64,')[-1]
# Finding Category
catSpan = soup.find('span', {'class': 'posted_in'})
category = catSpan.find('a').text
@ -93,7 +101,7 @@ def Robinhood_description_parser(soup):
# Populating the final variable (this should be a list with all fields scraped)
row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
BTC, USD, EURO, sold, left, shipFrom, shipTo)
BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
# Sending the results
return row
@ -124,7 +132,9 @@ def Robinhood_listing_parser(soup):
qLeft =[] # 17 Product_QuantityLeft
shipFrom = [] # 18 Product_ShippedFrom
shipTo = [] # 19 Product_ShippedTo
href = [] # 20 Product_Links
image = [] # 20 Product_Image
image_vendor = [] # 21 Vendor_Image
href = [] # 22 Product_Links
listing = soup.find('ul', {"class": "products columns-4"})
items = listing.findAll('li')
@ -153,6 +163,12 @@ def Robinhood_listing_parser(soup):
product = product.strip()
name.append(product)
# Finding Product Image
product_image = card.find('img', {'class': 'attachment-woocommerce_thumbnail size-woocommerce_thumbnail'})
product_image = product_image.get('src')
product_image = product_image.split('base64,')[-1]
image.append(product_image)
info = card.find('div', {'class': 'wcfmmp_sold_by_container'})
# Finding Vendor
@ -161,6 +177,12 @@ def Robinhood_listing_parser(soup):
vendor_name = vendor_name.strip()
vendor.append(vendor_name)
# Finding Vendor Image
vendor_icon = info.find('img', {'class': 'wcfmmp_sold_by_logo'})
vendor_icon = vendor_icon.get('src')
vendor_icon = vendor_icon.split('base64,')[-1]
image_vendor.append(vendor_icon)
# Finding USD
span = card.find('span', {'class': 'price'})
if span is not None:
@ -198,13 +220,12 @@ def Robinhood_listing_parser(soup):
MSValue=me
MS.append(MSValue)
#print(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
# reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
# Populate the final variable (this should be a list with all fields scraped)
return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
def Robinhood_links_parser(soup):
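The reworked description lookup above falls back from the long 'tab-description' block to WooCommerce's short description when the former is empty; here is a self-contained sketch of that fallback on illustrative markup (the class names match the diff, the text is invented).

from bs4 import BeautifulSoup

sample = """
<div id="tab-description"></div>
<div class="woocommerce-product-details__short-description">Short pitch only.</div>
"""
soup = BeautifulSoup(sample, "html.parser")

desc = ''
tab = soup.find('div', {"id": "tab-description"})
for p in tab.findAll('p'):
    desc += p.text
# no <p> tags in the long tab, so fall back to the short description
if desc == '':
    desc = soup.find('div', {"class": "woocommerce-product-details__short-description"}).text
print(desc.strip())   # Short pitch only.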


+ 5
- 20
MarketPlaces/ThiefWorld/crawler_selenium.py

@ -32,7 +32,6 @@ baseURL = 'http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling():
# opentor()
mktName = getMKTName()
driver = getAccess()
@ -42,25 +41,11 @@ def startCrawling():
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
closeDriver(driver)
new_parse(mktName, baseURL, True)
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
# Returns the name of the website
#return: name of site in string type
def getMKTName():
@ -77,7 +62,7 @@ def getFixedURL():
# Closes Tor Browser
#@param: current selenium driver
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -104,7 +89,7 @@ def createFFDriver():
ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
ff_prof.set_preference("network.dns.disablePrefetch", True)
ff_prof.set_preference("network.http.sendRefererHeader", 0)
ff_prof.set_preference("permissions.default.image", 1)
ff_prof.set_preference("permissions.default.image", 3)
ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
@ -144,7 +129,7 @@ def getAccess():
def login(driver):
# wait for page to show up (This Xpath may need to change based on different seed url)
WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
(By.XPATH, "/html/body/div/div[1]/div/div[1]/div[1]/ul")))
(By.XPATH, "/html/body/div/header/div[2]/div/nav/div[2]/a[1]")))
temp = driver.find_element(By.XPATH, '/html/body/div/header/div[2]/div/nav/div[2]/a[1]').get_attribute(
'href') # /html/body/div/div[2]/div/div[2]/div
@ -242,7 +227,7 @@ def crawlForum(driver):
driver.back()
# comment out
break
# break
# comment out
if count == 1:


+ 1
- 1
MarketPlaces/ThiefWorld/parser.py View File

@ -53,7 +53,7 @@ def thiefWorld_description_parser(soup: BeautifulSoup) -> Tuple:
USD = cleanString(usdText.replace("USD", "").strip())
ratingDiv = soup.find('div', {'class': 'rating_star'})
rating_vendor = ratingDiv.get('title').strip(' ')[1]
rating_vendor = ratingDiv.get('title').split(' ')[1]
# Populating the final variable (this should be a list with all fields scraped)
row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
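The one-word fix above matters: str.strip(' ') only trims surrounding spaces and still returns a string, so indexing [1] picked the second character of the title instead of the rating value, while split(' ')[1] takes the second whitespace-separated token. Illustrated with a made-up title (the real attribute text is not shown in the diff):

# Hypothetical title value; the real attribute text is not shown in the diff.
title = "Rating: 4.85"
title.strip(' ')[1]   # 'a'    -- strip() returns a string, [1] is its second character
title.split(' ')[1]   # '4.85' -- the second space-separated token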


+ 2
- 16
MarketPlaces/Tor2door/crawler_selenium.py View File

@ -29,7 +29,6 @@ baseURL = 'http://yzrrne3pveltulbavydr2kiashvlnysdwclwmklo6cyjuqpxi7ku4xqd.onion
# Opens Tor Browser, crawls the website
def startCrawling():
# opentor()
marketName = getMKTName()
driver = getAccess()
@ -39,24 +38,11 @@ def startCrawling():
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
closeDriver(driver)
new_parse(marketName, baseURL, True)
# Opens Tor Browser
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
# Login using premade account credentials and do login captcha manually
def login(driver):
#wait for login page
@ -118,7 +104,7 @@ def getFixedURL():
# Closes Tor Browser
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")


+ 4
- 18
MarketPlaces/TorBay/crawler_selenium.py View File

@ -32,7 +32,6 @@ baseURL = 'http://torbay3253zck4ym5cbowwvrbfjjzruzthrx3np5y6owvifrnhy5ybid.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling():
# opentor()
mktName = getMKTName()
driver = getAccess()
@ -42,25 +41,11 @@ def startCrawling():
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
closeDriver(driver)
new_parse(mktName, baseURL, True)
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
# Returns the name of the website
#return: name of site in string type
def getMKTName():
@ -77,7 +62,7 @@ def getFixedURL():
# Closes Tor Browser
#@param: current selenium driver
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -124,6 +109,7 @@ def createFFDriver():
return driver
#the driver 'gets' the url, attempting to get on the site, if it can't access return 'down'
#return: return the selenium driver or string 'down'
def getAccess():
@ -228,7 +214,7 @@ def crawlForum(driver):
driver.back()
# comment out
break
# break
# comment out
if count == 1:


+ 10
- 27
MarketPlaces/TorMarket/crawler_selenium.py View File

@ -31,35 +31,19 @@ baseURL = 'http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling():
# opentor()
mktName = getMKTName()
driver = getAccess()
if driver != 'down':
try:
# login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
closeDriver(driver)
new_parse(mktName, baseURL, True)
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
# Returns the name of the website
#return: name of site in string type
def getMKTName():
@ -76,7 +60,7 @@ def getFixedURL():
# Closes Tor Browser
#@param: current selenium driver
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -101,9 +85,9 @@ def createFFDriver():
ff_prof.set_preference("privacy.sanitize.sanitizeOnShutdown", True)
ff_prof.set_preference("signon.rememberSignons", False)
ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
ff_prof.set_preference("network.dns.disablePrefetch", True)
ff_prof.set_preference("network.http.sendRefererHeader", 0)
ff_prof.set_preference("permissions.default.image", 1)
# ff_prof.set_preference("network.dns.disablePrefetch", True)
# ff_prof.set_preference("network.http.sendRefererHeader", 0)
ff_prof.set_preference("permissions.default.image", 3)
ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
@ -186,12 +170,12 @@ def getNameFromURL(url):
def getInterestedLinks():
links = []
# # Hacking Tutorials
# links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/guides-tutorials/hacking/')
# # Tutorials
# links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/guides-tutorials/')
# Malware
links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/malware/')
# # Hacking Services
# links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/services/hacking-services/')
# # Services
# links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/services/')
return links
@ -238,8 +222,7 @@ def crawlForum(driver):
break
try:
link = driver.find_element(by=By.XPATH, value=
'/html/body/div[2]/div/div/div[1]/main/nav/ul/li[5]/a').get_attribute('href')
link = driver.find_element(by=By.LINK_TEXT, value='Next').get_attribute('href')
if link == "":
raise NoSuchElementException
count += 1
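Pagination in this crawler now follows the 'Next' anchor by its link text instead of a deep positional XPath, which keeps working across layout changes as long as the label stays 'Next'. A minimal sketch of that lookup with the same empty-href guard (driver is assumed to be the live Selenium session):

# Sketch of the 'Next'-link lookup used above; driver is the live Selenium session.
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException

try:
    link = driver.find_element(by=By.LINK_TEXT, value='Next').get_attribute('href')
    if link == "":
        raise NoSuchElementException
except NoSuchElementException:
    link = None   # no further pages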


+ 49
- 52
MarketPlaces/TorMarket/parser.py View File

@ -104,61 +104,58 @@ def tormarket_listing_parser(soup):
shipFrom = [] # 18 Product_ShippedFrom
shipTo = [] # 19 Product_ShippedTo
href = [] # 20 Product_Links
products_list = soup.find_all('li')
nm = 0
products_list = soup.find('ul', {"class": "products columns-3 tablet-columns-2 mobile-columns-1"}).find_all('li')
nm = len(products_list)
for product in products_list:
# Finding the name of the product
name_of_product = product.find("h2", {"class": "woocommerce-loop-product__title"}).find("a").text
name_of_product_cleaned = cleanString(name_of_product.strip())
# print(name_of_product_cleaned)
name.append(name_of_product_cleaned)
#finding the URL
try:
# Finding the name of the product
name_of_product = product.find("h2", {"class": "woocommerce-loop-product__title"}).find("a").text
name_of_product_cleaned = cleanString(name_of_product.strip())
print(name_of_product_cleaned)
name.append(name_of_product_cleaned)
#finding the URL
try:
url = product.find("div", {"class": "product-loop-content text-center"}).find("a").get("href")
print(url)
href.append(url)
except AttributeError as e:
print("I can't find the link")
raise e
#finding the rating of the product
rating_score_of_product = product.find("div", {"class": "product-loop-content text-center"}).find("div").find("span").text
rating_item.append(cleanString(rating_score_of_product.strip()))
print("done")
#finding the rating of the vendors
rating_score_of_vendor = product.find("div", {"class": "wcfmmp-store-rating"}).find("strong").text
rating_vendor.append(cleanString(rating_score_of_vendor.strip()))
print("done")
#finding the cost in USD
cost = product.find("span", {"class": "woocommerce-Price-amount amount"}).text
USD.append(cost)
print("done")
#finding the name of the vendor
vendor_name = product.find("div", {"class": "wcfmmp_sold_by_wrapper"}).find("a").text
vendor.append(cleanString(vendor_name.strip()))
print("done")
#everything else appends a -1
success.append("-1")
CVE.append("-1")
MS.append("-1")
category.append("-1")
describe.append("-1")
views.append("-1")
reviews.append("-1")
addDate.append("-1")
BTC.append("-1")
EURO.append("-1")
sold.append("-1")
qLeft.append("-1")
shipFrom.append("-1")
shipTo.append("-1")
print("Done! moving onto the next product!")
print(len(shipTo))
nm += 1
url = product.find("div", {"class": "product-loop-content text-center"}).find("a").get("href")
# print(url)
href.append(url)
except AttributeError as e:
print("I'm somewhere I don't belong. I'm going to leave")
continue
print("I can't find the link")
raise e
#finding the rating of the product
rating_score_of_product = product.find("div", {"class": "product-loop-content text-center"}).find("div").find("span").text
rating_item.append(cleanString(rating_score_of_product.strip()))
# print("done")
#finding the rating of the vendors
rating_score_of_vendor = product.find("div", {"class": "wcfmmp-store-rating"}).find("strong").text
rating_vendor.append(cleanString(rating_score_of_vendor.strip()))
# print("done")
#finding the cost in USD
cost = product.find("span", {"class": "woocommerce-Price-amount amount"}).text
USD.append(cost)
# print("done")
#finding the name of the vendor
vendor_name = product.find("div", {"class": "wcfmmp_sold_by_wrapper"}).find("a").text
vendor.append(cleanString(vendor_name.strip()))
# print("done")
#everything else appends a -1
success.append("-1")
CVE.append("-1")
MS.append("-1")
category.append("-1")
describe.append("-1")
views.append("-1")
reviews.append("-1")
addDate.append("-1")
BTC.append("-1")
EURO.append("-1")
sold.append("-1")
qLeft.append("-1")
shipFrom.append("-1")
shipTo.append("-1")
# print("Done! moving onto the next product!")
# print(len(shipTo))
# Populate the final variable (this should be a list with all fields scraped)
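After this rewrite the listing parser scopes its search to the WooCommerce products list and fixes nm to the list length up front, and a failed link lookup now raises instead of silently skipping the product, so every per-product list stays the same length. A small self-contained sketch of the selection on stub markup (class names come from the diff; the markup itself is invented):

# Stub markup; class names from the diff, content invented.
from bs4 import BeautifulSoup

html = """
<ul class="products columns-3 tablet-columns-2 mobile-columns-1">
  <li><h2 class="woocommerce-loop-product__title"><a href="/product/example">Example product</a></h2></li>
</ul>
"""
soup = BeautifulSoup(html, 'html.parser')
products_list = soup.find('ul', {"class": "products columns-3 tablet-columns-2 mobile-columns-1"}).find_all('li')
nm = len(products_list)   # count fixed before the loop rather than incremented per item
title = products_list[0].find("h2", {"class": "woocommerce-loop-product__title"}).find("a").text   # "Example product"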


+ 19
- 21
MarketPlaces/Utilities/utilities.py View File

@ -242,7 +242,7 @@ def cleanLink(originalLink):
def organizeProducts(marketplace, nm, vendor, rating_vendor, success_vendor, nombre, CVE, MS, category, describe,
views, reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href):
views, reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor):
rw = []
@ -291,9 +291,13 @@ def organizeProducts(marketplace, nm, vendor, rating_vendor, success_vendor, nom
lne += ","
lne += "-1" if len(shipTo) == 0 else shipTo[n] # 19
lne += ","
lne += "-1" if len(href) == 0 else href[n] # 20
lne += "-1" if len(image) == 0 else image[n] # 20
lne += ","
lne += day + " " + ahora # 21
lne += "-1" if len(image_vendor) == 0 else image_vendor[n] # 21
lne += ","
lne += "-1" if len(href) == 0 else href[n] # 22
lne += ","
lne += day + " " + ahora # 23
rw.append(lne)
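With the two image columns inserted, the tail of every listing record shifts: image takes slot 20, image_vendor slot 21, href moves to 22 and the scrape timestamp to 23. A sketch of the resulting comma-joined tail with placeholder values (only the field order is taken from the diff; the timestamp format comes from day and ahora elsewhere in the function):

# Placeholder values; only the field order is taken from the diff.
tail = ",".join([
    "US",                                # 18 shipFrom
    "WW",                                # 19 shipTo
    "<base64 product image>",            # 20 image         (new)
    "<base64 vendor image>",             # 21 image_vendor  (new)
    "http://example.onion/listing/1",    # 22 href          (previously 20)
    "06/30/2023 12:00:00",               # 23 scrape date   (previously 21)
])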
@ -363,29 +367,24 @@ def encrypt_encode_image_to_base64(driver, xpath):
return None
def decode_decrypt_image_in_base64(html_content):
soup = BeautifulSoup(html_content, 'html.parser')
for img_tag in soup.find_all('img'):
def decode_decrypt_image_in_base64(string_image):
src_attr = img_tag.get('src')
try:
if src_attr and src_attr.startswith('data:image'):
base64_image = bytes(string_image, encoding='utf-8')
encrypted_image = base64.b64decode(base64_image)
decrypted_image = aes_decryption(encrypted_image)
try:
im = Image.open(io.BytesIO(decrypted_image))
im.show()
string_image = src_attr.split('base64,')[-1]
base64_image = bytes(string_image, encoding='utf-8')
encrypted_image = base64.b64decode(base64_image)
decrypted_image = aes_decryption(encrypted_image)
return decrypted_image
im = Image.open(io.BytesIO(decrypted_image))
im.show()
except Exception as e:
print(e)
pass
except Exception as e:
print(e)
pass
return None
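The removed and added bodies of decode_decrypt_image_in_base64 are interleaved above. Read together, the helper no longer walks an HTML document looking for data-URI img tags; it now takes the base64 payload directly (the same string the parsers store in image / image_vendor) and returns the decrypted bytes, or None on failure. A reconstruction of how the new body reads, assuming aes_decryption is the module's own AES helper:

# Reconstructed from the interleaved diff above; aes_decryption is assumed to be
# the AES helper defined elsewhere in utilities.py.
import base64

def decode_decrypt_image_in_base64(string_image):
    try:
        base64_image = bytes(string_image, encoding='utf-8')
        encrypted_image = base64.b64decode(base64_image)
        decrypted_image = aes_decryption(encrypted_image)
        return decrypted_image
    except Exception as e:
        print(e)
    return None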
def replace_image_sources(driver, html_content):
@ -415,7 +414,6 @@ def replace_image_sources(driver, html_content):
def cleanHTML(driver, html):
clean_html = replace_image_sources(driver, html)
# decode_decrypt_image_in_base64(clean_html)
formats = [
"jpg", "jpeg", "jfif", "pjpeg", "pjp",


+ 7
- 22
MarketPlaces/ViceCity/crawler_selenium.py View File

@ -32,7 +32,6 @@ baseURL = 'http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling():
# opentor()
mktName = getMKTName()
driver = getAccess()
@ -42,25 +41,11 @@ def startCrawling():
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
closeDriver(driver)
new_parse(mktName, baseURL, True)
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
return
# Returns the name of the website
#return: name of site in string type
def getMKTName():
@ -77,7 +62,7 @@ def getFixedURL():
# Closes Tor Browser
#@param: current selenium driver
def closetor(driver):
def closeDriver(driver):
# global pid
# os.system("taskkill /pid " + str(pro.pid))
# os.system("taskkill /t /f /im tor.exe")
@ -104,10 +89,10 @@ def createFFDriver():
# ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
# ff_prof.set_preference("network.dns.disablePrefetch", True)
# ff_prof.set_preference("network.http.sendRefererHeader", 0)
# ff_prof.set_preference("permissions.default.image", 3)
# ff_prof.set_preference("browser.download.folderList", 2)
# ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
# ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
ff_prof.set_preference("permissions.default.image", 3)
ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
ff_prof.set_preference('network.proxy.type', 1)
ff_prof.set_preference("network.proxy.socks_version", 5)
ff_prof.set_preference('network.proxy.socks', '127.0.0.1')
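The ViceCity profile previously had these preferences commented out; re-enabling them blocks third-party images and makes downloads silent. Assuming the standard Firefox meanings, folderList 2 sends downloads to the directory configured in browser.download.dir, and the neverAsk.saveToDisk MIME list saves matching responses without a prompt. A short annotated sketch (ff_prof stands for the FirefoxProfile built in createFFDriver):

# Annotated sketch; standard Firefox preference semantics assumed.
ff_prof.set_preference("permissions.default.image", 3)                          # block third-party images only
ff_prof.set_preference("browser.download.folderList", 2)                        # 2 = use browser.download.dir
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)      # no download-manager window
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")  # save text/plain without asking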
@ -271,7 +256,7 @@ def crawlForum(driver):
driver.back()
# comment out
break
# break
# comment out
if count == 1:

