From a567086dd60e11963a323c21561b1474957fb2f5 Mon Sep 17 00:00:00 2001
From: westernmeadow
Date: Tue, 3 Oct 2023 15:53:04 -0700
Subject: [PATCH] added forum and market templates

---
 Forums/CryptBB/crawler_mechanize.py        | 257 ++++++++++++++++
 Forums/CryptBB/crawler_selenium.py         | 331 +++++++++++++++++++++
 Forums/CryptBB/parser.py                   | 282 ++++++++++++++++++
 MarketPlaces/DarkBazar/crawler_selenium.py | 262 ++++++++++++++++
 MarketPlaces/DarkBazar/parser.py           | 289 ++++++++++++++++++
 5 files changed, 1421 insertions(+)
 create mode 100644 Forums/CryptBB/crawler_mechanize.py
 create mode 100644 Forums/CryptBB/crawler_selenium.py
 create mode 100644 Forums/CryptBB/parser.py
 create mode 100644 MarketPlaces/DarkBazar/crawler_selenium.py
 create mode 100644 MarketPlaces/DarkBazar/parser.py

diff --git a/Forums/CryptBB/crawler_mechanize.py b/Forums/CryptBB/crawler_mechanize.py
new file mode 100644
index 0000000..7a763c6
--- /dev/null
+++ b/Forums/CryptBB/crawler_mechanize.py
@@ -0,0 +1,257 @@
+__author__ = '91Shadows'
+
+'''
+CryptBB Crawler (Mechanize)
+'''
+
+import codecs, os, re
+import socks, socket, time
+from datetime import date
+
+import urllib.parse as urlparse
+import http.client as httplib
+import mechanize
+import subprocess
+from bs4 import BeautifulSoup
+from Forums.Initialization.prepare_parser import new_parse
+from Forums.BestCardingWorld.parser import bestcardingworld_links_parser
+
+counter = 1
+httplib.HTTPConnection._http_vsn = 10
+httplib.HTTPConnection._http_vsn_str = 'HTTP/1.0'
+baseURL = 'http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=42&sid=ee2cbfd73c12923d979790b2bb4bdfd5'
+socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9150)
+
+
+# Opens Tor Browser and crawls the website
+def startCrawling():
+    opentor()
+    getUrl()
+    forumName = getForumName()
+    br = getAccess()
+
+    if br != 'down':
+        crawlForum(br)
+        new_parse(forumName, False)
+
+    closetor()
+
+
+# Opens Tor Browser
+def opentor():
+    global pid
+    print("Connecting Tor...")
+    path = open('../../path.txt').readline()
+    pro = subprocess.Popen(path)
+    pid = pro.pid
+    time.sleep(7.5)
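+    # path.txt (read above) is expected to hold the path of the Tor Browser
+    # executable; the fixed 7.5s sleep is a rough wait for Tor to bootstrap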
+    input("Tor Connected. Press ENTER to continue\n")
+    return
+
+
+# Creates a connection through the Tor port
+def getUrl(timeout=None):
+    socket.socket = socks.socksocket
+    socket.create_connection = create_connection
+    return
+
+
+# Makes the onion address request
+def create_connection(address, timeout=None, source_address=None):
+    sock = socks.socksocket()
+    sock.connect(address)
+    return sock
+
+
+# Returns the name of the website
+def getForumName():
+    name = 'CryptBB'
+    return name
+
+
+# Returns the link of the website
+def getFixedURL():
+    url = 'http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=42&sid=ee2cbfd73c12923d979790b2bb4bdfd5'
+
+    return url
+
+
+# Closes Tor Browser
+def closetor():
+    global pid
+    os.system("taskkill /pid " + str(pid))
+    print('Closing Tor...')
+    time.sleep(3)
+    return
+
+
+# Creates a Mechanize browser and initializes its options
+def createBrowser():
+    br = mechanize.Browser()
+    cj = mechanize.CookieJar()
+    br.set_cookiejar(cj)
+
+    # Browser options
+    br.set_handle_equiv(True)
+    br.set_handle_redirect(True)
+    br.set_handle_referer(True)
+    br.set_handle_robots(False)
+    br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
+    br.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'),
+                     ('Accept', '*/*')]
+
+    return br
+
+
+def getAccess():
+    url = getFixedURL()
+    br = createBrowser()
+
+    try:
+        br.open(url)
+        return br
+    except:
+        return 'down'
+
+
+# Saves the crawled html page
+def savePage(page, url):
+    filePath = getFullPathName(url)
+    os.makedirs(os.path.dirname(filePath), exist_ok=True)
+    a = page.read()
+    open(filePath, "wb").write(a)
+    return
+
+
+# Gets the full path of the page to be saved along with its appropriate file name
+def getFullPathName(url):
+    fileName = getNameFromURL(url)
+    if isDescriptionLink(url):
+        fullPath = 'C:/Users/CALSysLab/Documents/threatIntelligence-main/DarkWebMining_Working/Forums/CryptBB/HTML_Pages/' + str(
+            "%02d" % date.today().month) + str("%02d" % date.today().day) + str(
+            "%04d" % date.today().year) + '/' + 'Description/' + fileName + '.html'
+    else:
+        fullPath = 'C:/Users/CALSysLab/Documents/threatIntelligence-main/DarkWebMining_Working/Forums/CryptBB/HTML_Pages/' + str(
+            "%02d" % date.today().month) + str("%02d" % date.today().day) + str(
+            "%04d" % date.today().year) + '/' + 'Listing/' + fileName + '.html'
+    return fullPath
+
+
+# Creates the name of the file based on the URL
+def getNameFromURL(url):
+    global counter
+    name = ''.join(e for e in url if e.isalnum())
+    if name == '':
+        name = str(counter)
+        counter = counter + 1
+    return name
+
+
+# Hacking and Markets related topics
+def getInterestedLinks():
+    links = []
+
+    links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=43&sid=e12864ffccc5df877b03b573534955be')
+
+    return links
+
+
+# Start crawling Forum pages
+def crawlForum(br):
+    print("Crawling the CryptBB forum")
+
+    linksToCrawl = getInterestedLinks()
+    visited = set(linksToCrawl)
+    initialTime = time.time()
+
+    i = 0
+    while i < len(linksToCrawl):
+        link = linksToCrawl[i]
+        print('Crawling :', link)
+        try:
+            page = br.open(link)
+            savePage(page, link)
+
+            res = br.response().read()
+            soup = BeautifulSoup(res, 'html.parser')
+
+            next_link = soup.find("a", {"rel": "next"})
+            if next_link != None:
+                full_url = urlparse.urljoin(linksToCrawl[i], next_link['href'])
+                linksToCrawl.insert(i + 1, full_url)
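+
+            # the "next" page link is queued immediately after the current one,
+            # so a board's pagination is exhausted before the next seed link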
+            listOfTopics = findDescriptionPages(link)
+            for topic in listOfTopics:
+                itemPage = br.open(str(topic))
+                savePage(itemPage, topic)
+
+        except Exception as e:
+            print('Error getting link: ', link, e)
+        i += 1
+
+    # finalTime = time.time()
+    # print(finalTime - initialTime)
+
+    input("CryptBB forum done successfully. Press ENTER to continue\n")
+
+    return
+
+
+# Returns True if the link is a 'Topic' link, may need to change for different websites
+def isDescriptionLink(url):
+    if 'topic' in url:
+        return True
+    return False
+
+
+# Returns True if the link is a listing page link, may need to change for different websites
+def isListingLink(url):
+    '''
+    reg = 'board=[0-9]+.[0-9]+\Z'
+    if len(re.findall(reg, url)) == 0:
+        return False
+    return True
+    '''
+    if 'forum' in url:
+        return True
+    return False
+
+
+# calls the parser to extract the description (topic) links from a saved listing page
+def findDescriptionPages(url):
+    soup = ""
+
+    error = False
+    try:
+        html = codecs.open(
+            "C:\\Users\\CALSysLab\\Documents\\threatIntelligence-main\\DarkWebMining_Working\\Forums\\CryptBB\\HTML_Pages\\" + str(
+                "%02d" % date.today().month) + str("%02d" % date.today().day) + str(
+                "%04d" % date.today().year) + "\\Listing\\" + getNameFromURL(url) + ".html", encoding='utf8')
+        soup = BeautifulSoup(html, "html.parser")
+    except:
+        try:
+            html = open(
+                "C:\\Users\\CALSysLab\\Documents\\threatIntelligence-main\\DarkWebMining_Working\\Forums\\CryptBB\\HTML_Pages\\" + str(
+                    "%02d" % date.today().month) + str("%02d" % date.today().day) + str(
+                    "%04d" % date.today().year) + "\\Listing\\" + getNameFromURL(url) + ".html")
+            soup = BeautifulSoup(html, "html.parser")
+        except:
+            error = True
+            print("There was a problem reading the file " + getNameFromURL(url) + " in the listing section.")
+
+    if not error:
+        return bestcardingworld_links_parser(soup)
+    else:
+        return []
+
+
+def crawler():
+    startCrawling()
+    print("Crawling and Parsing CryptBB .... DONE!")
DONE!") diff --git a/Forums/CryptBB/crawler_selenium.py b/Forums/CryptBB/crawler_selenium.py new file mode 100644 index 0000000..1570f56 --- /dev/null +++ b/Forums/CryptBB/crawler_selenium.py @@ -0,0 +1,331 @@ +__author__ = 'DarkWeb' + +''' +CryptBB Forum Crawler (Selenium) +''' + +from selenium import webdriver +from selenium.common.exceptions import NoSuchElementException +from selenium.webdriver.firefox.firefox_profile import FirefoxProfile +from selenium.webdriver.firefox.firefox_binary import FirefoxBinary +from selenium.webdriver.firefox.service import Service +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait + +from PIL import Image +import urllib.parse as urlparse +import os, re, time +import subprocess +from bs4 import BeautifulSoup +from Forums.Initialization.prepare_parser import new_parse +from Forums.CryptBB.parser import cryptBB_links_parser +from Forums.Utilities.utilities import cleanHTML + +counter = 1 +baseURL = 'http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/' + + +# Opens Tor Browser, crawls the website +def startCrawling(): + forumName = getForumName() + driver = getAccess() + + if driver != 'down': + try: + login(driver) + crawlForum(driver) + except Exception as e: + print(driver.current_url, e) + closeDriver(driver) + + new_parse(forumName, baseURL, True) + + +# Login using premade account credentials and do login captcha manually +def login(driver): + #click login button + login_link = driver.find_element( + by=By.XPATH, value='/html/body/div/div[2]/div/table/tbody/tr[2]/td/center/pre/strong/a[1]').\ + get_attribute('href') + driver.get(login_link)# open tab with url + + #entering username and password into input boxes + usernameBox = driver.find_element(by=By.XPATH, value='/html/body/div/div[2]/div/form/table/tbody/tr[2]/td[2]/input') + #Username here + usernameBox.send_keys('holyre')#sends string to the username box + passwordBox = driver.find_element(by=By.XPATH, value='/html/body/div/div[2]/div/form/table/tbody/tr[3]/td[2]/input') + #Password here + passwordBox.send_keys('PlatinumBorn2')# sends string to passwordBox + + ''' + # wait for captcha page show up + WebDriverWait(driver, 100).until(EC.visibility_of_element_located( + (By.XPATH, "/html/body/div/div[2]/div/form/div/input"))) + + # save captcha to local + driver.find_element(by=By.XPATH, value='//*[@id="captcha_img"]').screenshot(r'..\CryptBB\captcha.png') + + # This method will show image in any image viewer + im = Image.open(r'..\CryptBB\captcha.png') + + im.show() + + # wait until input space show up + inputBox = driver.find_element(by=By.XPATH, value='//*[@id="imagestring"]') + + + # ask user input captcha solution in terminal + userIn = input("Enter solution: ") + + # send user solution into the input space + inputBox.send_keys(userIn) + + # click the verify(submit) button + driver.find_element(by=By.XPATH, value="/html/body/div/div[2]/div/form/div/input").click() + ''' + input("Press ENTER when CAPTCHA is completed\n") + + # wait for listing page show up (This Xpath may need to change based on different seed url) + # wait for 50 sec until id = tab_content is found, then cont + WebDriverWait(driver, 50).until(EC.visibility_of_element_located( + (By.XPATH, '//*[@id="tab_content"]'))) + + +# Returns the name of the website +def getForumName() -> str: + name = 'CryptBB' + return name + + +# Return the link of the website +def getFixedURL(): + url = 
+
+    service = Service(config.get('TOR', 'geckodriver_path'))
+
+    driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
+
+    driver.maximize_window()
+
+    return driver
+
+
+def getAccess():
+    url = getFixedURL()
+    driver = createFFDriver()
+    try:
+        driver.get(url)
+        return driver
+    except:
+        driver.close()
+        return 'down'
+
+
+# Saves the crawled html page
+def savePage(driver, page, url):
+    cleanPage = cleanHTML(driver, page)
+    filePath = getFullPathName(url)
+    os.makedirs(os.path.dirname(filePath), exist_ok=True)
+    open(filePath, 'wb').write(cleanPage.encode('utf-8'))
+    return
+
+
+# Gets the full path of the page to be saved along with its appropriate file name
+def getFullPathName(url):
+    from Forums.Initialization.forums_mining import config, CURRENT_DATE
+
+    mainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + getForumName() + "/HTML_Pages")
+    fileName = getNameFromURL(url)
+    if not isListingLink(url):
+        fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
+    else:
+        fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
+    return fullPath
+
+
+# Creates the file name from the passed URL
+def getNameFromURL(url):
+    global counter
+    name = ''.join(e for e in url if e.isalnum())
+    if name == '':
+        name = str(counter)
+        counter = counter + 1
+    return name
+
+
+def getInterestedLinks():
+    links = []
+
+    # Beginner Programming
+    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=86')
+    # Beginner Carding and Fraud
+    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=91')
+    # Beginner Hacking
+    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=87')
+    # Newbie
+    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=84')
+    # Beginner Hardware
+    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=89')
+    # Training Challenges
+    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=96')
+    # Darknet Discussions
+    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=88')
+    # Public Leaks and Warez
+    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=97')
+    # Sell
+    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=44')
+
+    return links
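+
+
+# Crawls each interested board: walks the board's pagination and, for every
+# topic found, saves every page of that topic before returning to the listing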
+def crawlForum(driver):
+    print("Crawling the CryptBB forum")
+
+    linksToCrawl = getInterestedLinks()
+
+    i = 0
+    while i < len(linksToCrawl):
+        link = linksToCrawl[i]
+        print('Crawling :', link)
+        try:
+            has_next_page = True
+            count = 0
+
+            while has_next_page:
+                try:
+                    driver.get(link)
+                except:
+                    driver.refresh()
+                html = driver.page_source
+                savePage(driver, html, link)
+
+                topics = topicPages(html)
+                for topic in topics:
+                    has_next_topic_page = True
+                    counter = 1
+                    page = topic
+
+                    while has_next_topic_page:
+                        itemURL = urlparse.urljoin(baseURL, str(page))
+                        try:
+                            driver.get(itemURL)
+                        except:
+                            driver.refresh()
+
+                        if isListingLink(driver.current_url):
+                            break
+
+                        savePage(driver, driver.page_source, topic + f"page{counter}")  # very important
+
+                        # # comment out
+                        # if counter == 2:
+                        #     break
+
+                        try:
+                            temp = driver.find_element(By.XPATH, '/html/body/div/div[2]/div/div[2]/div')
+                            page = temp.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute('href')
+
+                            if page == "":
+                                raise NoSuchElementException
+                            counter += 1
+
+                        except NoSuchElementException:
+                            has_next_topic_page = False
+
+                    # making sure we go back to the listing page (browser back button simulation)
+                    try:
+                        driver.get(link)
+                    except:
+                        driver.refresh()
+
+                # # comment out
+                # break
+
+                # # comment out
+                # if count == 1:
+                #     break
+
+                try:
+                    temp = driver.find_element(by=By.XPATH, value='/html/body/div/div[2]/div/div[2]/div')
+                    link = temp.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute('href')
+
+                    if link == "":
+                        raise NoSuchElementException
+                    count += 1
+
+                except NoSuchElementException:
+                    has_next_page = False
+
+        except Exception as e:
+            print(link, e)
+        i += 1
+
+    print("Crawling the CryptBB forum done.")
+
+
+# Returns True if the link is a topic link, may need to change for every website
+def isDescriptionLink(url):
+    if 'thread' in url:
+        return True
+    return False
+
+
+# Returns True if the link is a listing page link, may need to change for every website
+def isListingLink(url):
+    if '.onion/forumdisplay' in url:
+        return True
+    return False
+
+
+# calling the parser to define the links
+def topicPages(html):
+    soup = BeautifulSoup(html, "html.parser")
+    return cryptBB_links_parser(soup)
+
+
+def crawler():
+    startCrawling()
+    # print("Crawling and Parsing CryptBB .... DONE!")
DONE!") diff --git a/Forums/CryptBB/parser.py b/Forums/CryptBB/parser.py new file mode 100644 index 0000000..1ac7bc6 --- /dev/null +++ b/Forums/CryptBB/parser.py @@ -0,0 +1,282 @@ +__author__ = 'DarkWeb' + +# Here, we are importing the auxiliary functions to clean or convert data +from Forums.Utilities.utilities import * +from datetime import date +from datetime import timedelta +import re + +# Here, we are importing BeautifulSoup to search through the HTML tree +from bs4 import BeautifulSoup + +# This is the method to parse the Description Pages (one page to each topic in the Listing Pages) + + +def cryptBB_description_parser(soup): + + # Fields to be parsed + + topic = "-1" # 0 *topic name + user = [] # 1 *all users of each post + status = [] # 2 all user's authority in each post such as (adm, member, dangerous) + reputation = [] # 3 all user's karma in each post (usually found as a number) + interest = [] # 4 all user's interest in each post + sign = [] # 5 all user's signature in each post (usually a standard message after the content of the post) + post = [] # 6 all messages of each post + feedback = [] # 7 all feedbacks of each vendor (this was found in just one Forum and with a number format) + addDate = [] # 8 all dates of each post + image_user = [] # 9 all user avatars of each post + image_post = [] # 10 all first images of each post + + # Finding the topic (should be just one coming from the Listing Page) + + li = soup.find("td", {"class": "thead"}).find('strong') + topic = li.text + topic = re.sub("\[\w*\]", '', topic) + + topic = topic.replace(",","") + topic = topic.replace("\n","") + topic = cleanString(topic.strip()) + + # Finding the repeated tag that corresponds to the listing of posts + + posts = soup.find('table', {"class": "tborder tfixed clear"}).find('td', {"id": "posts_container"}).find_all( + 'div', {"class": "post"}) + + # For each message (post), get all the fields we are interested to: + + for ipost in posts: + + if ipost.find('div', {"class": "deleted_post_author"}): + continue + + # Finding a first level of the HTML page + + post_wrapper = ipost.find('span', {"class": "largetext"}) + + # Finding the author (user) of the post + + author = post_wrapper.text.strip() + user.append(cleanString(author)) # Remember to clean the problematic characters + + # Finding the status of the author + + smalltext = ipost.find('div', {"class": "post_author"}) + + if smalltext is not None: + + # CryptBB does have membergroup and postgroup + membergroup = smalltext.find('div', {"class": "profile-rank"}) + postgroup = smalltext.find('div', {"class": "postgroup"}) + if membergroup != None: + membergroup = membergroup.text.strip() + if postgroup != None: + postgroup = postgroup.text.strip() + membergroup = membergroup + " - " + postgroup + else: + if postgroup != None: + membergroup = postgroup.text.strip() + else: + membergroup = "-1" + status.append(cleanString(membergroup)) + + # Finding the interest of the author + # CryptBB does not have blurb + blurb = smalltext.find('li', {"class": "blurb"}) + if blurb != None: + blurb = blurb.text.strip() + else: + blurb = "-1" + interest.append(cleanString(blurb)) + + # Finding the reputation of the user + # CryptBB does have reputation + author_stats = smalltext.find('div', {"class": "author_statistics"}) + karma = author_stats.find('strong') + if karma != None: + karma = karma.text + karma = karma.replace("Community Rating: ", "") + karma = karma.replace("Karma: ", "") + karma = karma.strip() + else: + karma = "-1" + 
+            reputation.append(cleanString(karma))
+
+        else:
+            status.append('-1')
+            interest.append('-1')
+            reputation.append('-1')
+
+        # Another tag from which to get the post date, the post content and the user's signature
+
+        postarea = ipost.find('div', {"class": "post_content"})
+
+        dt = postarea.find('span', {"class": "post_date"}).text
+        dt = dt.strip()
+        day = date.today()
+        if "Today" in dt:
+            today = day.strftime('%m-%d-%Y')
+            stime = dt.replace('Today,', '').strip()
+            date_time_obj = today + ', ' + stime
+            date_time_obj = datetime.strptime(date_time_obj, '%m-%d-%Y, %I:%M %p')
+        elif "Yesterday" in dt:
+            yesterday = day - timedelta(days=1)
+            yesterday = yesterday.strftime('%m-%d-%Y')
+            stime = dt.replace('Yesterday,', '').strip()
+            date_time_obj = yesterday + ', ' + stime
+            date_time_obj = datetime.strptime(date_time_obj, '%m-%d-%Y, %I:%M %p')
+        elif "ago" in dt:
+            date_time_obj = postarea.find('span', {"class": "post_date"}).find('span')['title']
+            date_time_obj = datetime.strptime(date_time_obj, '%m-%d-%Y, %I:%M %p')
+        else:
+            date_time_obj = datetime.strptime(dt, '%m-%d-%Y, %I:%M %p')
+        addDate.append(date_time_obj)
+
+        # Finding the post
+
+        inner = postarea.find('div', {"class": "post_body scaleimages"})
+        quote = inner.find('blockquote')
+        if quote is not None:
+            quote.decompose()
+        inner = inner.text.strip()
+        post.append(cleanString(inner))
+
+        # Finding the user's signature
+
+        # signature = ipost.find('div', {"class": "post_wrapper"}).find('div', {"class": "moderatorbar"}).find('div', {"class": "signature"})
+        signature = ipost.find('div', {"class": "signature scaleimages"})
+        if signature != None:
+            signature = signature.text.strip()
+        else:
+            signature = "-1"
+        sign.append(cleanString(signature))
+
+        # As no information about the user's feedback was found, just assign "-1" to the variable
+
+        feedback.append("-1")
+
+        img = ipost.find('div', {"class": "post_body scaleimages"}).find('img')
+        if img is not None:
+            img = img.get('src').split('base64,')[-1]
+        else:
+            img = "-1"
+        image_post.append(img)
+
+        avatar = ipost.find('div', {"class": "author_avatar"})
+        if avatar is not None:
+            img = avatar.find('img')
+            if img is not None:
+                img = img.get('src').split('base64,')[-1]
+            else:
+                img = "-1"
+        else:
+            img = "-1"
+        image_user.append(img)
+
+    # Populate the final variable (this should be a list with all fields scraped)
+
+    row = (topic, user, status, reputation, interest, sign, post, feedback, addDate, image_user, image_post)
+
+    # Sending the results
+
+    return row
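+
+# a minimal usage sketch of the parser above (the file path is hypothetical):
+#   with open('Description/sometopic.html', encoding='utf8') as f:
+#       row = cryptBB_description_parser(BeautifulSoup(f, "html.parser"))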
+
+
+# This is the method to parse the Listing Pages (one page with many topics)
+def cryptBB_listing_parser(soup):
+
+    nm = 0  # *this variable should receive the number of topics
+    forum = "CryptBB"  # 0 *forum name
+    board = "-1"  # 1 *board name (the previous level of the topic in the Forum categorization tree.
+                  # For instance: Security/Malware/Tools to hack Facebook. The board here should be Malware)
+    author = []  # 2 *all authors of each topic
+    topic = []  # 3 *all topics
+    views = []  # 4 number of views of each topic
+    posts = []  # 5 number of posts of each topic
+    href = []  # 6 this variable should receive all cleaned urls (we will use this to do the merge between
+               # Listing and Description pages)
+    addDate = []  # 7 when the topic was created (difficult to find)
+    image_author = []  # 8 all author avatars used in each topic
+
+    # Finding the board (should be just one)
+
+    board = soup.find('span', {"class": "active"}).text
+    board = cleanString(board.strip())
+
+    # Finding the repeated tag that corresponds to the listing of topics
+
+    itopics = soup.find_all('tr', {"class": "inline_row"})
+
+    # Counting how many topics
+
+    nm = len(itopics)
+
+    for itopic in itopics:
+
+        # For each topic found, the structure holding the information can be of two types,
+        # so both are tested to make sure no topic is missed
+
+        # Adding the topic to the topic list
+        try:
+            topics = itopic.find('span', {"class": "subject_old"}).find('a').text
+        except:
+            topics = itopic.find('span', {"class": "subject_new"}).find('a').text
+        topics = re.sub(r"\[\w*\]", '', topics)
+        topic.append(cleanString(topics))
+
+        image_author.append("-1")
+
+        # Adding the url to the list of urls
+        try:
+            link = itopic.find('span', {"class": "subject_old"}).find('a').get('href')
+        except:
+            link = itopic.find('span', {"class": "subject_new"}).find('a').get('href')
+        href.append(link)
+
+        # Finding the author of the topic
+        ps = itopic.find('div', {"class": "author smalltext"}).text
+        user = ps.strip()
+        author.append(cleanString(user))
+
+        # Finding the number of replies
+        columns = itopic.findChildren('td', recursive=False)
+        replies = columns[3].text
+        if replies == '-':
+            posts.append('-1')
+        else:
+            posts.append(cleanString(replies))
+
+        # Finding the number of Views
+        tview = columns[4].text
+        if tview == '-':
+            views.append('-1')
+        else:
+            views.append(cleanString(tview))
+
+        # If there is no information about when the topic was added, just assign "-1" to the variable
+
+        addDate.append("-1")
+
+    return organizeTopics(forum, nm, board, author, topic, views, posts, href, addDate, image_author)
+
+
+def cryptBB_links_parser(soup):
+
+    # Returning all links that should be visited by the Crawler
+
+    href = []
+
+    listing = soup.find('table', {"class": "tborder clear"}).find('tbody').find_all('tr', {"class": "inline_row"})
+
+    for a in listing:
+        try:
+            link = a.find('span', {"class": "subject_old"}).find('a').get('href')
+        except:
+            link = a.find('span', {"class": "subject_new"}).find('a').get('href')
+
+        href.append(link)
+
+    return href
diff --git a/MarketPlaces/DarkBazar/crawler_selenium.py b/MarketPlaces/DarkBazar/crawler_selenium.py
new file mode 100644
index 0000000..fdfb640
--- /dev/null
+++ b/MarketPlaces/DarkBazar/crawler_selenium.py
@@ -0,0 +1,262 @@
+__author__ = 'DarkWeb'
+
+'''
+DarkBazar Marketplace Crawler (Selenium)
+'''
+
+from selenium import webdriver
+from selenium.common.exceptions import NoSuchElementException
+from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
+from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
+from selenium.webdriver.firefox.service import Service
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support.ui import Select
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.common.by import By
+
+from PIL import Image
+import urllib.parse as urlparse
+import os, re, time
+from datetime import date
+import subprocess
+import configparser
+from bs4 import BeautifulSoup
+from MarketPlaces.Initialization.prepare_parser import new_parse
+from MarketPlaces.DarkBazar.parser import darkbazar_links_parser
+from MarketPlaces.Utilities.utilities import cleanHTML
+
+counter = 1
+baseURL = 'http://jw5e5sdywqupaqgtt43uq5ysfqpd2vzefl65s2fcjlj4qfxivynv6bqd.onion/'
+
+
+def startCrawling():
+    mktName = getMKTName()
+    driver = getAccess()
+
+    if driver != 'down':
+        try:
+            login(driver)
+            crawlForum(driver)
+        except Exception as e:
+            print(driver.current_url, e)
+        closeDriver(driver)
+
+        new_parse(mktName, baseURL, True)
+
+
+# Returns the name of the website
+def getMKTName():
+    name = 'DarkBazar'
+    return name
+
+
+# Returns the base link of the website
+def getFixedURL():
+    url = 'http://jw5e5sdywqupaqgtt43uq5ysfqpd2vzefl65s2fcjlj4qfxivynv6bqd.onion/'
+    return url
+
+
+# Closes Tor Browser
+def closeDriver(driver):
+    # global pid
+    # os.system("taskkill /pid " + str(pro.pid))
+    # os.system("taskkill /t /f /im tor.exe")
+    print('Closing Tor...')
+    driver.close()
+    time.sleep(3)
+    return
+
+
+# Creates a FireFox 'driver' and configures its 'Profile'
+# to use the Tor proxy and socket
+def createFFDriver():
+    from MarketPlaces.Initialization.markets_mining import config
+
+    ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
+
+    ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
+    ff_prof.set_preference("places.history.enabled", False)
+    ff_prof.set_preference("privacy.clearOnShutdown.offlineApps", True)
+    ff_prof.set_preference("privacy.clearOnShutdown.passwords", True)
+    ff_prof.set_preference("privacy.clearOnShutdown.siteSettings", True)
+    ff_prof.set_preference("privacy.sanitize.sanitizeOnShutdown", True)
+    ff_prof.set_preference("signon.rememberSignons", False)
+    ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
+    # ff_prof.set_preference("network.dns.disablePrefetch", True)
+    # ff_prof.set_preference("network.http.sendRefererHeader", 0)
+    ff_prof.set_preference("permissions.default.image", 3)
+    ff_prof.set_preference("browser.download.folderList", 2)
+    ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
+    ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
+    ff_prof.set_preference('network.proxy.type', 1)
+    ff_prof.set_preference("network.proxy.socks_version", 5)
+    ff_prof.set_preference('network.proxy.socks', '127.0.0.1')
+    ff_prof.set_preference('network.proxy.socks_port', 9150)
+    ff_prof.set_preference('network.proxy.socks_remote_dns', True)
+    ff_prof.set_preference("javascript.enabled", False)
+    ff_prof.update_preferences()
+
+    service = Service(config.get('TOR', 'geckodriver_path'))
+
+    driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
+
+    driver.maximize_window()
+
+    return driver
+
+
+# the driver 'gets' the url; if the site can't be accessed, return 'down'
+def getAccess():
+    url = getFixedURL()
+    driver = createFFDriver()
+    try:
+        driver.get(url)
+        return driver
+    except:
+        driver.close()
+        return 'down'
+
+
+def login(driver):
+    input("Press ENTER when CAPTCHA is complete and the login page has loaded\n")
+
+    # enter username and password into the input boxes
+    usernameBox = driver.find_element(by=By.XPATH, value='//input[@name="username"]')
+    # Username here
+    usernameBox.send_keys('aliciamykeys')
+    passwordBox = driver.find_element(by=By.XPATH, value='//input[@name="password"]')
+    # Password here
+    passwordBox.send_keys('aliciawherearemykey$')
+    # session time
+    session_select = Select(driver.find_element(by=By.XPATH, value='/html/body/main/div/div/div/div/div/form/div[4]/div/div[2]/select'))
+    session_select.select_by_visible_text('Session 60min')
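+    # the session-length dropdown is part of DarkBazar's login form; 60 minutes
+    # is presumably enough for one full crawl pass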
+
+    input("Press ENTER when CAPTCHA is completed and you exit the newsletter\n")
+
+    # wait for the listing page to show up (this XPath may need to change based on the seed url)
+    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
+        (By.XPATH, '//*[@id="submit"]')))
+
+
+def savePage(driver, page, url):
+    cleanPage = cleanHTML(driver, page)
+    filePath = getFullPathName(url)
+    os.makedirs(os.path.dirname(filePath), exist_ok=True)
+    open(filePath, 'wb').write(cleanPage.encode('utf-8'))
+    return
+
+
+def getFullPathName(url):
+    from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
+
+    mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
+    fileName = getNameFromURL(url)
+    if isDescriptionLink(url):
+        fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
+    else:
+        fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
+    return fullPath
+
+
+def getNameFromURL(url):
+    global counter
+    name = ''.join(e for e in url if e.isalnum())
+    if name == '':
+        name = str(counter)
+        counter = counter + 1
+    return name
+
+
+def getInterestedLinks():
+    links = []
+
+    # Digital Goods
+    links.append('http://jw5e5sdywqupaqgtt43uq5ysfqpd2vzefl65s2fcjlj4qfxivynv6bqd.onion/cat.php?category=3')
+    # Services
+    links.append('http://jw5e5sdywqupaqgtt43uq5ysfqpd2vzefl65s2fcjlj4qfxivynv6bqd.onion/cat.php?category=5')
+
+    return links
+
+
+def crawlForum(driver):
+    print("Crawling the DarkBazar market")
+
+    linksToCrawl = getInterestedLinks()
+
+    i = 0
+    while i < len(linksToCrawl):
+        link = linksToCrawl[i]
+        print('Crawling :', link)
+        try:
+            has_next_page = True
+            count = 0
+
+            while has_next_page:
+                try:
+                    driver.get(link)
+                except:
+                    driver.refresh()
+                html = driver.page_source
+                savePage(driver, html, link)
+
+                items = productPages(html)
+
+                for item in items:
+                    itemURL = urlparse.urljoin(baseURL, str(item))
+                    try:
+                        driver.get(itemURL)
+                    except:
+                        driver.refresh()
+                    savePage(driver, driver.page_source, item)
+                    driver.back()
+
+                # # comment out
+                # break
+
+                # # comment out
+                # if count == 1:
+                #     break
+
+                try:
+                    link = driver.find_element(by=By.XPATH, value='//a[contains(text(), "Next")]').get_attribute('href')
+                    if link == "":
+                        raise NoSuchElementException
+                    count += 1
+
+                except NoSuchElementException:
+                    has_next_page = False
+
+        except Exception as e:
+            print(link, e)
+        i += 1
+
+    print("Crawling the DarkBazar market done.")
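+
+
+# DarkBazar item pages contain 'item' in the URL and listing pages use
+# 'cat.php?category=', which is what the two predicates below test for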
+# Returns True if the link is a description (item) link, may need to change for every website
+def isDescriptionLink(url):
+    if 'item' in url:
+        return True
+    return False
+
+
+# Returns True if the link is a listing page link, may need to change for every website
+def isListingLink(url):
+    if 'category=' in url:
+        return True
+    return False
+
+
+def productPages(html):
+    soup = BeautifulSoup(html, "html.parser")
+    return darkbazar_links_parser(soup)
+
+
+def crawler():
+    startCrawling()
diff --git a/MarketPlaces/DarkBazar/parser.py b/MarketPlaces/DarkBazar/parser.py
new file mode 100644
index 0000000..9386d18
--- /dev/null
+++ b/MarketPlaces/DarkBazar/parser.py
@@ -0,0 +1,289 @@
+__author__ = 'DarkWeb'
+
+# Here, we are importing the auxiliary functions to clean or convert data
+from MarketPlaces.Utilities.utilities import *
+import re
+
+# Here, we are importing BeautifulSoup to search through the HTML tree
+from bs4 import BeautifulSoup
+
+
+# parses description pages: takes the soup object of a description page and extracts the info it needs
+# the info is stored in different lists/values, which are returned after being organized
+# @param: soup object looking at the html page of a description page
+# return: 'row' that contains a variety of lists/values that each hold info on the description page
+def darkbazar_description_parser(soup):
+    # Fields to be parsed
+
+    vendor = "-1"  # 0 *Vendor_Name
+    success = "-1"  # 1 Vendor_Successful_Transactions
+    rating_vendor = "-1"  # 2 Vendor_Rating
+    name = "-1"  # 3 *Product_Name
+    describe = "-1"  # 4 Product_Description
+    CVE = "-1"  # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
+    MS = "-1"  # 6 Product_MS_Classification (Microsoft Security)
+    category = "-1"  # 7 Product_Category
+    views = "-1"  # 8 Product_Number_Of_Views
+    reviews = "-1"  # 9 Product_Number_Of_Reviews
+    rating_item = "-1"  # 10 Product_Rating
+    addDate = "-1"  # 11 Product_AddedDate
+    BTC = "-1"  # 12 Product_BTC_SellingPrice
+    USD = "-1"  # 13 Product_USD_SellingPrice
+    EURO = "-1"  # 14 Product_EURO_SellingPrice
+    sold = "-1"  # 15 Product_QuantitySold
+    left = "-1"  # 16 Product_QuantityLeft
+    shipFrom = "-1"  # 17 Product_ShippedFrom
+    shipTo = "-1"  # 18 Product_ShippedTo
+    image = "-1"  # 19 Product_Image
+    vendor_image = "-1"  # 20 Vendor_Image
+
+    # Finding Product Name
+    divmb = soup.findAll('div', {'class': "mb-1"})
+
+    name = divmb[0].text
+    name = name.replace('\n', ' ')
+    name = name.replace(",", "")
+    name = name.strip()
+
+    # Finding Vendor
+    vendor = divmb[1].find('a').text.strip()
+
+    # Finding Vendor Rating
+    temp = soup.find('div', {'class': ""}).text
+    temp = temp.split('(')
+    rating = temp[0].replace("Vendor's Review : ", "")
+    rating = rating.replace("%", "")
+    rating_vendor = rating.strip()
+
+    # Finding the Product Rating and Number of Product Reviews
+    reviews = temp[2].replace(" review)", "")
+    reviews = reviews.strip()
+
+    temp = temp[1].split(")")
+    rating = temp[1].replace("Product Review : ", "")
+    rating = rating.replace("%", "")
+    rating_item = rating.strip()
+
+    # Finding Prices
+    USD = soup.find('div', {'class': "h3 text-primary"}).text.strip()
+
+    # Finding the Product Category
+    pmb = soup.findAll('p', {'class': "mb-1"})
+
+    category = pmb[-1].text
+    category = category.replace("Category: ", "").strip()
+
+    # Finding the Product Quantity Available
+    left = divmb[-1].text
+    left = left.split(",", 1)[1]
+    left = left.replace("in stock", "")
+    left = left.strip()
+
+    # Finding Number Sold
+    sold = divmb[-1].text
+    sold = sold.split(",", 1)[0]
+    sold = sold.replace("sold", "")
+    sold = sold.strip()
+
+    # Finding Shipment Information (Origin)
+    shipFrom = pmb[0].text
+    shipFrom = shipFrom.replace("Ships from: ", "").strip()
+
+    # Finding Shipment Information (Destination)
+    shipTo = pmb[1].text
+    shipTo = shipTo.replace("Ships to: ", "").strip()
+
+    # Finding the Product description
+    cardbody = soup.findAll('div', {'class': "card-body"})
+    describe = cardbody[1].text.strip()
+
+    # Finding Product Image
+    image = soup.find('div', {'class': 'product-primary'}).find('img')
+    image = image.get('src')
+    image = image.split('base64,')[-1]
+
+    # Searching for CVE and MS categories
+    cve = soup.findAll(text=re.compile(r'CVE-\d{4}-\d{4}'))
+    if cve:
+        CVE = " "
+        for idx in cve:
+            CVE += idx
+            CVE += " "
+        CVE = CVE.replace(',', ' ')
+        CVE = CVE.replace('\n', '')
+    ms = soup.findAll(text=re.compile(r'MS\d{2}-\d{3}'))
+    if ms:
+        MS = " "
+        for im in ms:
+            MS += im
+            MS += " "
+        MS = MS.replace(',', ' ')
+        MS = MS.replace('\n', '')
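+
+    # fields that were never found keep their "-1" default in the row below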
+
+    # Populating the final variable (this should be a list with all fields scraped)
+    row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
+           BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
+
+    # Sending the results
+    return row
+
+
+# parses listing pages: takes the soup object of a listing page and extracts the info it needs
+# the info is stored in different lists, which are returned after being organized
+# @param: soup object looking at the html page of a listing page
+# return: 'row' that contains a variety of lists that each hold info on the listing page
+def darkbazar_listing_parser(soup):
+
+    # Fields to be parsed
+    nm = 0  # *Total_Products (Should be Integer)
+    mktName = "DarkBazar"  # 0 *Marketplace_Name
+    vendor = []  # 1 *Vendor y
+    rating_vendor = []  # 2 Vendor_Rating
+    success = []  # 3 Vendor_Successful_Transactions
+    name = []  # 4 *Product_Name y
+    CVE = []  # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) don't worry about this
+    MS = []  # 6 Product_MS_Classification (Microsoft Security) don't worry about this
+    category = []  # 7 Product_Category y
+    describe = []  # 8 Product_Description
+    views = []  # 9 Product_Number_Of_Views
+    reviews = []  # 10 Product_Number_Of_Reviews
+    rating_item = []  # 11 Product_Rating
+    addDate = []  # 12 Product_AddDate
+    BTC = []  # 13 Product_BTC_SellingPrice
+    USD = []  # 14 Product_USD_SellingPrice y
+    EURO = []  # 15 Product_EURO_SellingPrice
+    sold = []  # 16 Product_QuantitySold
+    qLeft = []  # 17 Product_QuantityLeft
+    shipFrom = []  # 18 Product_ShippedFrom
+    shipTo = []  # 19 Product_ShippedTo
+    image = []  # 20 Product_Image
+    image_vendor = []  # 21 Vendor_Image
+    href = []  # 22 Product_Links
+
+    listing = soup.findAll('div', {"id": "itembox"})
+
+    # Populating the Number of Products
+    nm = len(listing)
+
+    for a in listing:
+        bae = a.findAll('a', href=True)
+        lb = a.findAll('div', {"id": "littlebox"})
+
+        # Adding the url to the list of urls
+        link = bae[0].get('href')
+        link = cleanLink(link)
+        href.append(link)
+
+        # Finding the Product
+        product = lb[1].find('a').text
+        product = product.replace('\n', ' ')
+        product = product.replace(",", "")
+        product = product.replace("...", "")
+        product = product.strip()
+        name.append(product)
+
+        # Finding Product Image
+        product_image = a.find('img')
+        product_image = product_image.get('src')
+        product_image = product_image.split('base64,')[-1]
+        image.append(product_image)
+
+        # Finding Prices
+        price = lb[-1].find('div', {"class": "mb-1"}).text
+        price = price.replace("$", "")
+        price = price.strip()
+        USD.append(price)
+
+        # Finding the Vendor
+        vendor_name = lb[-1].find("a").text
+        vendor_name = vendor_name.replace(",", "")
+        vendor_name = vendor_name.strip()
+        vendor.append(vendor_name)
+
+        image_vendor.append("-1")
+
+        # Finding the Category
+        cat = lb[-1].find("span").text
+        cat = cat.replace("class:", "")
+        cat = cat.strip()
+        category.append(cat)
+
+        span = lb[0].findAll("span")
+
+        # Finding Number of Views
+        num = span[0].text
+        num = num.replace("views:", "")
+        num = num.strip()
+        views.append(num)
+
+        # Finding Number Sold
+        num = span[2].text
+        num = num.replace("Sold:", "")
+        num = num.strip()
+        sold.append(num)
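+
+        # note: the stats spans on a listing tile are ordered [0] views, [1] stock, [2] sold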
+
+        # Finding Quantity Left
+        quant = span[1].text
+        quant = quant.replace("stock:", "")
+        quant = quant.strip()
+        qLeft.append(quant)
+
+        # add shipping information
+        ship = lb[2].findAll('small')[1].findAll('span')[1].text.split("->")
+        shipFrom.append(ship[0].replace("Ship from ", "").strip())
+        shipTo.append(ship[1].replace("to ", "").strip())
+
+        # Searching for CVE and MS categories
+        cve = a.findAll(text=re.compile(r'CVE-\d{4}-\d{4}'))
+        if not cve:
+            cveValue = "-1"
+        else:
+            cee = " "
+            for idx in cve:
+                cee += idx
+                cee += " "
+            cee = cee.replace(',', ' ')
+            cee = cee.replace('\n', '')
+            cveValue = cee
+        CVE.append(cveValue)
+
+        ms = a.findAll(text=re.compile(r'MS\d{2}-\d{3}'))
+        if not ms:
+            MSValue = "-1"
+        else:
+            me = " "
+            for im in ms:
+                me += im
+                me += " "
+            me = me.replace(',', ' ')
+            me = me.replace('\n', '')
+            MSValue = me
+        MS.append(MSValue)
+
+    # Populate the final variable (this should be a list with all fields scraped)
+    return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
+                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
+
+
+# called by the crawler to get description links on a listing page
+# @param: beautifulsoup object that is using the correct html page (listing page)
+# return: list of description links from a listing page
+def darkbazar_links_parser(soup):
+    # Returning all links that should be visited by the Crawler
+
+    href = []
+    listing = soup.findAll('div', {"id": "itembox"})
+
+    # for a in listing:
+    #     bae = a.find('a', {"class": "text-info"}, href=True)
+    #     link = bae['href']
+    #     href.append(link)
+
+    for a in listing:
+        bae = a.findAll('a', href=True)
+
+        # Adding the url to the list of urls
+        link = bae[0].get('href')
+        href.append(link)
+
+    return href
\ No newline at end of file