From d30c8066e307536b5e951ec07a15f08833074d5e Mon Sep 17 00:00:00 2001 From: westernmeadow Date: Tue, 20 Jun 2023 02:14:23 -0700 Subject: [PATCH] added setup.ini, global date, and persisting urls --- .idea/DW_Pipeline_Test.iml | 4 +- .idea/misc.xml | 2 +- Forums/CryptBB/crawler_selenium.py | 66 +++++----- Forums/CryptBB/parser.py | 57 ++++----- Forums/DB_Connection/db_connection.py | 16 ++- Forums/Initialization/forums_mining.py | 17 ++- Forums/Initialization/geckodriver.log | 119 ++++++++++++++++++ Forums/Initialization/prepare_parser.py | 41 +++--- Forums/Utilities/utilities.py | 2 +- MarketPlaces/DB_Connection/db_connection.py | 16 ++- MarketPlaces/Initialization/geckodriver.log | 32 +++++ MarketPlaces/Initialization/marketsList.txt | 2 +- MarketPlaces/Initialization/markets_mining.py | 6 +- MarketPlaces/Initialization/prepare_parser.py | 41 +++--- MarketPlaces/ThiefWorld/crawler_selenium.py | 55 ++++---- MarketPlaces/Tor2door/crawler_selenium.py | 55 ++++---- MarketPlaces/Tor2door/parser.py | 49 ++++---- path.txt | 3 - setup.ini | 14 +++ 19 files changed, 370 insertions(+), 227 deletions(-) delete mode 100644 path.txt create mode 100644 setup.ini diff --git a/.idea/DW_Pipeline_Test.iml b/.idea/DW_Pipeline_Test.iml index 71f5e9b..11bc817 100644 --- a/.idea/DW_Pipeline_Test.iml +++ b/.idea/DW_Pipeline_Test.iml @@ -2,7 +2,7 @@ - + @@ -12,6 +12,8 @@ diff --git a/.idea/misc.xml b/.idea/misc.xml index baf04e9..61a3499 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,4 +1,4 @@ - + \ No newline at end of file diff --git a/Forums/CryptBB/crawler_selenium.py b/Forums/CryptBB/crawler_selenium.py index 44db724..655f39a 100644 --- a/Forums/CryptBB/crawler_selenium.py +++ b/Forums/CryptBB/crawler_selenium.py @@ -12,17 +12,19 @@ from selenium.webdriver.firefox.service import Service from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait -from PIL import Image +from PIL import Image import urllib.parse as urlparse import os, re, time -from datetime import date import subprocess +import configparser from bs4 import BeautifulSoup from Forums.Initialization.prepare_parser import new_parse from Forums.CryptBB.parser import cryptBB_links_parser from Forums.Utilities.utilities import cleanHTML +config = configparser.ConfigParser() +config.read('../../setup.ini') counter = 1 baseURL = 'http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/' @@ -41,15 +43,14 @@ def startCrawling(): print(driver.current_url, e) closetor(driver) - # new_parse(forumName, False) + # new_parse(forumName, baseURL, False) # Opens Tor Browser def opentor(): global pid print("Connecting Tor...") - path = open('../../path.txt').readline().strip() - pro = subprocess.Popen(path) + pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pid = pro.pid time.sleep(7.5) input('Tor Connected. 
Press ENTER to continue\n') @@ -132,12 +133,9 @@ def closetor(driver): # Creates FireFox 'driver' and configure its 'Profile' # to use Tor proxy and socket def createFFDriver(): - file = open('../../path.txt', 'r') - lines = file.readlines() - - ff_binary = FirefoxBinary(lines[0].strip()) + ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) - ff_prof = FirefoxProfile(lines[1].strip()) + ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof.set_preference("places.history.enabled", False) ff_prof.set_preference("privacy.clearOnShutdown.offlineApps", True) ff_prof.set_preference("privacy.clearOnShutdown.passwords", True) @@ -145,7 +143,7 @@ def createFFDriver(): ff_prof.set_preference("privacy.sanitize.sanitizeOnShutdown", True) ff_prof.set_preference("signon.rememberSignons", False) ff_prof.set_preference("network.cookie.lifetimePolicy", 2) - ff_prof.set_preference("network.dns.disablePrefetch", True)# + ff_prof.set_preference("network.dns.disablePrefetch", True) ff_prof.set_preference("network.http.sendRefererHeader", 0) ff_prof.set_preference("permissions.default.image", 3) ff_prof.set_preference("browser.download.folderList", 2) @@ -159,7 +157,7 @@ def createFFDriver(): ff_prof.set_preference("javascript.enabled", True) ff_prof.update_preferences() - service = Service(lines[2].strip()) + service = Service(config.get('TOR', 'geckodriver_path')) driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service) @@ -170,10 +168,10 @@ def getAccess(): url = getFixedURL() driver = createFFDriver() try: - driver.get(url)# open url in browser + driver.get(url) return driver except: - driver.close()# close tab + driver.close() return 'down' @@ -188,15 +186,12 @@ def savePage(page, url): # Gets the full path of the page to be saved along with its appropriate file name def getFullPathName(url): + from Forums.Initialization.forums_mining import CURRENT_DATE fileName = getNameFromURL(url) if isDescriptionLink(url): - fullPath = r'..\\CryptBB\\HTML_Pages\\' + str( - "%02d" % date.today().month) + str("%02d" % date.today().day) + str( - "%04d" % date.today().year) + r'\\' + r'Description\\' + fileName + '.html' + fullPath = r'..\\CryptBB\\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html' else: - fullPath = r'..\\CryptBB\\HTML_Pages\\' + str( - "%02d" % date.today().month) + str("%02d" % date.today().day) + str( - "%04d" % date.today().year) + r'\\' + r'Listing\\' + fileName + '.html' + fullPath = r'..\\CryptBB\\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html' return fullPath @@ -204,7 +199,7 @@ def getFullPathName(url): def getNameFromURL(url): global counter name = ''.join(e for e in url if e.isalnum()) - if (name == ''): + if name == '': name = str(counter) counter = counter + 1 return name @@ -226,7 +221,7 @@ def getInterestedLinks(): # # Training Challenges # links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=96') # Darknet Discussions - #links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=88') + # links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=88') # # Public Leaks and Warez # links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=97') # # Hacked Accounts and Database Dumps @@ -251,7 +246,7 @@ def crawlForum(driver): print('Crawling :', link) try: try: - driver.get(link)# open + 
driver.get(link) except: driver.refresh() html = driver.page_source @@ -259,10 +254,17 @@ def crawlForum(driver): has_next_page = True - #loop through the topics while has_next_page: - list = topicPages(html)# for multiple pages + list = topicPages(html) for item in list: + itemURL = urlparse.urljoin(baseURL, str(item)) + try: + driver.get(itemURL) + except: + driver.refresh() + savePage(driver.page_source, item) + driver.back() + ''' #variable to check if there is a next page for the topic has_next_topic_page = True counter = 1 @@ -291,18 +293,19 @@ def crawlForum(driver): except NoSuchElementException: has_next_topic_page = False - #end of loop + # end of loop for i in range(counter): driver.back() + ''' # comment out - #break + break # comment out - #if count == 1: - # count = 0 - # break + if count == 1: + count = 0 + break - try:# change depending on web page, #next page + try: temp = driver.find_element(by=By.XPATH, value = '/html/body/div/div[2]/div/div[2]/div') link = temp.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute('href') @@ -346,7 +349,6 @@ def isListingLink(url): # calling the parser to define the links def topicPages(html): soup = BeautifulSoup(html, "html.parser") - #print(soup.find('div', id="container").find('div', id="content").find('table', {"class": "tborder clear"}).find('tbody').find('tr',{"class": "inline_row"}).find('strong').text) return cryptBB_links_parser(soup) diff --git a/Forums/CryptBB/parser.py b/Forums/CryptBB/parser.py index 0957b76..318b04e 100644 --- a/Forums/CryptBB/parser.py +++ b/Forums/CryptBB/parser.py @@ -15,15 +15,15 @@ def cryptBB_description_parser(soup): # Fields to be parsed - topic = "-1" # topic name - user = [] # all users of each post - addDate = [] # all dated of each post - feedback = [] # all feedbacks of each vendor (this was found in just one Forum and with a number format) - status = [] # all user's authority in each post such as (adm, member, dangerous) - reputation = [] # all user's karma in each post (usually found as a number) - sign = [] # all user's signature in each post (usually a standard message after the content of the post) - post = [] # all messages of each post - interest = [] # all user's interest in each post + topic = "-1" # 0 *topic name + user = [] # 1 *all users of each post + status = [] # 2 all user's authority in each post such as (adm, member, dangerous) + reputation = [] # 3 all user's karma in each post (usually found as a number) + interest = [] # 4 all user's interest in each post + sign = [] # 5 all user's signature in each post (usually a standard message after the content of the post) + post = [] # 6 all messages of each post + feedback = [] # 7 all feedbacks of each vendor (this was found in just one Forum and with a number format) + addDate = [] # 8 all dated of each post # Finding the topic (should be just one coming from the Listing Page) @@ -154,20 +154,6 @@ def cryptBB_description_parser(soup): feedback.append("-1") - ''' - except: - if soup.find('td', {"class": "trow1"}).text == " You do not have permission to access this page. 
": - user.append("-1") - status.append(-1) - interest.append(-1) - reputation.append(-1) - addDate.append(-1) - post.append("NO ACCESS TO THIS PAGE!") - sign.append(-1) - feedback.append(-1) - ''' - - # Populate the final variable (this should be a list with all fields scraped) row = (topic, user, status, reputation, interest, sign, post, feedback, addDate) @@ -180,17 +166,17 @@ def cryptBB_description_parser(soup): def cryptBB_listing_parser(soup): - board = "-1" # board name (the previous level of the topic in the Forum categorization tree. - # For instance: Security/Malware/Tools to hack Facebook. The board here should be Malware) - - nm = 0 # this variable should receive the number of topics - topic = [] # all topics - author = [] # all authors of each topic - views = [] # number of views of each topic - posts = [] # number of posts of each topic - addDate = [] # when the topic was created (difficult to find) - href = [] # this variable should receive all cleaned urls (we will use this to do the marge between - # Listing and Description pages) + nm = 0 # *this variable should receive the number of topics + forum = "CryptBB" # 0 *forum name + board = "-1" # 1 *board name (the previous level of the topic in the Forum categorization tree. + # For instance: Security/Malware/Tools to hack Facebook. The board here should be Malware) + topic = [] # 2 *all topics + author = [] # 3 *all authors of each topic + views = [] # 4 number of views of each topic + posts = [] # 5 number of posts of each topic + href = [] # 6 this variable should receive all cleaned urls (we will use this to do the marge between + # Listing and Description pages) + addDate = [] # 7 when the topic was created (difficult to find) # Finding the board (should be just one) @@ -223,7 +209,6 @@ def cryptBB_listing_parser(soup): link = itopic.find('span', {"class": "subject_old"}).find('a').get('href') except: link = itopic.find('span',{"class": "subject_new"}).find('a').get('href') - link = cleanLink(link) href.append(link) # Finding the author of the topic @@ -245,7 +230,7 @@ def cryptBB_listing_parser(soup): addDate.append("-1") - return organizeTopics("CryptBB", nm, topic, board, author, views, posts, href, addDate) + return organizeTopics(forum, nm, board, author, topic, views, posts, href, addDate) def cryptBB_links_parser(soup): diff --git a/Forums/DB_Connection/db_connection.py b/Forums/DB_Connection/db_connection.py index 619b85e..eeaf69b 100644 --- a/Forums/DB_Connection/db_connection.py +++ b/Forums/DB_Connection/db_connection.py @@ -2,15 +2,21 @@ __author__ = 'DarkWeb' import psycopg2 import traceback -import time -from datetime import date +import configparser def connectDataBase(): try: - return psycopg2.connect(host='localhost', user='postgres', password='password', dbname='darkweb_markets_forums') + config = configparser.ConfigParser() + config.read('../../setup.ini') + ip = config.get('PostgreSQL', 'ip') + username = config.get('PostgreSQL', 'username') + password = config.get('PostgreSQL', 'password') + database = config.get('PostgreSQL', 'database') + + return psycopg2.connect(host=ip, user=username, password=password, dbname=database) except: @@ -197,7 +203,7 @@ def getLastPost(cur): ''' -def create_forum(cur, row): +def create_forum(cur, row, url): forumId = verifyForum(cur, row[0]) @@ -207,7 +213,7 @@ def create_forum(cur, row): sql = "Insert into forums (forum_id, name_forum, url_forum, dateinserted_forum) Values (%s, %s, %s, %s)" - recset = [forumId, row[0], None, row[8]] + recset = [forumId, row[0], url, 
row[8]] cur.execute(sql, recset) diff --git a/Forums/Initialization/forums_mining.py b/Forums/Initialization/forums_mining.py index 71907e0..f431f97 100644 --- a/Forums/Initialization/forums_mining.py +++ b/Forums/Initialization/forums_mining.py @@ -9,10 +9,12 @@ from datetime import * from Forums.BestCardingWorld.crawler_selenium import crawler as crawlerBestCardingWorld from Forums.CryptBB.crawler_selenium import crawler as crawlerCryptBB from Forums.OnniForums.crawler_selenium import crawler as crawlerOnniForums -#from Forums.CrackingPro.crawler_selenium import crawler as crawlerCrackingPro +# from Forums.CrackingPro.crawler_selenium import crawler as crawlerCrackingPro import time +CURRENT_DATE = str("%02d" % date.today().month) + str("%02d" % date.today().day) + str("%04d" % date.today().year) + # reads list of marketplaces manually inputted def getForums(): @@ -30,8 +32,6 @@ def createDirectory(forum): pagesMainDir = '../' + forum else: pagesMainDir = '../' + forum + "/HTML_Pages" - # sharedFolderPath = r'\\VBoxSvr\VM_Files_(shared)' - # pagesMainDir = os.path.join(sharedFolderPath, 'HTML/Forums/' + forum + '/HTML_Pages') if not os.path.isdir(pagesMainDir): os.makedirs(pagesMainDir) @@ -58,7 +58,7 @@ def createRedditsSubdirectories(pagesMainDir): def createSubdirectories(pagesDir): - currentDateDir = pagesDir + '/' + str("%02d" %date.today().month) + str("%02d" %date.today().day) + str("%04d" %date.today().year) + currentDateDir = pagesDir + '/' + CURRENT_DATE if not os.path.isdir(currentDateDir): os.mkdir(currentDateDir) @@ -79,19 +79,19 @@ def createSubdirectories(pagesDir): os.mkdir(descReadDir) -#main method +# main method if __name__ == '__main__': - #assignment from forumsList.txt + # assignment from forumsList.txt forumsList = getForums() - #get forum from forumsList + # get forum from forumsList for forum in forumsList: forum = forum.replace('\n','') print("Creating listing and description directories ...") createDirectory(forum) - time.sleep(5) #wait for directories to be created + time.sleep(5) # wait for directories to be created input("Directories created successfully. Press ENTER to continue\n") if forum == "BestCardingWorld": @@ -103,7 +103,6 @@ if __name__ == '__main__': elif forum == "CrackingPro": crawlerCrackingPro() - print("Scraping process completed successfully!") diff --git a/Forums/Initialization/geckodriver.log b/Forums/Initialization/geckodriver.log index c206435..15928b8 100644 --- a/Forums/Initialization/geckodriver.log +++ b/Forums/Initialization/geckodriver.log @@ -3963,3 +3963,122 @@ JavaScript error: resource://gre/modules/ExtensionTelemetry.jsm, line 109: Error JavaScript error: resource://gre/modules/ExtensionTelemetry.jsm, line 113: Error: TelemetryStopwatch: finishing nonexisting stopwatch. Histogram: "WEBEXT_CONTENT_SCRIPT_INJECTION_MS_BY_ADDONID", key: "{73a6fe31-595d-460b-a920-fcc0f8843232}" JavaScript error: resource://gre/modules/ExtensionTelemetry.jsm, line 109: Error: TelemetryStopwatch: finishing nonexisting stopwatch. Histogram: "WEBEXT_CONTENT_SCRIPT_INJECTION_MS", key: "" JavaScript error: resource://gre/modules/ExtensionTelemetry.jsm, line 113: Error: TelemetryStopwatch: finishing nonexisting stopwatch. 
Histogram: "WEBEXT_CONTENT_SCRIPT_INJECTION_MS_BY_ADDONID", key: "{73a6fe31-595d-460b-a920-fcc0f8843232}" +1687240079948 geckodriver INFO Listening on 127.0.0.1:50448 +1687240084735 mozrunner::runner INFO Running command: "C:\\Users\\calsyslab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50449" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofileuYe2AP" +console.log: "TorSettings: loadFromPrefs()" +console.log: "TorConnect: init()" +console.log: "TorConnect: Entering Initial state" +console.log: "TorConnect: Observed profile-after-change" +console.log: "TorConnect: Observing topic 'TorProcessExited'" +console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'" +console.log: "TorConnect: Observing topic 'torsettings:ready'" +console.log: "TorSettings: Observed profile-after-change" +1687240085868 Marionette INFO Marionette enabled +console.log: "TorConnect: Will load after bootstrap => [about:blank]" +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +console.error: "Could not load engine blockchair-onion@search.mozilla.org: Error: Extension is invalid" +JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined +DevTools listening on ws://localhost:50449/devtools/browser/e85e6865-1f97-480a-8e46-778271184a87 +1687240090364 Marionette INFO Listening on port 50454 +1687240090846 RemoteAgent WARN TLS certificate errors will be ignored for this session +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/, line 2: ReferenceError: $ is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/member.php?action=login, line 2: ReferenceError: $ is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/member.php?action=login, line 5: ReferenceError: lang is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/member.php?action=login, line 9: ReferenceError: use_xmlhttprequest is not defined +JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\calsyslab\AppData\Local\Temp\rust_mozprofileuYe2AP\thumbnails) because it does not exist +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=86, line 3: ReferenceError: lang is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/inline_edit.js?ver=1808, line 6: ReferenceError: $ is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628, line 6: ReferenceError: lang is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/report.js?ver=1804, line 4: ReferenceError: $ is not defined +JavaScript error: 
http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/thread.js?ver=1809, line 4: ReferenceError: $ is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628, line 19: ReferenceError: use_xmlhttprequest is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628, line 25: ReferenceError: $ is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628&page=2, line 6: ReferenceError: lang is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/report.js?ver=1804, line 4: ReferenceError: $ is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/thread.js?ver=1809, line 4: ReferenceError: $ is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628&page=2, line 19: ReferenceError: use_xmlhttprequest is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628&page=2, line 25: ReferenceError: $ is not defined +1687240218310 Marionette INFO Stopped listening on port 50454 +JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver] +!!! error running onStopped callback: TypeError: callback is not a function +JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first. +JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\calsyslab\AppData\Local\Temp\rust_mozprofileuYe2AP\thumbnails) because it does not exist +1687240220095 RemoteAgent ERROR unable to stop listener: [Exception... 
"Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64 +unwatchForTabs()@TargetList.jsm:70 +unwatchForTargets()@TargetList.jsm:37 +destructor()@TargetList.jsm:109 +stop()@CDP.jsm:104 +close()@RemoteAgent.jsm:138 +1687240311209 geckodriver INFO Listening on 127.0.0.1:50519 +1687240315070 mozrunner::runner INFO Running command: "C:\\Users\\calsyslab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "50520" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofiletzrkDs" +console.log: "TorSettings: loadFromPrefs()" +console.log: "TorConnect: init()" +console.log: "TorConnect: Entering Initial state" +console.log: "TorConnect: Observed profile-after-change" +console.log: "TorConnect: Observing topic 'TorProcessExited'" +console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'" +console.log: "TorConnect: Observing topic 'torsettings:ready'" +console.log: "TorSettings: Observed profile-after-change" +1687240315958 Marionette INFO Marionette enabled +console.log: "TorConnect: Will load after bootstrap => [about:blank]" +console.error: "Could not load engine blockchair-onion@search.mozilla.org: Error: Extension is invalid" +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. 
+JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined +DevTools listening on ws://localhost:50520/devtools/browser/4b6276ea-c420-4b6d-b4bc-fda679f97800 +1687240317156 Marionette INFO Listening on port 50525 +1687240317256 RemoteAgent WARN TLS certificate errors will be ignored for this session +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/, line 2: ReferenceError: $ is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/member.php?action=login, line 2: ReferenceError: $ is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/member.php?action=login, line 5: ReferenceError: lang is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/member.php?action=login, line 9: ReferenceError: use_xmlhttprequest is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=86, line 3: ReferenceError: lang is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/inline_edit.js?ver=1808, line 6: ReferenceError: $ is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628, line 6: ReferenceError: lang is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/report.js?ver=1804, line 4: ReferenceError: $ is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/thread.js?ver=1809, line 4: ReferenceError: $ is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628, line 19: ReferenceError: use_xmlhttprequest is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628, line 25: ReferenceError: $ is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=86, line 3: ReferenceError: lang is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/inline_edit.js?ver=1808, line 6: ReferenceError: $ is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=86&page=2, line 3: ReferenceError: lang is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/inline_edit.js?ver=1808, line 6: ReferenceError: $ is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=16404, line 
6: ReferenceError: lang is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/report.js?ver=1804, line 4: ReferenceError: $ is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/thread.js?ver=1809, line 4: ReferenceError: $ is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=16404, line 19: ReferenceError: use_xmlhttprequest is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=16404, line 25: ReferenceError: $ is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=86&page=2, line 3: ReferenceError: lang is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined +JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/inline_edit.js?ver=1808, line 6: ReferenceError: $ is not defined +1687240409940 Marionette INFO Stopped listening on port 50525 +JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver] + +###!!! [Parent][MessageChannel] Error: (msgtype=0x140007,name=PBackgroundLSDatabase::Msg_RequestAllowToClose) Channel error: cannot send/recv + +[Parent 1036, IPC I/O Parent] WARNING: file /var/tmp/build/firefox-b6010b1466c9/ipc/chromium/src/base/process_util_win.cc:167 +!!! error running onStopped callback: TypeError: callback is not a function +JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first. +JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\calsyslab\AppData\Local\Temp\rust_mozprofiletzrkDs\thumbnails) because it does not exist + +###!!! [Child][MessageChannel] Error: (msgtype=0x5D0005,name=PImageBridge::Msg_WillClose) Channel error: cannot send/recv + +1687240410572 RemoteAgent ERROR unable to stop listener: [Exception... 
"Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64 +unwatchForTabs()@TargetList.jsm:70 +unwatchForTargets()@TargetList.jsm:37 +destructor()@TargetList.jsm:109 +stop()@CDP.jsm:104 +close()@RemoteAgent.jsm:138 diff --git a/Forums/Initialization/prepare_parser.py b/Forums/Initialization/prepare_parser.py index 23d97f1..2efb84d 100644 --- a/Forums/Initialization/prepare_parser.py +++ b/Forums/Initialization/prepare_parser.py @@ -9,7 +9,7 @@ from Forums.BestCardingWorld.parser import * from Forums.CryptBB.parser import * from Forums.Classifier.classify_product import predict -#from DarkWebMining_Sample.Forums.Classifier.classify_product import predict_semi +# from DarkWebMining_Sample.Forums.Classifier.classify_product import predict_semi # determines if forum is russian, not really used now but maybe later @@ -62,9 +62,9 @@ def getPosts(posts): #uses db connection , another program, methods to persists values to the correct categories #@param: row is the list of entries for this instance, cur is the db connection object -def persist_data(row, cur): +def persist_data(url, row, cur): - forum = create_forum(cur, row) + forum = create_forum(cur, row, url) board = create_board(cur, row, forum) @@ -77,15 +77,13 @@ def persist_data(row, cur): #main method for this program, what actually gets the parsed info from the parser, and persists them into the db #calls the different parser methods here depending on the type of html page -def new_parse(forum, createLog): +def new_parse(forum, url, createLog): - print("Parsing The " + forum + " Forum and conduct data classification to store the information in the database.") - - crawlerDate = date.today() + from Forums.Initialization.forums_mining import CURRENT_DATE - ini = time.time() + print("Parsing The " + forum + " Forum and conduct data classification to store the information in the database.") - global site + # ini = time.time() # Connecting to the database con = connectDataBase() @@ -96,27 +94,26 @@ def new_parse(forum, createLog): nError = 0 - lines = [] #lines.clear() - lns = [] #lns.clear() + lines = [] # listing pages + lns = [] # description pages detPage = {} - rw = [] # Creating the log file for each Forum if createLog: - if not os.path.exists("./" + forum + "/Logs/" + forum + "_" + str("%02d" %crawlerDate.month) + str("%02d" %crawlerDate.day) + str("%04d" %crawlerDate.year) + ".log"): - logFile = open("./" + forum + "/Logs/" + forum + "_" + str("%02d" %crawlerDate.today().month) + str("%02d" %crawlerDate.day) + str("%04d" %crawlerDate.year) + ".log", "w") + if not os.path.exists("./" + forum + "/Logs/" + forum + "_" + CURRENT_DATE + ".log"): + logFile = open("./" + forum + "/Logs/" + forum + "_" + CURRENT_DATE + ".log", "w") else: - print("Files of the date " + str("%02d" %crawlerDate.today().month) + str("%02d" %crawlerDate.today().day) + str("%04d" %crawlerDate.today().year) + - " from the Forum " + forum + " were already read. Delete the referent information in the Data Base and also delete the log file " - "in the _Logs folder to read files from this Forum of this date again.") + print("Files of the date " + CURRENT_DATE + " from the Forum " + forum + + " were already read. 
Delete the referent information in the Data Base and also delete the log file" + " in the _Logs folder to read files from this Forum of this date again.") raise SystemExit # Reading the Listing Html Pages - for fileListing in glob.glob(os.path.join (os.getcwd().replace("Initialization","") + forum + "\\HTML_Pages\\" + str("%02d" %crawlerDate.month) + str("%02d" %crawlerDate.day) + str("%04d" %crawlerDate.year) + "\\Listing" ,'*.html')): + for fileListing in glob.glob(os.path.join("..\\" + forum + "\\HTML_Pages\\" + CURRENT_DATE + "\\Listing", '*.html')): lines.append(fileListing) # Reading the Description Html Pages - for fileDescription in glob.glob(os.path.join (os.getcwd().replace("Initialization","") + forum + "\\HTML_Pages\\" + str("%02d" %crawlerDate.month) + str("%02d" %crawlerDate.day) + str("%04d" %crawlerDate.year) + "\\Description" ,'*.html')): + for fileDescription in glob.glob(os.path.join("..\\" + forum + "\\HTML_Pages\\" + CURRENT_DATE + "\\Description" ,'*.html')): lns.append(fileDescription) # Parsing the Description Pages and put the tag's content into a dictionary (Hash table) @@ -218,9 +215,7 @@ def new_parse(forum, createLog): rec = rec.split(',') # key = u"Top:" + rec[1].upper().strip() + u" User:" + rec[5].upper().strip() - # key = rec[16] - url = ''.join(e for e in rec[6] if e.isalnum()) - key = u"Url:" + url + key = u"Url:" + cleanLink(rec[6]) if key in detPage: @@ -237,7 +232,7 @@ def new_parse(forum, createLog): # Persisting the information in the database try: - persist_data(tuple(rec), cur) + persist_data(url, tuple(rec), cur) con.commit() except: diff --git a/Forums/Utilities/utilities.py b/Forums/Utilities/utilities.py index 9d64cb6..d8ca9eb 100644 --- a/Forums/Utilities/utilities.py +++ b/Forums/Utilities/utilities.py @@ -160,7 +160,7 @@ def cleanLink(originalLink): return originalLink -def organizeTopics(forum, nm, topic, board, author, views, posts, href, addDate): +def organizeTopics(forum, nm, board, author, topic, views, posts, href, addDate): day = time.strftime("%m/%d/%Y") ahora = time.strftime("%I:%M:%S") diff --git a/MarketPlaces/DB_Connection/db_connection.py b/MarketPlaces/DB_Connection/db_connection.py index 9cabf34..97296e3 100644 --- a/MarketPlaces/DB_Connection/db_connection.py +++ b/MarketPlaces/DB_Connection/db_connection.py @@ -2,15 +2,21 @@ __author__ = 'DarkWeb' import psycopg2 import traceback -import time -from datetime import date +import configparser def connectDataBase(): try: - return psycopg2.connect(host='localhost', user='postgres', password='password', dbname='darkweb_markets_forums') + config = configparser.ConfigParser() + config.read('../../setup.ini') + ip = config.get('PostgreSQL', 'ip') + username = config.get('PostgreSQL', 'username') + password = config.get('PostgreSQL', 'password') + database = config.get('PostgreSQL', 'database') + + return psycopg2.connect(host=ip, user=username, password=password, dbname=database) except: @@ -95,7 +101,7 @@ def getLastVendor(cur): print (trace) -def create_marketPlace(cur, row): +def create_marketPlace(cur, row, url): marketId = verifyMarketPlace(cur, row[0]) @@ -105,7 +111,7 @@ def create_marketPlace(cur, row): sql = "Insert into marketplaces (market_id, name_market, url_market, dateinserted_market) " \ "Values (%s, %s, %s, %s)" - recset = [marketId, row[0], None, row[21]] + recset = [marketId, row[0], url, row[21]] cur.execute(sql, recset) diff --git a/MarketPlaces/Initialization/geckodriver.log b/MarketPlaces/Initialization/geckodriver.log index 51d45ff..7f95777 100644 --- 
a/MarketPlaces/Initialization/geckodriver.log +++ b/MarketPlaces/Initialization/geckodriver.log @@ -6073,3 +6073,35 @@ unwatchForTargets()@TargetList.jsm:37 destructor()@TargetList.jsm:109 stop()@CDP.jsm:104 close()@RemoteAgent.jsm:138 +1687245533907 geckodriver INFO Listening on 127.0.0.1:62051 +1687245536832 mozrunner::runner INFO Running command: "C:\\Users\\calsyslab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "62052" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofileuMGaeY" +console.log: "TorSettings: loadFromPrefs()" +console.log: "TorConnect: init()" +console.log: "TorConnect: Entering Initial state" +console.log: "TorConnect: Observed profile-after-change" +console.log: "TorConnect: Observing topic 'TorProcessExited'" +console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'" +console.log: "TorConnect: Observing topic 'torsettings:ready'" +console.log: "TorSettings: Observed profile-after-change" +1687245537956 Marionette INFO Marionette enabled +console.log: "TorConnect: Will load after bootstrap => [about:blank]" +console.error: "Could not load engine blockchair-onion@search.mozilla.org: Error: Extension is invalid" +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. +JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined +DevTools listening on ws://localhost:62052/devtools/browser/9cf17e56-2fb1-468d-b65e-15c4de4eaa64 +1687245540759 Marionette INFO Listening on port 49935 +1687245540897 RemoteAgent WARN TLS certificate errors will be ignored for this session +1687245639406 Marionette INFO Stopped listening on port 49935 +JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver] +!!! error running onStopped callback: TypeError: callback is not a function +JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first. +JavaScript error: resource://gre/modules/PageThumbs.jsm, line 709: AbortError: IOUtils.profileBeforeChange getter: IOUtils: profileBeforeChange phase has already finished +1687245650576 RemoteAgent ERROR unable to stop listener: [Exception... 
"Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64 +unwatchForTabs()@TargetList.jsm:70 +unwatchForTargets()@TargetList.jsm:37 +destructor()@TargetList.jsm:109 +stop()@CDP.jsm:104 +close()@RemoteAgent.jsm:138 diff --git a/MarketPlaces/Initialization/marketsList.txt b/MarketPlaces/Initialization/marketsList.txt index 87f811c..b85ae71 100644 --- a/MarketPlaces/Initialization/marketsList.txt +++ b/MarketPlaces/Initialization/marketsList.txt @@ -1 +1 @@ -ThiefWorld \ No newline at end of file +Tor2door \ No newline at end of file diff --git a/MarketPlaces/Initialization/markets_mining.py b/MarketPlaces/Initialization/markets_mining.py index 42bb51c..3073612 100644 --- a/MarketPlaces/Initialization/markets_mining.py +++ b/MarketPlaces/Initialization/markets_mining.py @@ -12,6 +12,8 @@ from MarketPlaces.ThiefWorld.crawler_selenium import crawler as crawlerThiefWorl import time +CURRENT_DATE = str("%02d" % date.today().month) + str("%02d" % date.today().day) + str("%04d" % date.today().year) + # reads list of marketplaces def getMarkets(): @@ -26,12 +28,10 @@ def createDirectory(mkt): # Package should already be there, holding crawler and parser pagesDir = '../' + mkt + '/HTML_Pages' - # sharedFolderPath = r'\\VBoxSvr\VM_Files_(shared)' - # pagesDir = os.path.join(sharedFolderPath, 'HTML/MarketPlaces/' + mkt + '/HTML_Pages') if not os.path.isdir(pagesDir): os.makedirs(pagesDir) - currentDateDir = pagesDir + '/' + str("%02d" %date.today().month) + str("%02d" %date.today().day) + str("%04d" %date.today().year) + currentDateDir = pagesDir + '/' + CURRENT_DATE if not os.path.isdir(currentDateDir): os.mkdir(currentDateDir) diff --git a/MarketPlaces/Initialization/prepare_parser.py b/MarketPlaces/Initialization/prepare_parser.py index 2389834..de13899 100644 --- a/MarketPlaces/Initialization/prepare_parser.py +++ b/MarketPlaces/Initialization/prepare_parser.py @@ -60,55 +60,52 @@ def mergePages(rmm, rec): return rec -def persist_data(row, cur): +def persist_data(url, row, cur): - marketPlace = create_marketPlace(cur, row) + marketPlace = create_marketPlace(cur, row, url) vendor = create_vendor(cur, row, marketPlace) create_items(cur, row, marketPlace, vendor) -def new_parse(marketPlace, createLog): +def new_parse(marketPlace, url, createLog): - print("Parsing the " + marketPlace + " marketplace and conduct data classification to store the information in the database.") + from MarketPlaces.Initialization.markets_mining import CURRENT_DATE - crawlerDate = date.today() + print("Parsing the " + marketPlace + " marketplace and conduct data classification to store the information in the database.") # ini = time.time() - global site - - #Connecting to the database + # Connecting to the database con = connectDataBase() cur = con.cursor() - #Creating the tables (The database should be created manually) + # Creating the tables (The database should be created manually) create_database(cur, con) nError = 0 - lines = [] #lines.clear() - lns = [] #lns.clear() + lines = [] # listing pages + lns = [] # description pages detPage = {} - rw = [] #Creating the log file for each Market Place if createLog: - if not os.path.exists("./" + marketPlace + "/Logs/" + marketPlace + "_" + str("%02d" %crawlerDate.month) + str("%02d" %crawlerDate.day) + str("%04d" %crawlerDate.year) + 
".log"): - logFile = open("./" + marketPlace + "/Logs/" + marketPlace + "_" + str("%02d" %crawlerDate.month) + str("%02d" %crawlerDate.day) + str("%04d" %crawlerDate.year) + ".log", "w") + if not os.path.exists("./" + marketPlace + "/Logs/" + marketPlace + "_" + CURRENT_DATE + ".log"): + logFile = open("./" + marketPlace + "/Logs/" + marketPlace + "_" + CURRENT_DATE + ".log", "w") else: - print("Files of the date " + str("%02d" %crawlerDate.month) + "/" + str("%02d" %crawlerDate.day) + "/" + str("%04d" %crawlerDate.year) + - " from the Market Place " + marketPlace + " were already read. Delete the referent information in the Data Base and also delete the log file " - "in the _Logs folder to read files from this Market Place of this date again.") + print("Files of the date " + CURRENT_DATE + " from the Market Place " + marketPlace + + " were already read. Delete the referent information in the Data Base and also delete the log file" + " in the _Logs folder to read files from this Market Place of this date again.") raise SystemExit # Reading the Listing Html Pages - for fileListing in glob.glob(os.path.join (os.getcwd().replace("Initialization","") + marketPlace + "\\HTML_Pages\\" + str("%02d" %crawlerDate.month) + str("%02d" %crawlerDate.day) + str("%04d" %crawlerDate.year) + "\\Listing" ,'*.html')): + for fileListing in glob.glob(os.path.join("..\\" + marketPlace + "\\HTML_Pages\\" + CURRENT_DATE + "\\Listing", '*.html')): lines.append(fileListing) # Reading the Description Html Pages - for fileDescription in glob.glob(os.path.join (os.getcwd().replace("Initialization","") + marketPlace + "\\HTML_Pages\\" + str("%02d" %crawlerDate.month) + str("%02d" %crawlerDate.day) + str("%04d" %crawlerDate.year) + "\\Description" ,'*.html')): + for fileDescription in glob.glob(os.path.join("..\\" + marketPlace + "\\HTML_Pages\\" + CURRENT_DATE + "\\Description", '*.html')): lns.append(fileDescription) # Parsing the Description Pages and put the tag's content into a dictionary (Hash table) @@ -214,9 +211,7 @@ def new_parse(marketPlace, createLog): # key = rec[23] # key = u"Pr:" + rec[1].upper()[:list_lim1] + u" Vendor:" + rec[18].upper()[:list_lim2] - # key = u"Pr:" + rec[1].upper() - url = ''.join(e for e in rec[20] if e.isalnum()) - key = u"Url:" + url + key = u"Url:" + cleanLink(rec[20]) # if the associated description page is parsed if key in detPage: @@ -233,7 +228,7 @@ def new_parse(marketPlace, createLog): # Persisting the information in the database try: - persist_data(tuple(rec), cur) + persist_data(url, tuple(rec), cur) con.commit() except: diff --git a/MarketPlaces/ThiefWorld/crawler_selenium.py b/MarketPlaces/ThiefWorld/crawler_selenium.py index 34d606d..3d3c28a 100644 --- a/MarketPlaces/ThiefWorld/crawler_selenium.py +++ b/MarketPlaces/ThiefWorld/crawler_selenium.py @@ -15,14 +15,17 @@ from selenium.webdriver.common.by import By from PIL import Image import urllib.parse as urlparse -import os, time +import os, re, time from datetime import date import subprocess +import configparser from bs4 import BeautifulSoup from MarketPlaces.Initialization.prepare_parser import new_parse from MarketPlaces.ThiefWorld.parser import thiefworld_links_parser from MarketPlaces.Utilities.utilities import cleanHTML +config = configparser.ConfigParser() +config.read('../../setup.ini') counter = 1 baseURL = 'http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion/' @@ -31,7 +34,7 @@ baseURL = 'http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion #acts like the main method for the 
crawler, another function at the end of this code calls this function later def startCrawling(): opentor() - mktName = getMKTName() + # mktName = getMKTName() driver = getAccess() if driver != 'down': @@ -42,7 +45,7 @@ def startCrawling(): print(driver.current_url, e) closetor(driver) - #new_parse(mktName, False) + # new_parse(mktName, False) # Opens Tor Browser @@ -50,8 +53,7 @@ def startCrawling(): def opentor(): global pid print("Connecting Tor...") - path = open('../../path.txt').readline().strip() - pro = subprocess.Popen(path) + pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pid = pro.pid time.sleep(7.5) input('Tor Connected. Press ENTER to continue\n') @@ -61,7 +63,7 @@ def opentor(): # Returns the name of the website #return: name of site in string type def getMKTName(): - name = 'TheifWorld' + name = 'ThiefWorld' return name @@ -87,12 +89,9 @@ def closetor(driver): # Creates FireFox 'driver' and configure its 'Profile' # to use Tor proxy and socket def createFFDriver(): - file = open('../../path.txt', 'r') - lines = file.readlines() + ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) - ff_binary = FirefoxBinary(lines[0].strip()) - - ff_prof = FirefoxProfile(lines[1].strip()) + ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof.set_preference("places.history.enabled", False) ff_prof.set_preference("privacy.clearOnShutdown.offlineApps", True) ff_prof.set_preference("privacy.clearOnShutdown.passwords", True) @@ -114,7 +113,7 @@ def createFFDriver(): ff_prof.set_preference("javascript.enabled", False) ff_prof.update_preferences() - service = Service(lines[2].strip()) + service = Service(config.get('TOR', 'geckodriver_path')) driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service) @@ -162,15 +161,12 @@ def savePage(page, url): # Gets the full path of the page to be saved along with its appropriate file name #@param: raw url as crawler crawls through every site def getFullPathName(url): + from MarketPlaces.Initialization.markets_mining import CURRENT_DATE fileName = getNameFromURL(url) if isDescriptionLink(url): - fullPath = r'..\ThiefWorld\HTML_Pages\\' + str( - "%02d" % date.today().month) + str("%02d" % date.today().day) + str( - "%04d" % date.today().year) + r'\\' + r'Description\\' + fileName + '.html' + fullPath = r'..\ThiefWorld\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html' else: - fullPath = r'..\ThiefWorld\HTML_Pages\\' + str( - "%02d" % date.today().month) + str("%02d" % date.today().day) + str( - "%04d" % date.today().year) + r'\\' + r'Listing\\' + fileName + '.html' + fullPath = r'..\ThiefWorld\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html' return fullPath @@ -191,14 +187,15 @@ def getNameFromURL(url): #as you can see they are categories of products def getInterestedLinks(): links = [] + # Hacking and DDOS links.append('http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion/catalog/35') # # Carding Manuals - links.append('http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion/catalog/20') + # links.append('http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion/catalog/20') # # Software - links.append('http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion/catalog/37') - # #Database - links.append('http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion/catalog/38') + # 
links.append('http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion/catalog/37') + # # Database + # links.append('http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion/catalog/38') return links @@ -238,13 +235,13 @@ def crawlForum(driver): savePage(driver.page_source, item) driver.back() - # # comment out - # break - # - # # # comment out - # if count == 1: - # count = 0 - # break + # comment out + break + + # comment out + if count == 1: + count = 0 + break try: link = driver.find_element(by=By.XPATH, value= diff --git a/MarketPlaces/Tor2door/crawler_selenium.py b/MarketPlaces/Tor2door/crawler_selenium.py index b7e7937..baef719 100644 --- a/MarketPlaces/Tor2door/crawler_selenium.py +++ b/MarketPlaces/Tor2door/crawler_selenium.py @@ -15,41 +15,42 @@ from selenium.webdriver.support.ui import WebDriverWait from PIL import Image import urllib.parse as urlparse -import os, time -from datetime import date +import os, re, time import subprocess +import configparser from bs4 import BeautifulSoup from MarketPlaces.Initialization.prepare_parser import new_parse from MarketPlaces.Tor2door.parser import tor2door_links_parser from MarketPlaces.Utilities.utilities import cleanHTML +config = configparser.ConfigParser() +config.read('../../setup.ini') counter = 1 -baseURL = 'http://http://yzrrne3pveltulbavydr2kiashvlnysdwclwmklo6cyjuqpxi7ku4xqd.onion' +baseURL = 'http://yzrrne3pveltulbavydr2kiashvlnysdwclwmklo6cyjuqpxi7ku4xqd.onion' # Opens Tor Browser, crawls the website def startCrawling(): - opentor() - # marketName = getMarketName() - driver = getAccess() - - if driver != 'down': - try: - login(driver) - crawlForum(driver) - except Exception as e: - print(driver.current_url, e) - closetor(driver) - - # new_parse(marketName, False) + # opentor() + marketName = getMarketName() + # driver = getAccess() + # + # if driver != 'down': + # try: + # login(driver) + # crawlForum(driver) + # except Exception as e: + # print(driver.current_url, e) + # closetor(driver) + # + new_parse(marketName, baseURL, False) # Opens Tor Browser def opentor(): global pid print("Connecting Tor...") - path = open('../../path.txt').readline().strip() - pro = subprocess.Popen(path) + pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pid = pro.pid time.sleep(7.5) input('Tor Connected. 
Press ENTER to continue\n') @@ -130,12 +131,9 @@ def closetor(driver): # Creates FireFox 'driver' and configure its 'Profile' # to use Tor proxy and socket def createFFDriver(): - file = open('../../path.txt', 'r') - lines = file.readlines() - - ff_binary = FirefoxBinary(lines[0].strip()) + ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) - ff_prof = FirefoxProfile(lines[1].strip()) + ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof.set_preference("places.history.enabled", False) ff_prof.set_preference("privacy.clearOnShutdown.offlineApps", True) ff_prof.set_preference("privacy.clearOnShutdown.passwords", True) @@ -157,7 +155,7 @@ def createFFDriver(): ff_prof.set_preference("javascript.enabled", False) ff_prof.update_preferences() - service = Service(executable_path=lines[2].strip()) + service = Service(config.get('TOR', 'geckodriver_path')) driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service) @@ -186,15 +184,12 @@ def savePage(page, url): # Gets the full path of the page to be saved along with its appropriate file name def getFullPathName(url): + from MarketPlaces.Initialization.markets_mining import CURRENT_DATE fileName = getNameFromURL(url) if isDescriptionLink(url): - fullPath = r'..\Tor2door\HTML_Pages\\' + str( - "%02d" % date.today().month) + str("%02d" % date.today().day) + str( - "%04d" % date.today().year) + r'\\' + r'Description\\' + fileName + '.html' + fullPath = r'..\Tor2door\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html' else: - fullPath = r'..\Tor2door\HTML_Pages\\' + str( - "%02d" % date.today().month) + str("%02d" % date.today().day) + str( - "%04d" % date.today().year) + r'\\' + r'Listing\\' + fileName + '.html' + fullPath = r'..\Tor2door\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html' return fullPath diff --git a/MarketPlaces/Tor2door/parser.py b/MarketPlaces/Tor2door/parser.py index 105fc99..f4a4c07 100644 --- a/MarketPlaces/Tor2door/parser.py +++ b/MarketPlaces/Tor2door/parser.py @@ -12,10 +12,10 @@ def tor2door_description_parser(soup): # Fields to be parsed - vendor = "-1" # 0 Vendor_Name + vendor = "-1" # 0 *Vendor_Name success = "-1" # 1 Vendor_Successful_Transactions rating_vendor = "-1" # 2 Vendor_Rating - name = "-1" # 3 Product_Name + name = "-1" # 3 *Product_Name describe = "-1" # 4 Product_Description CVE = "-1" # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) MS = "-1" # 6 Product_MS_Classification (Microsoft Security) @@ -118,28 +118,28 @@ def tor2door_description_parser(soup): def tor2door_listing_parser(soup): # Fields to be parsed - nm = 0 # Total_Products (Should be Integer) - mktName = "Tor2door" # 0 Marketplace_Name - vendor = [] # 18 Vendor y - rating_vendor = [] # 19 Vendor_Rating - success = [] # 20 Vendor_Successful_Transactions - name = [] # 1 Product_Name y - CVE = [] # 2 Product_CVE_Classification (Common Vulnerabilities and Exposures) - MS = [] # 3 Product_MS_Classification (Microsoft Security) - category = [] # 4 Product_Category y - describe = [] # 5 Product_Description - views = [] # 7 Product_Number_Of_Views - reviews = [] # 7 Product_Number_Of_Reviews - rating_item = [] # 8 Product_Rating - addDate = [] # 9 Product_AddDate - BTC = [] # 11 Product_BTC_SellingPrice - USD = [] # 12 Product_USD_SellingPrice y - EURO = [] # 13 Product_EURO_SellingPrice - sold = [] # 14 Product_QuantitySold - qLeft =[] # 15 Product_QuantityLeft - shipFrom = [] # 16 Product_ShippedFrom - shipTo = [] # 17 
Product_ShippedTo - href = [] # 24 Product_Links + nm = 0 # *Total_Products (Should be Integer) + mktName = "Tor2door" # 0 *Marketplace_Name + vendor = [] # 1 *Vendor y + rating_vendor = [] # 2 Vendor_Rating + success = [] # 3 Vendor_Successful_Transactions + name = [] # 4 *Product_Name y + CVE = [] # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) + MS = [] # 6 Product_MS_Classification (Microsoft Security) + category = [] # 7 Product_Category y + describe = [] # 8 Product_Description + views = [] # 9 Product_Number_Of_Views + reviews = [] # 10 Product_Number_Of_Reviews + rating_item = [] # 11 Product_Rating + addDate = [] # 12 Product_AddDate + BTC = [] # 13 Product_BTC_SellingPrice + USD = [] # 14 Product_USD_SellingPrice y + EURO = [] # 15 Product_EURO_SellingPrice + sold = [] # 16 Product_QuantitySold + qLeft =[] # 17 Product_QuantityLeft + shipFrom = [] # 18 Product_ShippedFrom + shipTo = [] # 19 Product_ShippedTo + href = [] # 20 Product_Links listing = soup.findAll('div', {"class": "card product-card mb-3"}) @@ -160,7 +160,6 @@ def tor2door_listing_parser(soup): # Adding the url to the list of urls link = bae[0].get('href') - link = cleanLink(link) href.append(link) # Finding Product Name diff --git a/path.txt b/path.txt deleted file mode 100644 index 3992963..0000000 --- a/path.txt +++ /dev/null @@ -1,3 +0,0 @@ -C:\Users\Helium\Desktop\Tor Browser\Browser\firefox.exe -C:\Users\Helium\Desktop\Tor Browser\Browser\TorBrowser\Data\Browser\profile.default -C:\Users\Helium\PycharmProjects\dw_pipeline_test\selenium\geckodriver.exe \ No newline at end of file diff --git a/setup.ini b/setup.ini new file mode 100644 index 0000000..38c2347 --- /dev/null +++ b/setup.ini @@ -0,0 +1,14 @@ +[TOR] +firefox_binary_path = C:\Users\calsyslab\Desktop\Tor Browser\Browser\firefox.exe +firefox_profile_path = C:\Users\calsyslab\Desktop\Tor Browser\Browser\TorBrowser\Data\Browser\profile.default +geckodriver_path = C:\Users\calsyslab\Projects\dw_pipeline_test\selenium\geckodriver.exe + +[Project] +project_directory = C:\Users\calsyslab\Projects\dw_pipeline_test +shared_folder = \\VBoxSvr\VM_Files_(shared) + +[PostgreSQL] +ip = localhost +username = postgres +password = password +database = darkweb_markets_forums \ No newline at end of file