From 07bfc887b18581f7ce02dab9befb2e10abc72899 Mon Sep 17 00:00:00 2001 From: westernmeadow Date: Wed, 25 Oct 2023 16:30:57 -0700 Subject: [PATCH 1/5] don't use cleanLink --- MarketPlaces/DarkBazar/crawler_selenium.py | 12 ++++++------ MarketPlaces/DarkBazar/parser.py | 1 - 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/MarketPlaces/DarkBazar/crawler_selenium.py b/MarketPlaces/DarkBazar/crawler_selenium.py index fdfb640..d351c42 100644 --- a/MarketPlaces/DarkBazar/crawler_selenium.py +++ b/MarketPlaces/DarkBazar/crawler_selenium.py @@ -216,12 +216,12 @@ def crawlForum(driver): savePage(driver, driver.page_source, item) driver.back() - # # comment out - # break - # - # # comment out - # if count == 1: - # break + # comment out + break + + # comment out + if count == 1: + break try: link = driver.find_element(by=By.XPATH, value='//a[contains(text(), "Next")]').get_attribute('href') diff --git a/MarketPlaces/DarkBazar/parser.py b/MarketPlaces/DarkBazar/parser.py index 9386d18..3d56e92 100644 --- a/MarketPlaces/DarkBazar/parser.py +++ b/MarketPlaces/DarkBazar/parser.py @@ -170,7 +170,6 @@ def darkbazar_listing_parser(soup): # Adding the url to the list of urls link = bae[0].get('href') - link = cleanLink(link) href.append(link) # Finding the Product From f0003d4b386ecf7188ee01bb53a40bcd1264ed06 Mon Sep 17 00:00:00 2001 From: Helium Date: Thu, 26 Oct 2023 13:14:25 -0700 Subject: [PATCH 2/5] kingdom completed for initial testing, might need to create new account every once in a while. the og account was deleted --- MarketPlaces/Initialization/prepare_parser.py | 6 + MarketPlaces/Kingdom/crawler_selenium.py | 121 ++++-------------- MarketPlaces/Kingdom/parser.py | 21 ++- 3 files changed, 50 insertions(+), 98 deletions(-) diff --git a/MarketPlaces/Initialization/prepare_parser.py b/MarketPlaces/Initialization/prepare_parser.py index f3c792a..982995f 100644 --- a/MarketPlaces/Initialization/prepare_parser.py +++ b/MarketPlaces/Initialization/prepare_parser.py @@ -15,6 +15,8 @@ from MarketPlaces.M00nkeyMarket.parser import * from MarketPlaces.MikesGrandStore.parser import * from MarketPlaces.PabloEscobarMarket.parser import * from MarketPlaces.CityMarket.parser import * +from MarketPlaces.Kingdom.parser import * + from MarketPlaces.Classifier.classify_product import predict @@ -130,6 +132,8 @@ def parse_listing(marketPlace, listingFile, soup, createLog, logFile): rw = pabloescobarmarket_listing_parser(soup) elif marketPlace == "CityMarket": rw = city_listing_parser(soup) + elif marketPlace == "Kingdom": + rw = kingdom_listing_parser(soup) else: print("MISSING CALL TO LISTING PARSER IN PREPARE_PARSER.PY!") raise Exception @@ -164,6 +168,8 @@ def parse_description(marketPlace, descriptionFile, soup, createLog, logFile): rmm = pabloescobarmarket_description_parser(soup) elif marketPlace == "CityMarket": rmm = city_description_parser(soup) + elif marketPlace == "Kingdom": + rmm = kingdom_description_parser(soup) else: print("MISSING CALL TO DESCRIPTION PARSER IN PREPARE_PARSER.PY!") raise Exception diff --git a/MarketPlaces/Kingdom/crawler_selenium.py b/MarketPlaces/Kingdom/crawler_selenium.py index e6b489f..5385150 100644 --- a/MarketPlaces/Kingdom/crawler_selenium.py +++ b/MarketPlaces/Kingdom/crawler_selenium.py @@ -1,4 +1,4 @@ -__author__ = 'DarkWeb' +__author__ = 'Helium' ''' Kingdom Market Crawler (Selenium) @@ -35,55 +35,27 @@ baseURL = 'http://kingdomm7v6yed55o2rbspvs4exn5bzfxdizqaav27tw6gw4zc65vdad.onion # Opens Tor Browser, crawls the website def startCrawling(): - # marketName = 
getMarketName() + mktName = getMKTName() driver = getAccess() if driver != 'down': try: - captcha(driver) login(driver) crawlForum(driver) except Exception as e: print(driver.current_url, e) closeDriver(driver) - # new_parse(marketName, False) + new_parse(mktName, baseURL, True) +# Login using premade account credentials and do login captcha manually +def login(driver): -def captcha(driver): - ''' - # wait for captcha page - WebDriverWait(driver, 100).until(EC.visibility_of_element_located( - (By.XPATH, "/html/body/div/div[1]"))) - - # save captcha to local - driver.find_element(by=By.XPATH, value='/html/body/div/div[2]').screenshot( - r'..\Kingdom\captcha1.png') - - # This method will show image in any image viewer - im = Image.open(r'..\Kingdom\captcha1.png') - im.show() - - iframes = driver.find_elements(by=By.TAG_NAME, value='iframe') - - # ask user input captcha solution in terminal - print("Enter squares from smallest to largest (squares are numbered 1-9 left to right)") - for order in ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine']: - id = input(f"{order}: ") - iframes[int(id)-1].click() - ''' input("Press ENTER when CAPTCHA is completed\n") # wait for login page WebDriverWait(driver, 100).until(EC.visibility_of_element_located( - (By.XPATH, "/html/body/div/div/div[3]/div[1]/div/div/form/div[3]/div/div[1]/button"))) - - -# Login using premade account credentials and do login captcha manually -def login(driver): - # wait for login page - WebDriverWait(driver, 100).until(EC.visibility_of_element_located( - (By.XPATH, "/html/body/div/div/div[3]/div[1]/div/div/form/div[3]/div/div[1]/button"))) + (By.XPATH, '//*[@id="login-form"]'))) # entering username and password into input boxes usernameBox = driver.find_element(by=By.XPATH, value='//*[@id="loginformwithcaptcha-name"]') @@ -96,39 +68,17 @@ def login(driver): select = Select(driver.find_element(by=By.XPATH, value='//*[@id="loginformwithcaptcha-sessiontime"]')) select.select_by_visible_text('24 hours') - ''' - # wait for captcha page show up - WebDriverWait(driver, 100).until(EC.visibility_of_element_located( - (By.XPATH, '//*[@id="captcha"]'))) - - # save captcha to local - driver.find_element(by=By.XPATH, value='//*[@id="captcha"]').screenshot(r'..\Kingdom\captcha2.png') - - # This method will show image in any image viewer - im = Image.open(r'..\Kingdom\captcha2.png') - im.show() - - # wait until input space show up - inputBox = driver.find_element(by=By.XPATH, value='//*[@id="loginformwithcaptcha-captcha"]') - - # ask user input captcha solution in terminal - userIn = input("Enter solution: ") - - # send user solution into the input space - inputBox.send_keys(userIn) - - # click the verify(submit) button - driver.find_element(by=By.XPATH, value="/html/body/div/div/div[3]/div[1]/div/div/form/div[3]/div/div[1]/button").click() - ''' - input("Press ENTER when CAPTCHA is completed\n") + input("Press ENTER when CAPTCHA and DDOS is completed\n") # wait for listing page show up (This Xpath may need to change based on different seed url) WebDriverWait(driver, 50).until(EC.visibility_of_element_located( - (By.XPATH, '/html/body/div/div/div[3]/div[2]'))) + (By.XPATH, '/html/body/div/div/div[3]/div[1]/div/div[3]'))) + + # Returns the name of the website -def getMarketName(): +def getMKTName(): name = 'Kingdom' return name @@ -236,30 +186,17 @@ def getInterestedLinks(): links = [] # Software and Malware - 
links.append('http://kingdomm7v6yed55o2rbspvs4exn5bzfxdizqaav27tw6gw4zc65vdad.onion/offers?filter_category=127&t=c298a77d9e93ad32') + links.append('http://kingdomm7v6yed55o2rbspvs4exn5bzfxdizqaav27tw6gw4zc65vdad.onion/offers?filter_category=127&t=597a56b9a0b3e0d0') # # Services - # links.append('http://kingdomm7v6yed55o2rbspvs4exn5bzfxdizqaav27tw6gw4zc65vdad.onion/offers?filter_category=45&t=c298a77d9e93ad32') - # # Exploits - # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=45') - # # Tools - # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=46') - # # Malware - # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=47') - # # Cryptography - # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=48') - # # Others - # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=49') - # # Hacking Tutorials - # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=50') - # # Hacked Accounts and Database Dumps - # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=30') - # # Android Moded pak - # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=53') + links.append('http://kingdomm7v6yed55o2rbspvs4exn5bzfxdizqaav27tw6gw4zc65vdad.onion/offers?filter_category=45&t=597a56b9a0b3e0d0') + # # guides and tutorials + links.append('http://kingdomm7v6yed55o2rbspvs4exn5bzfxdizqaav27tw6gw4zc65vdad.onion/offers?filter_category=107&t=597a56b9a0b3e0d0') return links def crawlForum(driver): + print("Crawling the Kingdom market") linksToCrawl = getInterestedLinks() @@ -281,6 +218,7 @@ def crawlForum(driver): savePage(driver, html, link) list = productPages(html) + for item in list: itemURL = urlparse.urljoin(baseURL, str(item)) try: @@ -290,18 +228,15 @@ def crawlForum(driver): savePage(driver, driver.page_source, item) driver.back() - # comment out - break - - # comment out - if count == 1: - break + # # comment out + # break + # + # # comment out + # if count == 1: + # break try: - temp = driver.find_element(by=By.XPATH, value= - '/html/body/div/div/div[3]/div[2]/div[2]/div/div/ul') - next = temp.find_element_by_class_name("next") - link = link.find_element_by_tag_name('a').get_attribute('href') + link = driver.find_element(by=By.XPATH, value='/html/body/div/div/div[3]/div[2]/div[2]/div[3]/div/ul/li[13]/a').get_attribute('href') if link == "": raise NoSuchElementException count += 1 @@ -313,7 +248,7 @@ def crawlForum(driver): print(link, e) i += 1 - input("Crawling Kingdom Market done sucessfully. 
Press ENTER to continue\n") + print("Crawling the Kingdom market done.") # Returns 'True' if the link is Topic link @@ -325,7 +260,7 @@ def isDescriptionLink(url): # Returns True if the link is a listingPage link def isListingLink(url): - if 'category' in url: + if 'filter_category' in url: return True return False @@ -333,10 +268,8 @@ def isListingLink(url): # calling the parser to define the links def productPages(html): soup = BeautifulSoup(html, "html.parser") - #print(soup.find('div', id="container").find('div', id="content").find('table', {"class": "tborder clear"}).find('tbody').find('tr',{"class": "inline_row"}).find('strong').text) return kingdom_links_parser(soup) def crawler(): - startCrawling() - # print("Crawling and Parsing BestCardingWorld .... DONE!") + startCrawling() \ No newline at end of file diff --git a/MarketPlaces/Kingdom/parser.py b/MarketPlaces/Kingdom/parser.py index b1e05d5..abade27 100644 --- a/MarketPlaces/Kingdom/parser.py +++ b/MarketPlaces/Kingdom/parser.py @@ -1,4 +1,4 @@ -__author__ = 'DarkWeb' +__author__ = 'Helium' # Here, we are importing the auxiliary functions to clean or convert data from MarketPlaces.Utilities.utilities import * @@ -31,6 +31,8 @@ def kingdom_description_parser(soup): left = "-1" # 16 Product_QuantityLeft shipFrom = "-1" # 17 Product_ShippedFrom shipTo = "-1" # 18 Product_ShippedTo + image = "-1" # 19 Product_Image + vendor_image = "-1" # 20 Vendor_Image # Finding Product Name @@ -95,7 +97,7 @@ def kingdom_description_parser(soup): # Populating the final variable (this should be a list with all fields scraped) row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate, - BTC, USD, EURO, sold, left, shipFrom, shipTo) + BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image) # Sending the results @@ -126,7 +128,9 @@ def kingdom_listing_parser(soup): qLeft =[] # 17 Product_QuantityLeft shipFrom = [] # 18 Product_ShippedFrom shipTo = [] # 19 Product_ShippedTo - href = [] # 20 Product_Links + image = [] # 20 Product_Image + image_vendor = [] # 21 Vendor_Image + href = [] # 22 Product_Links listing = soup.find('div', {"id": "p0"}).find('div').find_all('div', {"class": "row"}, recursive=False) @@ -153,12 +157,20 @@ def kingdom_listing_parser(soup): product = product.strip() name.append(product) + # Finding Product Image + product_image = a.find('img') + product_image = product_image.get('src') + product_image = product_image.split('base64,')[-1] + image.append(product_image) + # Finding the Vendor vendor_name = a.select_one('a[href^="/user"]').text vendor_name = vendor_name.replace(",", " ").replace('/', '') vendor_name = vendor_name.strip() vendor.append(vendor_name) + image_vendor.append("-1") + # Adding the url to the list of urls link = a.find('div', {"class": "col-md-7"}).select_one('a[href^="/offer/view?"]')['href'] link = cleanLink(link) @@ -169,7 +181,8 @@ def kingdom_listing_parser(soup): # Populate the final variable (this should be a list with all fields scraped) return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views, - reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href) + reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, + image, image_vendor) def kingdom_links_parser(soup): From 1d091b944acb9f2f8e01b2a377cedab5201c9076 Mon Sep 17 00:00:00 2001 From: westernmeadow Date: Fri, 27 Oct 2023 14:34:07 -0700 Subject: [PATCH 3/5] removed indent --- 
MarketPlaces/DarkBazar/crawler_selenium.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MarketPlaces/DarkBazar/crawler_selenium.py b/MarketPlaces/DarkBazar/crawler_selenium.py index d351c42..dac91b0 100644 --- a/MarketPlaces/DarkBazar/crawler_selenium.py +++ b/MarketPlaces/DarkBazar/crawler_selenium.py @@ -236,7 +236,7 @@ def crawlForum(driver): print(link, e) i += 1 - print("Crawling the DarkBazar market done.") + print("Crawling the DarkBazar market done.") # Returns 'True' if the link is Topic link, may need to change for every website From b084d76d3ed6277bbb6e030828be564503321d0c Mon Sep 17 00:00:00 2001 From: westernmeadow Date: Fri, 27 Oct 2023 15:13:58 -0700 Subject: [PATCH 4/5] listings, descriptions, and reference date --- Forums/DB_Connection/db_connection.py | 19 +++++++++------ Forums/Initialization/prepare_parser.py | 8 +++++-- MarketPlaces/DB_Connection/db_connection.py | 24 ++++++++++++------- MarketPlaces/Initialization/prepare_parser.py | 8 +++++-- 4 files changed, 39 insertions(+), 20 deletions(-) diff --git a/Forums/DB_Connection/db_connection.py b/Forums/DB_Connection/db_connection.py index f0d4ed6..dfdec49 100644 --- a/Forums/DB_Connection/db_connection.py +++ b/Forums/DB_Connection/db_connection.py @@ -3,7 +3,7 @@ __author__ = 'DarkWeb' import psycopg2 import traceback from Forums.Utilities.utilities import * - +from dateutil.relativedelta import relativedelta, FR def connectDataBase(): @@ -484,21 +484,25 @@ def create_posts(cur, row, forumId, topicId): 'dateinserted_post': row[8], 'postId': postId}) -def create_status(cur, forumId, date, status): +def create_status(cur, forumId, date, listings, descriptions, status): date = datetime.strptime(date, "%m%d%Y") + # getting last Fridays a reference date + date_reference = date + relativedelta(weekday=FR(-1)) + # checking if status already exists sql = "select * from forums_status where forum_id = %(forum_id)s and date_inserted = %(date_inserted)s" cur.execute(sql, {'forum_id': forumId, 'date_inserted': date}) recset = cur.fetchall() if recset: - sql = "Update forums_status set status = %(status)s where forum_id = %(forum_id)s and date_inserted = %(date_inserted)s" - recset = {'status': status, 'forum_id': forumId, 'date_inserted': date} + sql = "Update forums_status set listings = %(listings)s, descriptions = %(descriptions)s, status = %(status)s, date_reference = %(date_reference)s " \ + "where forum_id = %(forum_id)s and date_inserted = %(date_inserted)s" + recset = {'listings': listings, 'descriptions': descriptions, 'status': status, 'date_reference': date_reference, 'forum_id': forumId, 'date_inserted': date} else: - sql = "Insert into forums_status (forum_id, date_inserted, status) Values (%s, %s, %s)" - recset = [forumId, date, status] + sql = "Insert into forums_status (forum_id, date_inserted, listings, descriptions, status, date_reference) Values (%s, %s, %s, %s, %s, %s)" + recset = [forumId, date, listings, descriptions, status, date_reference] cur.execute(sql, recset) @@ -514,7 +518,8 @@ def create_database(cur, con): sql = "create unique index unique_forum ON forums USING btree (name_forum ASC NULLS LAST)" cur.execute(sql) - sql = "Create table forums_status (forum_id integer NOT NULL, date_inserted date NOT NULL, status bit(1) NOT NULL, " \ + sql = "Create table forums_status (forum_id integer NOT NULL, date_inserted date NOT NULL, " \ + "listings integer NOT NULL, descriptions integer NOT NULL, status bit(1) NOT NULL, date_reference date NOT NULL " \ "CONSTRAINT forums_log_pkey PRIMARY 
KEY (forum_id, date_inserted), " \ "CONSTRAINT forums_fk FOREIGN KEY (forum_id) REFERENCES forums (forum_id))" cur.execute(sql) diff --git a/Forums/Initialization/prepare_parser.py b/Forums/Initialization/prepare_parser.py index 1f55319..31982fd 100644 --- a/Forums/Initialization/prepare_parser.py +++ b/Forums/Initialization/prepare_parser.py @@ -341,10 +341,14 @@ def new_parse(forum, url, createLog): # move listing files of completed folder move_file(listingFile, createLog, logFile) - # registering the current forum status (up/down) in the database + # registering the current forum status (up/down) and the number of scraped pages in the database forumId = verifyForum(cur, forum) if (forumId > 0): - create_status(cur, forumId, CURRENT_DATE, '1' if len(listings) > 0 else '0') + + readListings = glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Listing\\read", '*.htm')) + readDescriptions = glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Description\\read", '*.htm')) + + create_status(cur, forumId, CURRENT_DATE, len(readListings), len(readDescriptions), '1' if len(listings) > 0 else '0') con.commit() if createLog: diff --git a/MarketPlaces/DB_Connection/db_connection.py b/MarketPlaces/DB_Connection/db_connection.py index 4f439f0..2f3341a 100644 --- a/MarketPlaces/DB_Connection/db_connection.py +++ b/MarketPlaces/DB_Connection/db_connection.py @@ -4,7 +4,7 @@ import psycopg2 import traceback import configparser from MarketPlaces.Utilities.utilities import * - +from dateutil.relativedelta import relativedelta, FR def connectDataBase(): @@ -273,6 +273,8 @@ def create_items(cur, row, marketId, vendorId): if newItem: + # decode_decrypt_image_in_base64(row[20]) + sql = "Insert into items (item_id, market_id, vendor_id, name_item, description_item, cve_item, ms_item, category_item, " \ "views_item, reviews_item, rating_item, dateadded_item, btc_item, usd_item, euro_item, quantitysold_item, " \ "quantityleft_item, shippedfrom_item, shippedto_item, lastseen_item, image_item, href_item, dateinserted_item, " \ @@ -312,7 +314,7 @@ def create_items(cur, row, marketId, vendorId): recset = cur.fetchall() - # decode_decrypt_image_in_base64(recset[0][20]) + # decode_decrypt_image_in_base64(recset[0]['image_item']) if (str(recset[0]['description_item']) != str(row[5] if row[5] != '-1' else None) or str(recset[0]['cve_item']) != str(row[6] if row[6] != '-1' else None) or @@ -401,24 +403,27 @@ def create_items(cur, row, marketId, vendorId): return itemId -def create_status(cur, marketId, date, status): +def create_status(cur, marketId, date, listings, descriptions, status): date = datetime.strptime(date, "%m%d%Y") + # getting last Fridays a reference date + date_reference = date + relativedelta(weekday=FR(-1)) + # checking if status already exists sql = "select * from marketplaces_status where market_id = %(market_id)s and date_inserted = %(date_inserted)s" cur.execute(sql, {'market_id': marketId, 'date_inserted': date}) recset = cur.fetchall() if recset: - sql = "Update marketplaces_status set status = %(status)s where market_id = %(market_id)s and date_inserted = %(date_inserted)s" - recset = {'status': status, 'market_id': marketId, 'date_inserted': date} + sql = "Update marketplaces_status set listings = %(listings)s, descriptions = %(descriptions)s, status = %(status)s, date_reference = %(date_reference)s " \ + "where market_id = %(market_id)s and date_inserted = %(date_inserted)s" + recset = {'listings': listings, 'descriptions': descriptions, 'status': status, 'date_reference': date_reference, 
'market_id': marketId, 'date_inserted': date} else: - sql = "Insert into marketplaces_status (market_id, date_inserted, status) Values (%s, %s, %s)" - recset = [marketId, date, status] + sql = "Insert into marketplaces_status (market_id, date_inserted, listings, descriptions, status, date_reference) Values (%s, %s, %s, %s, %s, %s)" + recset = [marketId, date, listings, descriptions, status, date_reference] cur.execute(sql, recset) - def create_database(cur, con): try: @@ -431,7 +436,8 @@ def create_database(cur, con): sql = "create unique index unique_market ON marketplaces USING btree (name_market ASC NULLS LAST)" cur.execute(sql) - sql = "Create table marketplaces_status (market_id integer NOT NULL, date_inserted date NOT NULL, status bit(1) NOT NULL, " \ + sql = "Create table marketplaces_status (market_id integer NOT NULL, date_inserted date NOT NULL, " \ + "listings integer NOT NULL, descriptions integer NOT NULL, status bit(1) NOT NULL, date_reference date NOT NULL " \ "CONSTRAINT marketplaces_log_pkey PRIMARY KEY (market_id, date_inserted), " \ "CONSTRAINT marketplaces_fk FOREIGN KEY (market_id) REFERENCES marketplaces (market_id))" cur.execute(sql) diff --git a/MarketPlaces/Initialization/prepare_parser.py b/MarketPlaces/Initialization/prepare_parser.py index 982995f..7c35f5a 100644 --- a/MarketPlaces/Initialization/prepare_parser.py +++ b/MarketPlaces/Initialization/prepare_parser.py @@ -369,10 +369,14 @@ def new_parse(marketPlace, url, createLog): # move listing files of completed folder move_file(listingFile, createLog, logFile) - # registering the current market status (up/down) in the database + # registering the current forum status (up/down) and the number of scraped pages in the database marketId = verifyMarketPlace(cur, marketPlace) if (marketId > 0): - create_status(cur, marketId, CURRENT_DATE, '1' if len(listings) > 0 else '0') + + readListings = glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Listing\\read", '*.htm')) + readDescriptions = glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Description\\read", '*.htm')) + + create_status(cur, marketId, CURRENT_DATE, len(readListings), len(readDescriptions), '1' if len(listings) > 0 else '0') con.commit() if createLog: From c976032cc40945cf8a660ab9551e6366a95f6927 Mon Sep 17 00:00:00 2001 From: westernmeadow Date: Fri, 27 Oct 2023 15:35:31 -0700 Subject: [PATCH 5/5] small fixes --- Forums/Initialization/prepare_parser.py | 4 ++-- MarketPlaces/Initialization/prepare_parser.py | 15 ++++++++++++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/Forums/Initialization/prepare_parser.py b/Forums/Initialization/prepare_parser.py index 31982fd..b86b5c6 100644 --- a/Forums/Initialization/prepare_parser.py +++ b/Forums/Initialization/prepare_parser.py @@ -345,8 +345,8 @@ def new_parse(forum, url, createLog): forumId = verifyForum(cur, forum) if (forumId > 0): - readListings = glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Listing\\read", '*.htm')) - readDescriptions = glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Description\\read", '*.htm')) + readListings = glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Listing\\read", '*.html')) + readDescriptions = glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Description\\read", '*.html')) create_status(cur, forumId, CURRENT_DATE, len(readListings), len(readDescriptions), '1' if len(listings) > 0 else '0') con.commit() diff --git a/MarketPlaces/Initialization/prepare_parser.py b/MarketPlaces/Initialization/prepare_parser.py index 7c35f5a..de6cc79 100644 --- 
a/MarketPlaces/Initialization/prepare_parser.py +++ b/MarketPlaces/Initialization/prepare_parser.py @@ -15,9 +15,10 @@ from MarketPlaces.M00nkeyMarket.parser import * from MarketPlaces.MikesGrandStore.parser import * from MarketPlaces.PabloEscobarMarket.parser import * from MarketPlaces.CityMarket.parser import * +from MarketPlaces.DarkBazar.parser import * +from MarketPlaces.Sonanza.parser import * from MarketPlaces.Kingdom.parser import * - from MarketPlaces.Classifier.classify_product import predict nError = 0 @@ -132,6 +133,10 @@ def parse_listing(marketPlace, listingFile, soup, createLog, logFile): rw = pabloescobarmarket_listing_parser(soup) elif marketPlace == "CityMarket": rw = city_listing_parser(soup) + elif marketPlace == "DarkBazar": + rw = darkbazar_listing_parser(soup) + elif marketPlace == "Sonanza": + rw = sonanza_listing_parser(soup) elif marketPlace == "Kingdom": rw = kingdom_listing_parser(soup) else: @@ -168,6 +173,10 @@ def parse_description(marketPlace, descriptionFile, soup, createLog, logFile): rmm = pabloescobarmarket_description_parser(soup) elif marketPlace == "CityMarket": rmm = city_description_parser(soup) + elif marketPlace == "DarkBazar": + rmm = darkbazar_description_parser(soup) + elif marketPlace == "Sonanza": + rmm = sonanza_description_parser(soup) elif marketPlace == "Kingdom": rmm = kingdom_description_parser(soup) else: @@ -373,8 +382,8 @@ def new_parse(marketPlace, url, createLog): marketId = verifyMarketPlace(cur, marketPlace) if (marketId > 0): - readListings = glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Listing\\read", '*.htm')) - readDescriptions = glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Description\\read", '*.htm')) + readListings = glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Listing\\read", '*.html')) + readDescriptions = glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Description\\read", '*.html')) create_status(cur, marketId, CURRENT_DATE, len(readListings), len(readDescriptions), '1' if len(listings) > 0 else '0') con.commit()
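
Note on the reference-date logic introduced in patch 4: both create_status() functions now derive date_reference with dateutil's relativedelta(weekday=FR(-1)). The sketch below is a standalone illustration of that behavior (it is not part of the patch series); it assumes only dateutil's documented FR(-1) semantics, which are inclusive, so a date that already falls on a Friday maps to itself rather than to the previous week.

    # Standalone sketch of the date_reference computation used in create_status().
    # FR(-1) means "the most recent Friday, counting today if today is a Friday".
    from datetime import datetime
    from dateutil.relativedelta import relativedelta, FR

    for raw in ["10272023", "10282023", "10302023"]:   # a Friday, a Saturday, a Monday
        date = datetime.strptime(raw, "%m%d%Y")        # same format new_parse() passes in
        date_reference = date + relativedelta(weekday=FR(-1))
        print(raw, "->", date_reference.date())        # all three print 2023-10-27

The resulting value is what gets stored in the new date_reference column of forums_status and marketplaces_status alongside the listings/descriptions counts.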