From f0003d4b386ecf7188ee01bb53a40bcd1264ed06 Mon Sep 17 00:00:00 2001
From: Helium
Date: Thu, 26 Oct 2023 13:14:25 -0700
Subject: [PATCH] Kingdom completed for initial testing; may need to create a
 new account every once in a while since the original account was deleted

---
 MarketPlaces/Initialization/prepare_parser.py |   6 +
 MarketPlaces/Kingdom/crawler_selenium.py      | 121 ++++--------------
 MarketPlaces/Kingdom/parser.py                |  21 ++-
 3 files changed, 50 insertions(+), 98 deletions(-)

diff --git a/MarketPlaces/Initialization/prepare_parser.py b/MarketPlaces/Initialization/prepare_parser.py
index f3c792a..982995f 100644
--- a/MarketPlaces/Initialization/prepare_parser.py
+++ b/MarketPlaces/Initialization/prepare_parser.py
@@ -15,6 +15,8 @@ from MarketPlaces.M00nkeyMarket.parser import *
 from MarketPlaces.MikesGrandStore.parser import *
 from MarketPlaces.PabloEscobarMarket.parser import *
 from MarketPlaces.CityMarket.parser import *
+from MarketPlaces.Kingdom.parser import *
+
 from MarketPlaces.Classifier.classify_product import predict
 
 
@@ -130,6 +132,8 @@ def parse_listing(marketPlace, listingFile, soup, createLog, logFile):
         rw = pabloescobarmarket_listing_parser(soup)
     elif marketPlace == "CityMarket":
         rw = city_listing_parser(soup)
+    elif marketPlace == "Kingdom":
+        rw = kingdom_listing_parser(soup)
     else:
         print("MISSING CALL TO LISTING PARSER IN PREPARE_PARSER.PY!")
         raise Exception
@@ -164,6 +168,8 @@ def parse_description(marketPlace, descriptionFile, soup, createLog, logFile):
         rmm = pabloescobarmarket_description_parser(soup)
     elif marketPlace == "CityMarket":
         rmm = city_description_parser(soup)
+    elif marketPlace == "Kingdom":
+        rmm = kingdom_description_parser(soup)
     else:
         print("MISSING CALL TO DESCRIPTION PARSER IN PREPARE_PARSER.PY!")
         raise Exception
diff --git a/MarketPlaces/Kingdom/crawler_selenium.py b/MarketPlaces/Kingdom/crawler_selenium.py
index e6b489f..5385150 100644
--- a/MarketPlaces/Kingdom/crawler_selenium.py
+++ b/MarketPlaces/Kingdom/crawler_selenium.py
@@ -1,4 +1,4 @@
-__author__ = 'DarkWeb'
+__author__ = 'Helium'
 
 '''
 Kingdom Market Crawler (Selenium)
@@ -35,55 +35,27 @@ baseURL = 'http://kingdomm7v6yed55o2rbspvs4exn5bzfxdizqaav27tw6gw4zc65vdad.onion
 
 # Opens Tor Browser, crawls the website
 def startCrawling():
-    # marketName = getMarketName()
+    mktName = getMKTName()
     driver = getAccess()
 
     if driver != 'down':
         try:
-            captcha(driver)
             login(driver)
             crawlForum(driver)
         except Exception as e:
             print(driver.current_url, e)
         closeDriver(driver)
 
-    # new_parse(marketName, False)
+    new_parse(mktName, baseURL, True)
 
 
+# Login using premade account credentials and do login captcha manually
+def login(driver):
 
-def captcha(driver):
-    '''
-    # wait for captcha page
-    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
-        (By.XPATH, "/html/body/div/div[1]")))
-
-    # save captcha to local
-    driver.find_element(by=By.XPATH, value='/html/body/div/div[2]').screenshot(
-        r'..\Kingdom\captcha1.png')
-
-    # This method will show image in any image viewer
-    im = Image.open(r'..\Kingdom\captcha1.png')
-    im.show()
-
-    iframes = driver.find_elements(by=By.TAG_NAME, value='iframe')
-
-    # ask user input captcha solution in terminal
-    print("Enter squares from smallest to largest (squares are numbered 1-9 left to right)")
-    for order in ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine']:
-        id = input(f"{order}: ")
-        iframes[int(id)-1].click()
-    '''
     input("Press ENTER when CAPTCHA is completed\n")
 
     # wait for login page
     WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
-        (By.XPATH, "/html/body/div/div/div[3]/div[1]/div/div/form/div[3]/div/div[1]/button")))
-
-
-# Login using premade account credentials and do login captcha manually
-def login(driver):
-    # wait for login page
-    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
-        (By.XPATH, "/html/body/div/div/div[3]/div[1]/div/div/form/div[3]/div/div[1]/button")))
+        (By.XPATH, '//*[@id="login-form"]')))
 
     # entering username and password into input boxes
     usernameBox = driver.find_element(by=By.XPATH, value='//*[@id="loginformwithcaptcha-name"]')
@@ -96,39 +68,17 @@ def login(driver):
     select = Select(driver.find_element(by=By.XPATH, value='//*[@id="loginformwithcaptcha-sessiontime"]'))
     select.select_by_visible_text('24 hours')
 
-    '''
-    # wait for captcha page show up
-    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
-        (By.XPATH, '//*[@id="captcha"]')))
-
-    # save captcha to local
-    driver.find_element(by=By.XPATH, value='//*[@id="captcha"]').screenshot(r'..\Kingdom\captcha2.png')
-
-    # This method will show image in any image viewer
-    im = Image.open(r'..\Kingdom\captcha2.png')
-    im.show()
-
-    # wait until input space show up
-    inputBox = driver.find_element(by=By.XPATH, value='//*[@id="loginformwithcaptcha-captcha"]')
-
-    # ask user input captcha solution in terminal
-    userIn = input("Enter solution: ")
-
-    # send user solution into the input space
-    inputBox.send_keys(userIn)
-
-    # click the verify(submit) button
-    driver.find_element(by=By.XPATH, value="/html/body/div/div/div[3]/div[1]/div/div/form/div[3]/div/div[1]/button").click()
-    '''
-    input("Press ENTER when CAPTCHA is completed\n")
+    input("Press ENTER when the CAPTCHA and DDoS check are completed\n")
 
     # wait for listing page show up (This Xpath may need to change based on different seed url)
     WebDriverWait(driver, 50).until(EC.visibility_of_element_located(
-        (By.XPATH, '/html/body/div/div/div[3]/div[2]')))
+        (By.XPATH, '/html/body/div/div/div[3]/div[1]/div/div[3]')))
+
+
 
 # Returns the name of the website
-def getMarketName():
+def getMKTName():
     name = 'Kingdom'
     return name
 
 
@@ -236,30 +186,17 @@ def getInterestedLinks():
     links = []
 
     # Software and Malware
-    links.append('http://kingdomm7v6yed55o2rbspvs4exn5bzfxdizqaav27tw6gw4zc65vdad.onion/offers?filter_category=127&t=c298a77d9e93ad32')
+    links.append('http://kingdomm7v6yed55o2rbspvs4exn5bzfxdizqaav27tw6gw4zc65vdad.onion/offers?filter_category=127&t=597a56b9a0b3e0d0')
     # # Services
-    # links.append('http://kingdomm7v6yed55o2rbspvs4exn5bzfxdizqaav27tw6gw4zc65vdad.onion/offers?filter_category=45&t=c298a77d9e93ad32')
-    # # Exploits
-    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=45')
-    # # Tools
-    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=46')
-    # # Malware
-    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=47')
-    # # Cryptography
-    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=48')
-    # # Others
-    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=49')
-    # # Hacking Tutorials
-    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=50')
-    # # Hacked Accounts and Database Dumps
-    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=30')
-    # # Android Moded pak
-    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=53')
+    links.append('http://kingdomm7v6yed55o2rbspvs4exn5bzfxdizqaav27tw6gw4zc65vdad.onion/offers?filter_category=45&t=597a56b9a0b3e0d0')
+    # # Guides and Tutorials
+    links.append('http://kingdomm7v6yed55o2rbspvs4exn5bzfxdizqaav27tw6gw4zc65vdad.onion/offers?filter_category=107&t=597a56b9a0b3e0d0')
 
     return links
 
 
 def crawlForum(driver):
+    print("Crawling the Kingdom market")
 
     linksToCrawl = getInterestedLinks()
 
@@ -281,6 +218,7 @@ def crawlForum(driver):
                 savePage(driver, html, link)
 
                 list = productPages(html)
+
                 for item in list:
                     itemURL = urlparse.urljoin(baseURL, str(item))
                     try:
@@ -290,18 +228,15 @@ def crawlForum(driver):
                         driver.get(itemURL)
                     except:
                         driver.refresh()
                     savePage(driver, driver.page_source, item)
                     driver.back()
 
-                # comment out
-                break
-
-                # comment out
-                if count == 1:
-                    break
+                # # comment out
+                # break
+                #
+                # # comment out
+                # if count == 1:
+                #     break
 
                 try:
-                    temp = driver.find_element(by=By.XPATH, value=
-                        '/html/body/div/div/div[3]/div[2]/div[2]/div/div/ul')
-                    next = temp.find_element_by_class_name("next")
-                    link = link.find_element_by_tag_name('a').get_attribute('href')
+                    link = driver.find_element(by=By.XPATH, value='/html/body/div/div/div[3]/div[2]/div[2]/div[3]/div/ul/li[13]/a').get_attribute('href')
                     if link == "":
                         raise NoSuchElementException
                     count += 1
 
                 except NoSuchElementException:
                     link = ""
 
@@ -313,7 +248,7 @@ def crawlForum(driver):
             print(link, e)
             i += 1
 
-    input("Crawling Kingdom Market done sucessfully. Press ENTER to continue\n")
+    print("Crawling the Kingdom market done.")
 
 
 # Returns 'True' if the link is Topic link
@@ -325,7 +260,7 @@ def isDescriptionLink(url):
 
 # Returns True if the link is a listingPage link
 def isListingLink(url):
-    if 'category' in url:
+    if 'filter_category' in url:
         return True
     return False
 
@@ -333,10 +268,8 @@ def isListingLink(url):
 # calling the parser to define the links
 def productPages(html):
     soup = BeautifulSoup(html, "html.parser")
-    #print(soup.find('div', id="container").find('div', id="content").find('table', {"class": "tborder clear"}).find('tbody').find('tr',{"class": "inline_row"}).find('strong').text)
     return kingdom_links_parser(soup)
 
 
 def crawler():
-    startCrawling()
-    # print("Crawling and Parsing BestCardingWorld .... DONE!")
+    startCrawling()
\ No newline at end of file
diff --git a/MarketPlaces/Kingdom/parser.py b/MarketPlaces/Kingdom/parser.py
index b1e05d5..abade27 100644
--- a/MarketPlaces/Kingdom/parser.py
+++ b/MarketPlaces/Kingdom/parser.py
@@ -1,4 +1,4 @@
-__author__ = 'DarkWeb'
+__author__ = 'Helium'
 
 # Here, we are importing the auxiliary functions to clean or convert data
 from MarketPlaces.Utilities.utilities import *
@@ -31,6 +31,8 @@ def kingdom_description_parser(soup):
     left = "-1"                             # 16 Product_QuantityLeft
     shipFrom = "-1"                         # 17 Product_ShippedFrom
     shipTo = "-1"                           # 18 Product_ShippedTo
+    image = "-1"                            # 19 Product_Image
+    vendor_image = "-1"                     # 20 Vendor_Image
 
     # Finding Product Name
 
@@ -95,7 +97,7 @@ def kingdom_description_parser(soup):
 
     # Populating the final variable (this should be a list with all fields scraped)
     row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
-           BTC, USD, EURO, sold, left, shipFrom, shipTo)
+           BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
 
     # Sending the results
 
@@ -126,7 +128,9 @@ def kingdom_listing_parser(soup):
     qLeft =[]                               # 17 Product_QuantityLeft
     shipFrom = []                           # 18 Product_ShippedFrom
     shipTo = []                             # 19 Product_ShippedTo
-    href = []                               # 20 Product_Links
+    image = []                              # 20 Product_Image
+    image_vendor = []                       # 21 Vendor_Image
+    href = []                               # 22 Product_Links
 
     listing = soup.find('div', {"id": "p0"}).find('div').find_all('div', {"class": "row"}, recursive=False)
 
@@ -153,12 +157,20 @@ def kingdom_listing_parser(soup):
         product = product.strip()
         name.append(product)
 
+        # Finding Product Image
+        product_image = a.find('img')
+        product_image = product_image.get('src')
+        product_image = product_image.split('base64,')[-1]
+        image.append(product_image)
+
         # Finding the Vendor
         vendor_name = a.select_one('a[href^="/user"]').text
         vendor_name = vendor_name.replace(",", " ").replace('/', '')
         vendor_name = vendor_name.strip()
         vendor.append(vendor_name)
 
+        image_vendor.append("-1")
+
         # Adding the url to the list of urls
         link = a.find('div', {"class": "col-md-7"}).select_one('a[href^="/offer/view?"]')['href']
         link = cleanLink(link)
@@ -169,7 +181,8 @@ def kingdom_listing_parser(soup):
 
     # Populate the final variable (this should be a list with all fields scraped)
     return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
-                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href,
+                            image, image_vendor)
 
 
 def kingdom_links_parser(soup):
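
A quick way to exercise the new Kingdom wiring without a live Tor session is to run the parsers against a listing page the crawler has already saved. This is a reviewer-side sketch rather than part of the patch: the HTML filename is hypothetical, and it assumes BeautifulSoup 4 and the MarketPlaces package above are on the import path.

    from bs4 import BeautifulSoup

    from MarketPlaces.Kingdom.parser import kingdom_links_parser, kingdom_listing_parser

    # Parse a listing page previously captured by savePage() during a crawl
    with open('kingdom_listing_sample.html', encoding='utf8') as f:  # hypothetical path
        soup = BeautifulSoup(f.read(), 'html.parser')

    offers = kingdom_links_parser(soup)   # offer URLs, the same list crawlForum() visits
    rows = kingdom_listing_parser(soup)   # one row per product, assembled via organizeProducts()
    print(len(offers), 'offer links found')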
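Since kingdom_listing_parser keeps only the bare base64 payload of each product image (everything after 'base64,' in the img src), recovering the actual image bytes later is a one-liner. A sketch continuing from the snippet above, with a hypothetical output path:

    import base64

    # First offer row under div#p0, the same structure kingdom_listing_parser walks
    payload = soup.find('div', {"id": "p0"}).find('img').get('src').split('base64,')[-1]
    with open('offer_thumb.png', 'wb') as f:  # hypothetical output path
        f.write(base64.b64decode(payload))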