From f0003d4b386ecf7188ee01bb53a40bcd1264ed06 Mon Sep 17 00:00:00 2001
From: Helium
Date: Thu, 26 Oct 2023 13:14:25 -0700
Subject: [PATCH] Kingdom completed for initial testing; may need to create a
 new account every once in a while since the original account was deleted

---
 MarketPlaces/Initialization/prepare_parser.py |   6 +
 MarketPlaces/Kingdom/crawler_selenium.py      | 121 ++++--------------
 MarketPlaces/Kingdom/parser.py                |  21 ++-
 3 files changed, 50 insertions(+), 98 deletions(-)

diff --git a/MarketPlaces/Initialization/prepare_parser.py b/MarketPlaces/Initialization/prepare_parser.py
index f3c792a..982995f 100644
--- a/MarketPlaces/Initialization/prepare_parser.py
+++ b/MarketPlaces/Initialization/prepare_parser.py
@@ -15,6 +15,8 @@ from MarketPlaces.M00nkeyMarket.parser import *
 from MarketPlaces.MikesGrandStore.parser import *
 from MarketPlaces.PabloEscobarMarket.parser import *
 from MarketPlaces.CityMarket.parser import *
+from MarketPlaces.Kingdom.parser import *
+
 from MarketPlaces.Classifier.classify_product import predict
 
 
@@ -130,6 +132,8 @@ def parse_listing(marketPlace, listingFile, soup, createLog, logFile):
         rw = pabloescobarmarket_listing_parser(soup)
     elif marketPlace == "CityMarket":
         rw = city_listing_parser(soup)
+    elif marketPlace == "Kingdom":
+        rw = kingdom_listing_parser(soup)
     else:
         print("MISSING CALL TO LISTING PARSER IN PREPARE_PARSER.PY!")
         raise Exception
@@ -164,6 +168,8 @@ def parse_description(marketPlace, descriptionFile, soup, createLog, logFile):
         rmm = pabloescobarmarket_description_parser(soup)
     elif marketPlace == "CityMarket":
         rmm = city_description_parser(soup)
+    elif marketPlace == "Kingdom":
+        rmm = kingdom_description_parser(soup)
     else:
         print("MISSING CALL TO DESCRIPTION PARSER IN PREPARE_PARSER.PY!")
         raise Exception
diff --git a/MarketPlaces/Kingdom/crawler_selenium.py b/MarketPlaces/Kingdom/crawler_selenium.py
index e6b489f..5385150 100644
--- a/MarketPlaces/Kingdom/crawler_selenium.py
+++ b/MarketPlaces/Kingdom/crawler_selenium.py
@@ -1,4 +1,4 @@
-__author__ = 'DarkWeb'
+__author__ = 'Helium'
 
 '''
 Kingdom Market Crawler (Selenium)
@@ -35,55 +35,27 @@ baseURL = 'http://kingdomm7v6yed55o2rbspvs4exn5bzfxdizqaav27tw6gw4zc65vdad.onion
 
 # Opens Tor Browser, crawls the website
 def startCrawling():
-    # marketName = getMarketName()
+    mktName = getMKTName()
     driver = getAccess()
 
     if driver != 'down':
         try:
-            captcha(driver)
             login(driver)
             crawlForum(driver)
         except Exception as e:
             print(driver.current_url, e)
         closeDriver(driver)
 
-    # new_parse(marketName, False)
+    new_parse(mktName, baseURL, True)
 
 
+# Login using premade account credentials and do login captcha manually
+def login(driver):
 
-def captcha(driver):
-    '''
-    # wait for captcha page
-    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
-        (By.XPATH, "/html/body/div/div[1]")))
-
-    # save captcha to local
-    driver.find_element(by=By.XPATH, value='/html/body/div/div[2]').screenshot(
-        r'..\Kingdom\captcha1.png')
-
-    # This method will show image in any image viewer
-    im = Image.open(r'..\Kingdom\captcha1.png')
-    im.show()
-
-    iframes = driver.find_elements(by=By.TAG_NAME, value='iframe')
-
-    # ask user input captcha solution in terminal
-    print("Enter squares from smallest to largest (squares are numbered 1-9 left to right)")
-    for order in ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine']:
-        id = input(f"{order}: ")
-        iframes[int(id)-1].click()
-    '''
     input("Press ENTER when CAPTCHA is completed\n")
 
     # wait for login page
     WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
-        (By.XPATH, "/html/body/div/div/div[3]/div[1]/div/div/form/div[3]/div/div[1]/button")))
-
-
-# Login using premade account credentials and do login captcha manually
-def login(driver):
-    # wait for login page
-    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
-        (By.XPATH, "/html/body/div/div/div[3]/div[1]/div/div/form/div[3]/div/div[1]/button")))
+        (By.XPATH, '//*[@id="login-form"]')))
 
     # entering username and password into input boxes
     usernameBox = driver.find_element(by=By.XPATH, value='//*[@id="loginformwithcaptcha-name"]')
@@ -96,39 +68,17 @@ def login(driver):
     select = Select(driver.find_element(by=By.XPATH, value='//*[@id="loginformwithcaptcha-sessiontime"]'))
     select.select_by_visible_text('24 hours')
 
-    '''
-    # wait for captcha page show up
-    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
-        (By.XPATH, '//*[@id="captcha"]')))
-
-    # save captcha to local
-    driver.find_element(by=By.XPATH, value='//*[@id="captcha"]').screenshot(r'..\Kingdom\captcha2.png')
-
-    # This method will show image in any image viewer
-    im = Image.open(r'..\Kingdom\captcha2.png')
-    im.show()
-
-    # wait until input space show up
-    inputBox = driver.find_element(by=By.XPATH, value='//*[@id="loginformwithcaptcha-captcha"]')
-
-    # ask user input captcha solution in terminal
-    userIn = input("Enter solution: ")
-
-    # send user solution into the input space
-    inputBox.send_keys(userIn)
-
-    # click the verify(submit) button
-    driver.find_element(by=By.XPATH, value="/html/body/div/div/div[3]/div[1]/div/div/form/div[3]/div/div[1]/button").click()
-    '''
-    input("Press ENTER when CAPTCHA is completed\n")
+    input("Press ENTER when the CAPTCHA and DDoS check are completed\n")
 
     # wait for listing page show up (This Xpath may need to change based on different seed url)
     WebDriverWait(driver, 50).until(EC.visibility_of_element_located(
-        (By.XPATH, '/html/body/div/div/div[3]/div[2]')))
+        (By.XPATH, '/html/body/div/div/div[3]/div[1]/div/div[3]')))
+
+
 
 # Returns the name of the website
-def getMarketName():
+def getMKTName():
     name = 'Kingdom'
     return name
 
 
@@ -236,30 +186,17 @@ def getInterestedLinks():
     links = []
 
     # Software and Malware
-    links.append('http://kingdomm7v6yed55o2rbspvs4exn5bzfxdizqaav27tw6gw4zc65vdad.onion/offers?filter_category=127&t=c298a77d9e93ad32')
+    links.append('http://kingdomm7v6yed55o2rbspvs4exn5bzfxdizqaav27tw6gw4zc65vdad.onion/offers?filter_category=127&t=597a56b9a0b3e0d0')
     # # Services
-    # links.append('http://kingdomm7v6yed55o2rbspvs4exn5bzfxdizqaav27tw6gw4zc65vdad.onion/offers?filter_category=45&t=c298a77d9e93ad32')
-    # # Exploits
-    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=45')
-    # # Tools
-    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=46')
-    # # Malware
-    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=47')
-    # # Cryptography
-    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=48')
-    # # Others
-    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=49')
-    # # Hacking Tutorials
-    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=50')
-    # # Hacked Accounts and Database Dumps
-    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=30')
-    # # Android Moded pak
-    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=53')
+    links.append('http://kingdomm7v6yed55o2rbspvs4exn5bzfxdizqaav27tw6gw4zc65vdad.onion/offers?filter_category=45&t=597a56b9a0b3e0d0')
+    # # Guides and Tutorials
+    links.append('http://kingdomm7v6yed55o2rbspvs4exn5bzfxdizqaav27tw6gw4zc65vdad.onion/offers?filter_category=107&t=597a56b9a0b3e0d0')
 
     return links
 
 
 def crawlForum(driver):
+    print("Crawling the Kingdom market")
 
     linksToCrawl = getInterestedLinks()
 
@@ -281,6 +218,7 @@ def crawlForum(driver):
                 savePage(driver, html, link)
 
                 list = productPages(html)
+
                 for item in list:
                     itemURL = urlparse.urljoin(baseURL, str(item))
                     try:
@@ -290,18 +228,15 @@ def crawlForum(driver):
                         driver.get(itemURL)
                     except:
                         driver.refresh()
                     savePage(driver, driver.page_source, item)
                     driver.back()
 
-                # comment out
-                break
-
-                # comment out
-                if count == 1:
-                    break
+                # # comment out
+                # break
+                #
+                # # comment out
+                # if count == 1:
+                #     break
 
                 try:
-                    temp = driver.find_element(by=By.XPATH, value=
-                        '/html/body/div/div/div[3]/div[2]/div[2]/div/div/ul')
-                    next = temp.find_element_by_class_name("next")
-                    link = link.find_element_by_tag_name('a').get_attribute('href')
+                    link = driver.find_element(by=By.XPATH, value='/html/body/div/div/div[3]/div[2]/div[2]/div[3]/div/ul/li[13]/a').get_attribute('href')
                     if link == "":
                         raise NoSuchElementException
                     count += 1
 
                 except NoSuchElementException:
                     link = ""
 
@@ -313,7 +248,7 @@ def crawlForum(driver):
             print(link, e)
             i += 1
 
-    input("Crawling Kingdom Market done sucessfully. Press ENTER to continue\n")
+    print("Crawling the Kingdom market done.")
 
 
 # Returns 'True' if the link is Topic link
@@ -325,7 +260,7 @@ def isDescriptionLink(url):
 
 # Returns True if the link is a listingPage link
 def isListingLink(url):
-    if 'category' in url:
+    if 'filter_category' in url:
         return True
     return False
 
@@ -333,10 +268,8 @@ def isListingLink(url):
 # calling the parser to define the links
 def productPages(html):
     soup = BeautifulSoup(html, "html.parser")
-    #print(soup.find('div', id="container").find('div', id="content").find('table', {"class": "tborder clear"}).find('tbody').find('tr',{"class": "inline_row"}).find('strong').text)
     return kingdom_links_parser(soup)
 
 
 def crawler():
-    startCrawling()
-    # print("Crawling and Parsing BestCardingWorld .... DONE!")
+    startCrawling()
\ No newline at end of file
diff --git a/MarketPlaces/Kingdom/parser.py b/MarketPlaces/Kingdom/parser.py
index b1e05d5..abade27 100644
--- a/MarketPlaces/Kingdom/parser.py
+++ b/MarketPlaces/Kingdom/parser.py
@@ -1,4 +1,4 @@
-__author__ = 'DarkWeb'
+__author__ = 'Helium'
 
 # Here, we are importing the auxiliary functions to clean or convert data
 from MarketPlaces.Utilities.utilities import *
@@ -31,6 +31,8 @@ def kingdom_description_parser(soup):
     left = "-1"                             # 16 Product_QuantityLeft
     shipFrom = "-1"                         # 17 Product_ShippedFrom
     shipTo = "-1"                           # 18 Product_ShippedTo
+    image = "-1"                            # 19 Product_Image
+    vendor_image = "-1"                     # 20 Vendor_Image
 
     # Finding Product Name
 
@@ -95,7 +97,7 @@ def kingdom_description_parser(soup):
 
     # Populating the final variable (this should be a list with all fields scraped)
     row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
-           BTC, USD, EURO, sold, left, shipFrom, shipTo)
+           BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
 
     # Sending the results
 
@@ -126,7 +128,9 @@ def kingdom_listing_parser(soup):
     qLeft =[]                               # 17 Product_QuantityLeft
     shipFrom = []                           # 18 Product_ShippedFrom
     shipTo = []                             # 19 Product_ShippedTo
-    href = []                               # 20 Product_Links
+    image = []                              # 20 Product_Image
+    image_vendor = []                       # 21 Vendor_Image
+    href = []                               # 22 Product_Links
 
     listing = soup.find('div', {"id": "p0"}).find('div').find_all('div', {"class": "row"}, recursive=False)
 
@@ -153,12 +157,20 @@ def kingdom_listing_parser(soup):
         product = product.strip()
         name.append(product)
 
+        # Finding Product Image
+        product_image = a.find('img')
+        product_image = product_image.get('src')
+        product_image = product_image.split('base64,')[-1]
+        image.append(product_image)
+
         # Finding the Vendor
         vendor_name = a.select_one('a[href^="/user"]').text
         vendor_name = vendor_name.replace(",", " ").replace('/', '')
         vendor_name = vendor_name.strip()
         vendor.append(vendor_name)
 
+        image_vendor.append("-1")
+
         # Adding the url to the list of urls
         link = a.find('div', {"class": "col-md-7"}).select_one('a[href^="/offer/view?"]')['href']
         link = cleanLink(link)
@@ -169,7 +181,8 @@ def kingdom_listing_parser(soup):
 
     # Populate the final variable (this should be a list with all fields scraped)
     return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
-                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href,
+                            image, image_vendor)
 
 
 def kingdom_links_parser(soup):
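
A quick way to exercise the new Kingdom wiring without a live Tor session is to run the parsers against a listing page the crawler has already saved. This is a reviewer-side sketch rather than part of the patch: the HTML filename is hypothetical, and it assumes BeautifulSoup 4 and the MarketPlaces package above are on the import path.

    from bs4 import BeautifulSoup

    from MarketPlaces.Kingdom.parser import kingdom_links_parser, kingdom_listing_parser

    # Parse a listing page previously captured by savePage() during a crawl
    with open('kingdom_listing_sample.html', encoding='utf8') as f:  # hypothetical path
        soup = BeautifulSoup(f.read(), 'html.parser')

    offers = kingdom_links_parser(soup)   # offer URLs, the same list crawlForum() visits
    rows = kingdom_listing_parser(soup)   # one row per product, assembled via organizeProducts()
    print(len(offers), 'offer links found')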
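Since kingdom_listing_parser keeps only the bare base64 payload of each product image (everything after 'base64,' in the img src), recovering the actual image bytes later is a one-liner. A sketch continuing from the snippet above, with a hypothetical output path:

    import base64

    # First offer row under div#p0, the same structure kingdom_listing_parser walks
    payload = soup.find('div', {"id": "p0"}).find('img').get('src').split('base64,')[-1]
    with open('offer_thumb.png', 'wb') as f:  # hypothetical output path
        f.write(base64.b64decode(payload))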