From 735f4cfd1982afb65285e0b39b4a2d23663f7931 Mon Sep 17 00:00:00 2001 From: andymunoz92 Date: Fri, 10 Nov 2023 18:37:18 -0800 Subject: [PATCH] Crawler & Parser for CityMarket. utilities.py was modified to work with CityMarket. --- MarketPlaces/CityMarket/crawler_selenium.py | 27 ++++----- MarketPlaces/CityMarket/parser.py | 63 +++++++-------------- MarketPlaces/Utilities/utilities.py | 3 +- 3 files changed, 33 insertions(+), 60 deletions(-) diff --git a/MarketPlaces/CityMarket/crawler_selenium.py b/MarketPlaces/CityMarket/crawler_selenium.py index 704b840..6f2a399 100644 --- a/MarketPlaces/CityMarket/crawler_selenium.py +++ b/MarketPlaces/CityMarket/crawler_selenium.py @@ -131,10 +131,10 @@ def login(driver): input("Press ENTER when CAPTCHA is complete and login page has loaded\n") # entering username and password into input boxes - usernameBox = driver.find_element(by=By.XPATH, value='//*[@id="username"]') + usernameBox = driver.find_element(by=By.XPATH, value='//input[@id="username"]') # Username here usernameBox.send_keys('findingmykeys') - passwordBox = driver.find_element(by=By.XPATH, value='//*[@id="inputPassword3"]') + passwordBox = driver.find_element(by=By.XPATH, value='//input[@id="inputPassword3"]') # Password here passwordBox.send_keys('ican’tFindMycarKey$') @@ -185,16 +185,10 @@ def getNameFromURL(url): def getInterestedLinks(): links = [] - # # Hiring hacker - # links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=3') - # virus and malware - # links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=15') + # # Hire hacker + links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=3') # # ddos - # links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=16') - # # software - # links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=17') - # # botnets - # links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=18') + links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=16') # # hacking service links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=31') @@ -227,6 +221,7 @@ def crawlForum(driver): list = productPages(html) for item in list: + # what is this line doing? itemURL = urlparse.urljoin(baseURL, str(item)) try: driver.get(itemURL) @@ -236,15 +231,17 @@ def crawlForum(driver): driver.back() # comment out - break + # break # comment out + """count += 1 if count == 1: - break + break""" try: - link = driver.find_element(by=By.XPATH, value= - '/html/body/div[1]/div/div[2]/nav/ul/li[5]/a').get_attribute('href') + #link = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div/div[2]/nav/ul/li[5]/a').get_attribute('href') + link = driver.find_element(by=By.XPATH, + value='//a[@rel="next"]').get_attribute('href') if link == "": raise NoSuchElementException count += 1 diff --git a/MarketPlaces/CityMarket/parser.py b/MarketPlaces/CityMarket/parser.py index 75ca4fa..8c9bf37 100644 --- a/MarketPlaces/CityMarket/parser.py +++ b/MarketPlaces/CityMarket/parser.py @@ -38,42 +38,20 @@ def city_description_parser(soup): vendor_image = "-1" # 20 Vendor_Image divmd7 = soup.find('div', {'class': "col-md-7"}) - ptag = soup.findAll('p') - - # Finding Product Name - # NA # Finding Vendor vendor = divmd7.find('a').text.strip() - # Finding Vendor Rating - # NA - - # Finding Successful Transactions - success = soup.find('span', {'class': "badge-primary"}) - # Finding Prices USD = soup.find('span', {'class': "total"}).text.strip() - BTC = soup.find('div', {'class': "text-center"}).text.strip() - - # Finding Escrow - escrow = ptag[-1].text.strip() - - # Finding the Product Category - category = ptag[-2].text.strip() - - # Finding the Product Quantity Available - # NA - - # Finding Number Sold - # NA + tempBTC = soup.find('div', {'class': "text-center"}).text.strip() + BTC = tempBTC.replace("BTC", "").strip() - # Finding Shipment Information (Origin) - # NA - - # Finding Shipment Information (Destination) - # NA + # Finding Product Image + image = soup.find('img', {'class': 'img-fluid'}) + image = image.get('src') + image = image.split('base64,')[-1] # Finding the Product description describe = soup.find('div', {'class': "text-white"}).text @@ -150,7 +128,7 @@ def city_listing_parser(soup): image_vendor = [] # 21 Vendor_Image href = [] # 22 Product_Links - listing = soup.findAll('div', {"class": "card"}) + listing = soup.findAll('div', {"class": "p-4"}) # Populating the Number of Products nm = len(listing) @@ -163,7 +141,12 @@ def city_listing_parser(soup): link = cleanLink(link) href.append(link) - # Finding the Product + # Category + tempCategory = soup.find('select', {"name": "category"}) + tempCategory = tempCategory.find('option', selected=True).text.strip() + category.append(tempCategory) + + # Product Name product = a.find('h4', {"class": "text-center"}).text product = product.replace('\n', ' ') product = product.replace(",", "") @@ -171,9 +154,7 @@ def city_listing_parser(soup): product = product.strip() name.append(product) - bae = a.find('div', {'class': "media-content"}).find('div').find_all('div') - - # Finding Prices + # USD and BTC Price price = a.find('div', {"class": "price"}).text tempUSD = price.split("~")[0] tempUSD = tempUSD.replace("$", "") @@ -185,17 +166,11 @@ def city_listing_parser(soup): tempBTC = tempBTC.strip() BTC.append(tempBTC) - # Finding the Vendor - # NA - - # Finding the Category - # NA - - # Finding Number Sold and Quantity Left - # NA - - # Finding Successful Transactions - # NA + # Img + product_image = a.find('img') + product_image = product_image.get('src') + product_image = product_image.split('base64,')[-1] + image.append(product_image) # Searching for CVE and MS categories cve = a.findAll(text=re.compile('CVE-\d{4}-\d{4}')) diff --git a/MarketPlaces/Utilities/utilities.py b/MarketPlaces/Utilities/utilities.py index 915f284..7def2f5 100644 --- a/MarketPlaces/Utilities/utilities.py +++ b/MarketPlaces/Utilities/utilities.py @@ -252,7 +252,8 @@ def organizeProducts(marketplace, nm, vendor, rating_vendor, success_vendor, nom lne = marketplace # 0 lne += "," - lne += vendor[n] # 1 + # Added for CityMarket + lne += "=1" if len(vendor) == 0 else vendor[n] # 1 lne += "," lne += "-1" if len(rating_vendor) == 0 else rating_vendor[n] # 2 lne += ","