From 19e8513dc4ec6cdb98736089e508b8cefe9b6f58 Mon Sep 17 00:00:00 2001 From: ericssonmarin-cpp <85146518+ericssonmarin-cpp@users.noreply.github.com> Date: Thu, 31 Aug 2023 12:46:16 -0700 Subject: [PATCH] Images tracking. --- MarketPlaces/DB_Connection/db_connection.py | 2 +- MarketPlaces/DarkBazar/parser.py | 66 +++++++----- MarketPlaces/DarkMatter/parser.py | 11 +- MarketPlaces/HiddenMarket/parser.py | 9 +- MarketPlaces/LionMarketplace/parser.py | 106 ++++++++++++-------- 5 files changed, 114 insertions(+), 80 deletions(-) diff --git a/MarketPlaces/DB_Connection/db_connection.py b/MarketPlaces/DB_Connection/db_connection.py index 664f6e8..eb4d996 100644 --- a/MarketPlaces/DB_Connection/db_connection.py +++ b/MarketPlaces/DB_Connection/db_connection.py @@ -266,7 +266,7 @@ def create_items(cur, row, marketId, vendorId): recset = cur.fetchall() - # decode_decrypt_image_in_base64(recset[0][20]) + #decode_decrypt_image_in_base64(recset[0][20]) if (str(recset[0][4]) != str(row[5] if row[5] != '-1' else None) or str(recset[0][5]) != str(row[6] if row[6] != '-1' else None) or str(recset[0][6]) != str(row[7] if row[7] != '-1' else None) or str(recset[0][7]) != str(row[8] if row[8] != '-1' else None) or diff --git a/MarketPlaces/DarkBazar/parser.py b/MarketPlaces/DarkBazar/parser.py index 9b2d823..9386d18 100644 --- a/MarketPlaces/DarkBazar/parser.py +++ b/MarketPlaces/DarkBazar/parser.py @@ -33,6 +33,8 @@ def darkbazar_description_parser(soup): left = "-1" # 16 Product_QuantityLeft shipFrom = "-1" # 17 Product_ShippedFrom shipTo = "-1" # 18 Product_ShippedTo + image = "-1" # 19 Product_Image + vendor_image = "-1" # 20 Vendor_Image # Finding Product Name divmb = soup.findAll('div', {'class': "mb-1"}) @@ -94,6 +96,11 @@ def darkbazar_description_parser(soup): cardbody = soup.findAll('div', {'class': "card-body"}) describe = cardbody[1].text.strip() + # Finding Product Image + image = soup.find('div', {'class': 'product-primary'}).find('img') + image = image.get('src') + image = image.split('base64,')[-1] + # Searching for CVE and MS categories cve = soup.findAll(text=re.compile('CVE-\d{4}-\d{4}')) if cve: @@ -114,7 +121,7 @@ def darkbazar_description_parser(soup): # Populating the final variable (this should be a list with all fields scraped) row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate, - BTC, USD, EURO, sold, left, shipFrom, shipTo) + BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image) # Sending the results return row @@ -128,27 +135,29 @@ def darkbazar_listing_parser(soup): # Fields to be parsed nm = 0 # *Total_Products (Should be Integer) - mktName = "DarkBazar" # 0 *Marketplace_Name - vendor = [] # 1 *Vendor y - rating_vendor = [] # 2 Vendor_Rating - success = [] # 3 Vendor_Successful_Transactions - name = [] # 4 *Product_Name y - CVE = [] # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) dont worry about this - MS = [] # 6 Product_MS_Classification (Microsoft Security) dont worry about this - category = [] # 7 Product_Category y - describe = [] # 8 Product_Description - views = [] # 9 Product_Number_Of_Views - reviews = [] # 10 Product_Number_Of_Reviews - rating_item = [] # 11 Product_Rating - addDate = [] # 12 Product_AddDate - BTC = [] # 13 Product_BTC_SellingPrice - USD = [] # 14 Product_USD_SellingPrice y - EURO = [] # 15 Product_EURO_SellingPrice - sold = [] # 16 Product_QuantitySold - qLeft = [] # 17 Product_QuantityLeft - shipFrom = [] # 18 Product_ShippedFrom - shipTo = [] # 19 Product_ShippedTo - href = [] # 20 Product_Links + mktName = "DarkBazar" # 0 *Marketplace_Name + vendor = [] # 1 *Vendor y + rating_vendor = [] # 2 Vendor_Rating + success = [] # 3 Vendor_Successful_Transactions + name = [] # 4 *Product_Name y + CVE = [] # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) dont worry about this + MS = [] # 6 Product_MS_Classification (Microsoft Security) dont worry about this + category = [] # 7 Product_Category y + describe = [] # 8 Product_Description + views = [] # 9 Product_Number_Of_Views + reviews = [] # 10 Product_Number_Of_Reviews + rating_item = [] # 11 Product_Rating + addDate = [] # 12 Product_AddDate + BTC = [] # 13 Product_BTC_SellingPrice + USD = [] # 14 Product_USD_SellingPrice y + EURO = [] # 15 Product_EURO_SellingPrice + sold = [] # 16 Product_QuantitySold + qLeft = [] # 17 Product_QuantityLeft + shipFrom = [] # 18 Product_ShippedFrom + shipTo = [] # 19 Product_ShippedTo + image = [] # 20 Product_Image + image_vendor = [] # 21 Vendor_Image + href = [] # 22 Product_Links listing = soup.findAll('div', {"id": "itembox"}) @@ -172,6 +181,12 @@ def darkbazar_listing_parser(soup): product = product.strip() name.append(product) + # Finding Product Image + product_image = a.find('img') + product_image = product_image.get('src') + product_image = product_image.split('base64,')[-1] + image.append(product_image) + # Finding Prices price = lb[-1].find('div', {"class": "mb-1"}).text price = price.replace("$","") @@ -184,6 +199,8 @@ def darkbazar_listing_parser(soup): vendor_name = vendor_name.strip() vendor.append(vendor_name) + image_vendor.append("-1") + # Finding the Category cat = lb[-1].find("span").text cat = cat.replace("class:", "") @@ -211,6 +228,9 @@ def darkbazar_listing_parser(soup): qLeft.append(quant) # add shipping information + ship = lb[2].findAll('small')[1].findAll('span')[1].text.split("->") + shipFrom.append(ship[0].replace("Ship from ", "").strip()) + shipTo.append(ship[1].replace("to ", "").strip()) # Searching for CVE and MS categories @@ -242,7 +262,7 @@ def darkbazar_listing_parser(soup): # Populate the final variable (this should be a list with all fields scraped) return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views, - reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href) + reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor) # called by the crawler to get description links on a listing page diff --git a/MarketPlaces/DarkMatter/parser.py b/MarketPlaces/DarkMatter/parser.py index 692db80..9ff203f 100644 --- a/MarketPlaces/DarkMatter/parser.py +++ b/MarketPlaces/DarkMatter/parser.py @@ -58,10 +58,6 @@ def darkmatter_description_parser(soup): temp = temp.replace('\n', ' ') describe = cleanString(temp.strip()) - # Finding Product Image - #image = soup.find('div', {'class': 'woocommerce-product-gallery__image'}).find('img') - #image = image.get('src') - #product category try: temp = soup.find('table', {'class', 'vtable'}) @@ -101,6 +97,7 @@ def darkmatter_description_parser(soup): temp2 = temp[6].find('td').text sold = cleanString(temp2.strip()) + # Finding Product Image image = soup.find('td', {"class": "vtop"}).find('img').get('src') image = image.split('base64,')[-1] @@ -167,11 +164,6 @@ def darkmatter_listing_parser(soup): result = temp[index + len("pks x "):] name.append(cleanString(result)) - # Finding Product Image - #product_image = a.find('img', {'class': 'attachment-woocommerce_thumbnail size-woocommerce_thumbnail'}) - #product_image = product_image.get('src') - #image.append(product_image) - CVE.append("-1") MS.append("-1") @@ -231,6 +223,7 @@ def darkmatter_listing_parser(soup): temp = a.find('a').get('href') href.append(temp) + # Finding Product Image image = images[count*2].find('img').get('src') image = image.split('base64,')[-1] diff --git a/MarketPlaces/HiddenMarket/parser.py b/MarketPlaces/HiddenMarket/parser.py index b1783e0..106cc6d 100644 --- a/MarketPlaces/HiddenMarket/parser.py +++ b/MarketPlaces/HiddenMarket/parser.py @@ -161,12 +161,13 @@ def hiddenmarket_listing_parser(soup): nm = len(listing) # Finding Category - # cat = soup.find("div", {'class': "heading"}).text - # cat = cat.replace(",", "") - # cat = cat.strip() + cat = soup.find("div", {'class': "heading"}).text + cat = cat.replace(",", "") + cat = cat.strip() for card in listing: - # category.append(cat) + + category.append(cat) # Adding the url to the list of urls diff --git a/MarketPlaces/LionMarketplace/parser.py b/MarketPlaces/LionMarketplace/parser.py index 06a87e3..81a911c 100644 --- a/MarketPlaces/LionMarketplace/parser.py +++ b/MarketPlaces/LionMarketplace/parser.py @@ -12,26 +12,30 @@ from bs4 import BeautifulSoup #@param: soup object looking at html page of description page #return: 'row' that contains a variety of lists that each hold info on the description page def lionmarketplace_description_parser(soup): + # Fields to be parsed - vendor = "-1" # 0 *Vendor_Name - success = "-1" # 1 Vendor_Successful_Transactions - rating_vendor = "-1" # 2 Vendor_Rating - name = "-1" # 3 *Product_Name - describe = "-1" # 4 Product_Description - CVE = "-1" # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) dont worry about that much - MS = "-1" # 6 Product_MS_Classification (Microsoft Security) dont worry about that much - category = "-1" # 7 Product_Category - views = "-1" # 8 Product_Number_Of_Views - reviews = "-1" # 9 Product_Number_Of_Reviews - rating_item = "-1" # 10 Product_Rating - addDate = "-1" # 11 Product_AddedDate - BTC = "-1" # 12 Product_BTC_SellingPrice - USD = "-1" # 13 Product_USD_SellingPrice - EURO = "-1" # 14 Product_EURO_SellingPrice - sold = "-1" # 15 Product_QuantitySold - left = "-1" # 16 Product_QuantityLeft - shipFrom = "-1" # 17 Product_ShippedFrom - shipTo = "-1" # 18 Product_ShippedTo + + vendor = "-1" # 0 *Vendor_Name + success = "-1" # 1 Vendor_Successful_Transactions + rating_vendor = "-1" # 2 Vendor_Rating + name = "-1" # 3 *Product_Name + describe = "-1" # 4 Product_Description + CVE = "-1" # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) + MS = "-1" # 6 Product_MS_Classification (Microsoft Security) + category = "-1" # 7 Product_Category + views = "-1" # 8 Product_Number_Of_Views + reviews = "-1" # 9 Product_Number_Of_Reviews + rating_item = "-1" # 10 Product_Rating + addDate = "-1" # 11 Product_AddedDate + BTC = "-1" # 12 Product_BTC_SellingPrice + USD = "-1" # 13 Product_USD_SellingPrice + EURO = "-1" # 14 Product_EURO_SellingPrice + sold = "-1" # 15 Product_QuantitySold + left = "-1" # 16 Product_QuantityLeft + shipFrom = "-1" # 17 Product_ShippedFrom + shipTo = "-1" # 18 Product_ShippedTo + image = "-1" # 19 Product_Image + vendor_image = "-1" # 20 Vendor_Image # vendor name temp = soup.find('div', {'class': 'btn-group'}).find('a').text @@ -55,6 +59,11 @@ def lionmarketplace_description_parser(soup): temp = soup.find('div', {'class': "mt-4"}).find(text=True, recursive=False) describe = cleanString(temp.strip()) + # Finding Product Image + image = soup.find('div', {'id': 'slide-1'}).find('img') + image = image.get('src') + image = image.split('base64,')[-1] + CVE = "-1" # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) dont worry about that much MS = "-1" # 6 Product_MS_Classification (Microsoft Security) dont worry about that much @@ -111,7 +120,7 @@ def lionmarketplace_description_parser(soup): # Populating the final variable (this should be a list with all fields scraped) row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate, - BTC, USD, EURO, sold, left, shipFrom, shipTo) + BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image) # Sending the results return row @@ -122,29 +131,32 @@ def lionmarketplace_description_parser(soup): #@param: soup object looking at html page of listing page #return: 'row' that contains a variety of lists that each hold info on the listing page def lionmarketplace_listing_parser(soup): + # Fields to be parsed - nm = 0 # *Total_Products (Should be Integer) - mktName = "LionMarketplace" # 0 *Marketplace_Name - vendor = [] # 1 *Vendor y - rating_vendor = [] # 2 Vendor_Rating - success = [] # 3 Vendor_Successful_Transactions - name = [] # 4 *Product_Name y - CVE = [] # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) dont worry about this - MS = [] # 6 Product_MS_Classification (Microsoft Security) dont worry about this - category = [] # 7 Product_Category y - describe = [] # 8 Product_Description - views = [] # 9 Product_Number_Of_Views - reviews = [] # 10 Product_Number_Of_Reviews - rating_item = [] # 11 Product_Rating - addDate = [] # 12 Product_AddDate - BTC = [] # 13 Product_BTC_SellingPrice - USD = [] # 14 Product_USD_SellingPrice y - EURO = [] # 15 Product_EURO_SellingPrice - sold = [] # 16 Product_QuantitySold - qLeft = [] # 17 Product_QuantityLeft - shipFrom = [] # 18 Product_ShippedFrom - shipTo = [] # 19 Product_ShippedTo - href = [] # 20 Product_Links + nm = 0 # *Total_Products (Should be Integer) + mktName = "LionMarketplace" # 0 *Marketplace_Name + vendor = [] # 1 *Vendor y + rating_vendor = [] # 2 Vendor_Rating + success = [] # 3 Vendor_Successful_Transactions + name = [] # 4 *Product_Name y + CVE = [] # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) + MS = [] # 6 Product_MS_Classification (Microsoft Security) + category = [] # 7 Product_Category y + describe = [] # 8 Product_Description + views = [] # 9 Product_Number_Of_Views + reviews = [] # 10 Product_Number_Of_Reviews + rating_item = [] # 11 Product_Rating + addDate = [] # 12 Product_AddDate + BTC = [] # 13 Product_BTC_SellingPrice + USD = [] # 14 Product_USD_SellingPrice y + EURO = [] # 15 Product_EURO_SellingPrice + sold = [] # 16 Product_QuantitySold + qLeft =[] # 17 Product_QuantityLeft + shipFrom = [] # 18 Product_ShippedFrom + shipTo = [] # 19 Product_ShippedTo + image = [] # 20 Product_Image + image_vendor = [] # 21 Vendor_Image + href = [] # 22 Product_Links listings = soup.findAll('div', {"class": "col-md-4 my-md-0 my-2 col-12"}) @@ -161,6 +173,8 @@ def lionmarketplace_listing_parser(soup): temp = temp.replace("Vendor:", "") vendor.append(cleanString(temp.strip())) + image_vendor.append("-1") + # vendor rating rating_vendor.append("-1") @@ -171,6 +185,12 @@ def lionmarketplace_listing_parser(soup): temp = a.find('a').text name.append(cleanString(temp.strip())) + # Finding Product Image + product_image = listing.find('img', {'class': 'card-img-top rounded'}) + product_image = product_image.get('src') + product_image = product_image.split('base64,')[-1] + image.append(product_image) + CVE.append('-1') MS.append('-1') @@ -211,7 +231,7 @@ def lionmarketplace_listing_parser(soup): # Populate the final variable (this should be a list with all fields scraped) return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views, - reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href) + reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor) #called by the crawler to get description links on a listing page