|
@ -1,11 +1,11 @@ |
|
|
__author__ = 'DarkWeb' |
|
|
__author__ = 'DarkWeb' |
|
|
|
|
|
|
|
|
# Here, we are importing the auxiliary functions to clean or convert data |
|
|
# Here, we are importing the auxiliary functions to clean or convert data |
|
|
from typing import List |
|
|
|
|
|
|
|
|
from typing import List, Tuple |
|
|
from MarketPlaces.Utilities.utilities import * |
|
|
from MarketPlaces.Utilities.utilities import * |
|
|
|
|
|
|
|
|
# Here, we are importing BeautifulSoup to search through the HTML tree |
|
|
# Here, we are importing BeautifulSoup to search through the HTML tree |
|
|
from bs4 import BeautifulSoup, Tag |
|
|
|
|
|
|
|
|
from bs4 import BeautifulSoup, ResultSet, Tag |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def thiefWorld_description_parser(soup: BeautifulSoup) -> Tuple: |
|
|
def thiefWorld_description_parser(soup: BeautifulSoup) -> Tuple: |
|
@ -225,6 +225,85 @@ def darkfox_description_parser(soup): |
|
|
return row |
|
|
return row |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def thiefWorld_listing_parser(soup: BeautifulSoup):
    """Parse a ThiefWorld listing page into per-product field columns.

    Every ``div.catalog_item`` found in ``soup`` is treated as one product.
    For each product the following are read from the markup:

    * name and link  -- the ``<a>`` inside ``div.title``
    * description    -- the text of ``div.text``
    * USD price      -- the inner ``<span>`` of ``span.price``, keeping only
                        the text that precedes the literal ``'USD'``
    * vendor         -- the ``<a>`` inside the div whose class attribute is
                        ``'market over'``

    All other fields are not read from this page and are filled with the
    string ``'-1'``, one entry per product.

    :param soup: BeautifulSoup tree of a ThiefWorld listing page.
    :return: whatever ``organizeProducts`` returns for the collected columns.
    :raises AttributeError: if a product is missing any of the elements
        listed above (the chained ``find`` calls are not guarded).
    """
    mktName = "ThiefWorld"                                  # 0 Marketplace_Name

    catalogItems: ResultSet[Tag] = soup.find_all('div', {'class': 'catalog_item'})
    nm = len(catalogItems)                                  # Total_Products

    def unknownColumn():
        # A fresh list per field so that no two columns share one object.
        return ['-1'] * nm

    # Columns that are actually scraped from the page.
    name = []                                               # 1 Product_Name
    describe = []                                           # 5 Product_Description
    USD = []                                                # 12 Product_USD_SellingPrice
    vendor = []                                             # 18 Vendor
    href = []                                               # 23 Product_Links (Urls)

    for item in catalogItems:
        titleLink: Tag = item.find('div', {'class': 'title'}).find('a')
        name.append(cleanString(titleLink.text.strip()))
        href.append(titleLink.get('href'))

        descriptionText = item.find('div', {'class': 'text'}).text
        describe.append(cleanString(descriptionText.strip()))

        # Keep only the text in front of the 'USD' marker.
        rawPrice = item.find('span', {'class': 'price'}).find('span').text
        amount = rawPrice.split('USD')[0]
        USD.append(cleanString(amount.strip()))

        vendorText = item.find('div', {'class': 'market over'}).find('a').text
        vendor.append(cleanString(vendorText.strip()))

    # Columns this page does not provide: one '-1' per product.
    CVE = unknownColumn()                                   # 2 Product_CVE_Classification
    MS = unknownColumn()                                    # 3 Product_MS_Classification
    category = unknownColumn()                              # 4 Product_Category
    escrow = unknownColumn()                                # 6 Vendor_Warranty
    views = unknownColumn()                                 # 7 Product_Number_Of_Views
    reviews = unknownColumn()                               # 8 Product_Number_Of_Reviews
    addDate = unknownColumn()                               # 9 Product_AddDate
    lastSeen = unknownColumn()                              # 10 Product_LastViewDate
    BTC = unknownColumn()                                   # 11 Product_BTC_SellingPrice
    EURO = unknownColumn()                                  # 13 Product_EURO_SellingPrice
    sold = unknownColumn()                                  # 14 Product_QuantitySold
    qLeft = unknownColumn()                                 # 15 Product_QuantityLeft
    shipFrom = unknownColumn()                              # 16 Product_ShippedFrom
    shipTo = unknownColumn()                                # 17 Product_ShippedTo
    rating = unknownColumn()                                # 19 Vendor_Rating
    success = unknownColumn()                               # 20 Vendor_Successful_Transactions

    return organizeProducts(
        mktName, nm, name, CVE, MS, category, describe, escrow, views,
        reviews, addDate, lastSeen, BTC, USD, EURO, qLeft, shipFrom, shipTo,
        vendor, rating, success, sold, href,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#parses listing pages, so takes html pages of listing pages using soup object, and parses it for info it needs |
|
|
#parses listing pages, so takes html pages of listing pages using soup object, and parses it for info it needs |
|
|
#stores info it needs in different lists, these lists are returned after being organized |
|
|
#stores info it needs in different lists, these lists are returned after being organized |
|
|
#@param: soup object looking at html page of listing page |
|
|
#@param: soup object looking at html page of listing page |
|
|