diff --git a/MarketPlaces/CityMarket/parser.py b/MarketPlaces/CityMarket/parser.py index dbab917..e5f3575 100644 --- a/MarketPlaces/CityMarket/parser.py +++ b/MarketPlaces/CityMarket/parser.py @@ -11,7 +11,7 @@ from bs4 import BeautifulSoup #stores info it needs in different lists, these lists are returned after being organized #@param: soup object looking at html page of description page #return: 'row' that contains a variety of lists that each hold info on the description page -def darkfox_description_parser(soup): +def city_description_parser(soup): # Fields to be parsed @@ -39,68 +39,46 @@ def darkfox_description_parser(soup): success = "-1" # 21 Vendor_Successful_Transactions EURO = "-1" # 22 Product_EURO_SellingPrice + divmd7 = soup.find('div', {'class': "col-md-7"}) + ptag = soup.findAll('p') + # Finding Product Name - name = soup.find('h1').text - name = name.replace('\n', ' ') - name = name.replace(",", "") - name = name.strip() + # NA # Finding Vendor - vendor = soup.find('h3').find('a').text.strip() + vendor = divmd7.find('a').text.strip() # Finding Vendor Rating - rating = soup.find('span', {'class': "tag is-dark"}).text.strip() + # NA # Finding Successful Transactions - success = soup.find('h3').text - success = success.replace("Vendor: ", "") - success = success.replace(vendor, "") - success = success.replace("(", "") - success = success.replace(")", "") - success = success.strip() - - bae = soup.find('div', {'class': "box"}).find_all('ul') + success = soup.find('span', {'class': "badge-primary"}) # Finding Prices - USD = bae[1].find('strong').text.strip() + USD = soup.find('span', {'class': "total"}).text.strip() - li = bae[2].find_all('li') + BTC = soup.find('div', {'class': "text-center"}).text.strip() # Finding Escrow - escrow = li[0].find('span', {'class': "tag is-dark"}).text.strip() + escrow = ptag[-1].text.strip() # Finding the Product Category - category = li[1].find('span', {'class': "tag is-dark"}).text.strip() + category = ptag[-2].text.strip() # Finding the Product Quantity Available - left = li[3].find('span', {'class': "tag is-dark"}).text.strip() + # NA # Finding Number Sold - sold = li[4].find('span', {'class': "tag is-dark"}).text.strip() - - li = bae[3].find_all('li') + # NA # Finding Shipment Information (Origin) - if "Ships from:" in li[-2].text: - shipFrom = li[-2].text - shipFrom = shipFrom.replace("Ships from: ", "") - # shipFrom = shipFrom.replace(",", "") - shipFrom = shipFrom.strip() + # NA # Finding Shipment Information (Destination) - shipTo = li[-1].find('div', {'title': "List of countries is scrollable"}).text - shipTo = shipTo.replace("Ships to: ", "") - shipTo = shipTo.strip() - if "certain countries" in shipTo: - countries = "" - tags = li[-1].find_all('span', {'class': "tag"}) - for tag in tags: - country = tag.text.strip() - countries += country + ", " - shipTo = countries.strip(", ") + # NA # Finding the Product description - describe = soup.find('div', {'class': "pre-line"}).text + describe = soup.find('div', {'class': "text-white"}).text describe = describe.replace("\n", " ") describe = describe.strip() @@ -147,11 +125,11 @@ def darkfox_description_parser(soup): #stores info it needs in different lists, these lists are returned after being organized #@param: soup object looking at html page of listing page #return: 'row' that contains a variety of lists that each hold info on the listing page -def darkfox_listing_parser(soup): +def city_listing_parser(soup): # Fields to be parsed nm = 0 # Total_Products (Should be Integer) - mktName = "DarkFox" # 0 Marketplace_Name + mktName = "CityMarket" # 0 Marketplace_Name name = [] # 1 Product_Name CVE = [] # 2 Product_CVE_Classification (Common Vulnerabilities and Exposures) MS = [] # 3 Product_MS_Classification (Microsoft Security) @@ -188,7 +166,7 @@ def darkfox_listing_parser(soup): href.append(link) # Finding the Product - product = bae[1].find('p').text + product = a.find('h4', {"class": "text-center"}).text product = product.replace('\n', ' ') product = product.replace(",", "") product = product.replace("...", "") @@ -197,49 +175,29 @@ def darkfox_listing_parser(soup): bae = a.find('div', {'class': "media-content"}).find('div').find_all('div') - if len(bae) >= 5: - # Finding Prices - price = bae[0].text - ud = price.replace(" USD", " ") - # u = ud.replace("$","") - u = ud.replace(",", "") - u = u.strip() - USD.append(u) - # bc = (prc[1]).strip(' BTC') - # BTC.append(bc) - - # Finding the Vendor - vendor_name = bae[1].find('a').text - vendor_name = vendor_name.replace(",", "") - vendor_name = vendor_name.strip() - vendor.append(vendor_name) - - # Finding the Category - cat = bae[2].find('small').text - cat = cat.replace("Category: ", "") - cat = cat.replace(",", "") - cat = cat.strip() - category.append(cat) - - # Finding Number Sold and Quantity Left - num = bae[3].text - num = num.replace("Sold: ", "") - num = num.strip() - sold.append(num) - - quant = bae[4].find('small').text - quant = quant.replace("In stock: ", "") - quant = quant.strip() - qLeft.append(quant) - - # Finding Successful Transactions - freq = bae[1].text - freq = freq.replace(vendor_name, "") - freq = re.sub(r'Vendor Level \d+', "", freq) - freq = freq.replace("(", "") - freq = freq.replace(")", "") - freq = freq.strip() - success.append(freq) + # Finding Prices + price = a.find('div', {"class": "price"}).text + tempUSD = price.split("~")[0] + tempUSD = tempUSD.replace("$", "") + tempUSD = tempUSD.strip() + USD.append(tempUSD) + + tempBTC = price.split("~")[1] + tempBTC = tempBTC.replace("BTC", "") + tempBTC = tempBTC.strip() + BTC.append(tempBTC) + + # Finding the Vendor + # NA + + # Finding the Category + # NA + + # Finding Number Sold and Quantity Left + # NA + + # Finding Successful Transactions + # NA # Searching for CVE and MS categories cve = a.findAll(text=re.compile('CVE-\d{4}-\d{4}')) @@ -281,7 +239,7 @@ def city_links_parser(soup): # Returning all links that should be visited by the Crawler href = [] - listing = soup.findAll('div', {"class": "col-12 p-0"}) + listing = soup.findAll('div', {"class": "p-4"}) for a in listing: bae = a.find('a', href=True)