diff --git a/MarketPlaces/MetaVerseMarket/parser.py b/MarketPlaces/MetaVerseMarket/parser.py index 8c83293..047db35 100644 --- a/MarketPlaces/MetaVerseMarket/parser.py +++ b/MarketPlaces/MetaVerseMarket/parser.py @@ -11,7 +11,7 @@ from bs4 import BeautifulSoup # stores info it needs in different lists, these lists are returned after being organized # @param: soup object looking at html page of description page # return: 'row' that contains a variety of lists that each hold info on the description page -def darkfox_description_parser(soup): +def metaversemarket_description_parser(soup): # Fields to be parsed name = "-1" # 0 Product_Name @@ -36,67 +36,58 @@ def darkfox_description_parser(soup): EURO = "-1" # 22 Product_EURO_SellingPrice # Finding Product Name - name = soup.find('h1').text - name = name.replace('\n', ' ') - name = name.replace(",", "") - name = name.strip() + name = soup.find('div', {'class': "panel-heading"}).text.strip # Finding Vendor - vendor = soup.find('h3').find('a').text.strip() + temp = soup.findAll('div', {'class': "col-xs-12 col-sm-6 mt-5"}) + temp = temp[1].findAll('span') + temp = temp[1].find('b').text + name = temp.replace("@", "") - # Finding Vendor Rating - rating = soup.find('span', {'class': "tag is-dark"}).text.strip() + # Finding Product Reviews + review = soup.find('span', {'class': "badge bg-success fs-12px"}).text.strip() # Finding Successful Transactions - success = soup.find('h3').text - success = success.replace("Vendor: ", "") - success = success.replace(vendor, "") - success = success.replace("(", "") - success = success.replace(")", "") - success = success.strip() + # NA - bae = soup.find('div', {'class': "box"}).find_all('ul') # Finding Prices - USD = bae[1].find('strong').text.strip() - - li = bae[2].find_all('li') + USD = soup.find('h3', {'class': "mb-2"}).text() + USD = USD.replace("Price: $", "").strip() # Finding Escrow - escrow = li[0].find('span', {'class': "tag is-dark"}).text.strip() + escrow = soup.find('div', {'class': "alert alert-info text-center fw-bold"}).text + escrow = escrow.replace('You are protected by ', "").strip() # Finding the Product Category - category = li[1].find('span', {'class': "tag is-dark"}).text.strip() + temp = soup.select('div[class="mt-2"]') + temp = temp[0].findAll('span') + category = temp[1].text.strip() # Finding the Product Quantity Available - left = li[3].find('span', {'class': "tag is-dark"}).text.strip() + # temp = soup.find('em', {'class': "icon ni ni-layers-fill"}).parent.parent.parent + # left = temp.text + # left = left.replace("Supply:", "") + # left = left.strip() + temp = soup.findAll('span', {'class': "badge bg-success"}) + temp = temp[1].text.split("/") + left = temp[1].strip() # Finding Number Sold - sold = li[4].find('span', {'class': "tag is-dark"}).text.strip() + sold = temp[0].strip() - li = bae[3].find_all('li') # Finding Shipment Information (Origin) - if "Ships from:" in li[-2].text: - shipFrom = li[-2].text - shipFrom = shipFrom.replace("Ships from: ", "") - # shipFrom = shipFrom.replace(",", "") - shipFrom = shipFrom.strip() + temp = soup.findAll('div', {'class': "alert alert-info"}) + temp = temp[1].text.split("to") + shipFrom = temp[0].replace("Shipping from ", "").strip() # Finding Shipment Information (Destination) - shipTo = li[-1].find('div', {'title': "List of countries is scrollable"}).text - shipTo = shipTo.replace("Ships to: ", "") - shipTo = shipTo.strip() - if "certain countries" in shipTo: - countries = "" - tags = li[-1].find_all('span', {'class': "tag"}) - for tag in tags: - country = tag.text.strip() - countries += country + ", " - shipTo = countries.strip(", ") + shipTo = temp[1].split("for") + shipTo = shipTo[0].strip() # Finding the Product description - describe = soup.find('div', {'class': "pre-line"}).text + describe = soup.find('p', {'class': "card-text"}).text describe = describe.replace("\n", " ") describe = describe.strip() @@ -143,7 +134,7 @@ def darkfox_description_parser(soup): # stores info it needs in different lists, these lists are returned after being organized # @param: soup object looking at html page of listing page # return: 'row' that contains a variety of lists that each hold info on the listing page -def darkfox_listing_parser(soup): +def metaversemarket_listing_parser(soup): # Fields to be parsed nm = 0 # Total_Products (Should be Integer) mktName = "DarkFox" # 0 Marketplace_Name @@ -169,7 +160,7 @@ def darkfox_listing_parser(soup): success = [] # 20 Vendor_Successful_Transactions href = [] # 23 Product_Links (Urls) - listing = soup.findAll('div', {"class": "card"}) + listing = soup.findAll('div', {"class": "col-12 col-sm-4 col-xl-3 product_item_col p-1"}) # Populating the Number of Products nm = len(listing) @@ -183,58 +174,77 @@ def darkfox_listing_parser(soup): href.append(link) # Finding the Product - product = bae[1].find('p').text + product = bae[1].find('span', {"class": "text-primary"}).text product = product.replace('\n', ' ') product = product.replace(",", "") product = product.replace("...", "") product = product.strip() name.append(product) - bae = a.find('div', {'class': "media-content"}).find('div').find_all('div') - - if len(bae) >= 5: - # Finding Prices - price = bae[0].text - ud = price.replace(" USD", " ") - # u = ud.replace("$","") - u = ud.replace(",", "") - u = u.strip() - USD.append(u) - # bc = (prc[1]).strip(' BTC') - # BTC.append(bc) - - # Finding the Vendor - vendor_name = bae[1].find('a').text - vendor_name = vendor_name.replace(",", "") - vendor_name = vendor_name.strip() - vendor.append(vendor_name) - - # Finding the Category - cat = bae[2].find('small').text - cat = cat.replace("Category: ", "") - cat = cat.replace(",", "") - cat = cat.strip() - category.append(cat) - - # Finding Number Sold and Quantity Left - num = bae[3].text - num = num.replace("Sold: ", "") - num = num.strip() - sold.append(num) - - quant = bae[4].find('small').text - quant = quant.replace("In stock: ", "") - quant = quant.strip() - qLeft.append(quant) - - # Finding Successful Transactions - freq = bae[1].text - freq = freq.replace(vendor_name, "") - freq = re.sub(r'Vendor Level \d+', "", freq) - freq = freq.replace("(", "") - freq = freq.replace(")", "") - freq = freq.strip() - success.append(freq) + # Finding Prices + price = a.find('strong').text + price = price.replace("Buy for $", "") + price = price.strip() + USD.append(price) + + + # Finding the Vendor + temp = a.find('div', {'class': "mt-1 fs-12px"}) + temp = temp.findAll('span') + temp = temp[1].find('b').text + vendor_name = temp.replace("@", "").strip() + vendor.append(vendor_name) + + # Finding the Category + cat = a.select_one('div[class="fs-12px"]') + cat = cat.findAll('span')[1].text + cat = cat.text + cat = cat.strip() + category.append(cat) + + badge = a.findAll('span', {'class': "badge bg-success"}) + # Finding Number Sold and Quantity Left + temp = badge[1].text + temp = temp.split("/") + num = temp[0] + num = num.strip() + sold.append(num) + + quant = temp[1] + quant = quant.strip() + qLeft.append(quant) + + # Finding Successful Transactions + # NA + + # Finding Product review + review = a.find('span', {'class': "badge bg-success fs-12px"}).text + review = review.replace("+ ", "") + reviews.append(review) + + # Finding Descrption + description = a.find('p', {'class': "alert alert-light text-ssbold p-1"}).text + description = description.replace("\n", " ") + description = description.strip() + describe.append(description) + + # Finding Escrow + es = a.find('span', {'class': "fw-bold"}).text.strip() + escrow.append(es) + + # Finding Number of Views + view = a.find('span', {'class': "badge bg-primary"}).text.strip() + views.append(view) + + # Find where ships from + ships = a.find('div', {'class': "alert alert-info item_alert fs-12px p-1"}) + ships = ships.findAll('b') + sFrom = ships[0].text.strips() + shipFrom.append(sFrom) + + # Find where it ships to + sTo = ships[1].text.strips() + shipTo.append(sTo) # Searching for CVE and MS categories cve = a.findAll(text=re.compile('CVE-\d{4}-\d{4}')) @@ -275,7 +285,7 @@ def metaversemarket_links_parser(soup): # Returning all links that should be visited by the Crawler href = [] - listing = soup.findAll('div', {"class": "col-12 p-0"}) + listing = soup.findAll('div', {"class": "col-12 col-sm-4 col-xl-3 product_item_col p-1"}) for a in listing: bae = a.find('a', href=True)