Browse Source

finished listing parser

main
Joshua 1 year ago
parent
commit
1b2047fe61
1 changed file with 69 additions and 63 deletions
  1. +69
    -63
      MarketPlaces/Ares/parser.py

+ 69
- 63
MarketPlaces/Ares/parser.py View File

@ -37,67 +37,72 @@ def ares_description_parser(soup):
vendor_image = "-1" # 20 Vendor_Image vendor_image = "-1" # 20 Vendor_Image
# Finding Product Name # Finding Product Name
divmb = soup.findAll('div', {'class': "mb-1"})
divmb = soup.find('div', {'class': "col-md-12 my-2"})
name = divmb[0].text
name = divmb.find('span', {'class': "btn btn-sm btn-outline-dark w-100 active rounded-0"}).text
name = name.replace('\n', ' ') name = name.replace('\n', ' ')
name = name.replace(",", "") name = name.replace(",", "")
name = name.strip() name = name.strip()
box = soup.find('div', {'class': "col-md-7"}).find('span', {'class': "text-left text-white"})
box = box.findAll('span', {'class': "btn btn-mgray btn-sm w-100 active border-danger"})
# Finding Vendor # Finding Vendor
vendor = divmb[1].find('a').text.strip()
vendor = box[0].text.strip()
# Finding Vendor Rating # Finding Vendor Rating
temp = soup.find('div', {'class': ""}).text
temp = temp.split('(')
rating = temp[0].replace("Vendor's Review : ", "")
rating = rating.replace("%", "")
rating_vendor = rating.strip()
temp = box[2]
stars = len(temp.findAll('i', {"class": "fas fa-star"}))
half_stars = len(temp.findAll('i', {'class': "fas fa-star-half-alt"}))
rating_vendor = str((stars - half_stars)/5)
# Finding the Product Rating and Number of Product Reviews # Finding the Product Rating and Number of Product Reviews
reviews = temp[2].replace(" review)", "")
reviews = reviews.strip()
# reviews = temp[2].replace(" review)", "")
# reviews = reviews.strip()
#
# temp = temp[1].split(")")
# rating = temp[1].replace("Product Review : ", "")
# rating = rating.replace("%", "")
# rating_item = rating.strip()
temp = temp[1].split(")")
rating = temp[1].replace("Product Review : ", "")
rating = rating.replace("%", "")
rating_item = rating.strip()
box2 = soup.find('div', {"class": "col-md-4 text-center"}).find('span', {"class": "text-left"}).findAll('span')
# Finding Prices # Finding Prices
USD = soup.find('div', {'class': "h3 text-primary"}).text.strip()
USD = box2[0].text.strip()
# Finding the Product Category # Finding the Product Category
pmb = soup.findAll('p', {'class': "mb-1"})
# pmb = soup.findAll('p', {'class': "mb-1"})
category = pmb[-1].text
category = category.replace("Category: ", "").strip()
# category = pmb[-1].text
# category = category.replace("Category: ", "").strip()
# Finding the Product Quantity Available # Finding the Product Quantity Available
left = divmb[-1].text
left = left.split(",", 1)[1]
left = left.replace("in stock", "")
left = left.strip()
# left = divmb[-1].text
# left = left.split(",", 1)[1]
# left = left.replace("in stock", "")
# left = left.strip()
# Finding Number Sold # Finding Number Sold
sold = divmb[-1].text
sold = sold.split(",", 1)[0]
sold = sold.replace("sold", "")
sold = sold.strip()
# sold = divmb[-1].text
# sold = sold.split(",", 1)[0]
# sold = sold.replace("sold", "")
# sold = sold.strip()
# Finding Shipment Information (Origin) # Finding Shipment Information (Origin)
pmb[0].text
shipFrom = shipFrom.replace("Ships from: ", "").strip()
# pmb[0].text
# shipFrom = shipFrom.replace("Ships from: ", "").strip()
# Finding Shipment Information (Destination) # Finding Shipment Information (Destination)
pmb[1].text
shipTo = shipTo.replace("Ships to: ", "").strip()
# pmb[1].text
# shipTo = shipTo.replace("Ships to: ", "").strip()
# Finding the Product description # Finding the Product description
cardbody = soup.findAll('div', {'class': "card-body"})
describe = cardbody[1].text.strip()
cardbody = soup.find('div', {"class": 'row-md-12'}).find('div', {"class": 'col-md-4'}).find('textarea', {"class": 'disabled form-control form-control-sm w-100 bg-mgray text-white rounded-0 border-danger'})
describe = cardbody.text.strip()
# Finding Product Image # Finding Product Image
image = soup.find('div', {'class': 'product-primary'}).find('img')
image = soup.find('div', {"class": 'row-md-12'}).find('div', {"class": 'col-md-4 text-center'}).find('img')
image = image.get('src') image = image.get('src')
image = image.split('base64,')[-1] image = image.split('base64,')[-1]
@ -159,22 +164,20 @@ def ares_listing_parser(soup):
image_vendor = [] # 21 Vendor_Image image_vendor = [] # 21 Vendor_Image
href = [] # 22 Product_Links href = [] # 22 Product_Links
listing = soup.findAll('div', {"id": "itembox"})
listing = soup.find('div', {"class": 'card-body text-black text-left bg-dark'}).findAll('div', {"class": 'card mb-4 border-danger rounded-0'})
# Populating the Number of Products # Populating the Number of Products
nm = len(listing) nm = len(listing)
for a in listing: for a in listing:
bae = a.findAll('a', href=True)
lb = a.findAll('div', {"id": "littlebox"})
# Adding the url to the list of urls # Adding the url to the list of urls
link = bae[0].get('href')
link = a.find('a', {'class': "badge badge-danger w-100 text-white"}).get('href')
link = cleanLink(link) link = cleanLink(link)
href.append(link) href.append(link)
# Finding the Product
product = lb[1].find('a').text
# Finding the Product name
product = a.find('div', {"class": 'marquee-parent'}).find('div', {"class": "marquee-child"}).text
product = product.replace('\n', ' ') product = product.replace('\n', ' ')
product = product.replace(",", "") product = product.replace(",", "")
product = product.replace("...", "") product = product.replace("...", "")
@ -188,13 +191,18 @@ def ares_listing_parser(soup):
image.append(product_image) image.append(product_image)
# Finding Prices # Finding Prices
price = lb[-1].find('div', {"class": "mb-1"}).text
price = a.findAll('a', {"class": "text-white"})[-1].text
price = price.replace("$","") price = price.replace("$","")
price = price.strip() price = price.strip()
USD.append(price)
currency = a.find('div', {"class": 'card-header bg-mgray rounded-0'}).findAll('i')[1]
if 'bitcoin' in currency.get('class'):
BTC.append(price)
elif 'USD' in currency.get('class'):
USD.append(price)
# Finding the Vendor # Finding the Vendor
vendor_name = lb[-1].find("a").text
vendor_name = a.find('a', {"class": 'badge badge-dark w-100 text-white my-1'}).text
vendor_name = vendor_name.replace(",", "") vendor_name = vendor_name.replace(",", "")
vendor_name = vendor_name.strip() vendor_name = vendor_name.strip()
vendor.append(vendor_name) vendor.append(vendor_name)
@ -202,35 +210,33 @@ def ares_listing_parser(soup):
image_vendor.append("-1") image_vendor.append("-1")
# Finding the Category # Finding the Category
cat = lb[-1].find("span").text
cat = cat.replace("class:", "")
cat = cat.strip()
category.append(cat)
span = lb[0].findAll("span")
# cat = lb[-1].find("span").text
# cat = cat.replace("class:", "")
# cat = cat.strip()
# category.append(cat)
# Finding Number of Views # Finding Number of Views
num = span[0].text
num = num.replace("views:", "")
num = num.strip()
sold.append(num)
# num = span[0].text
# num = num.replace("views:", "")
# num = num.strip()
# sold.append(num)
# Finding Number Sold # Finding Number Sold
num = span[2].text
num = num.replace("Sold:", "")
num = num.strip()
sold.append(num)
# num = span[2].text
# num = num.replace("Sold:", "")
# num = num.strip()
# sold.append(num)
# Finding Quantity Left # Finding Quantity Left
quant = span[1].text
quant = quant.replace("stock:", "")
quant = quant.strip()
qLeft.append(quant)
# quant = span[1].text
# quant = quant.replace("stock:", "")
# quant = quant.strip()
# qLeft.append(quant)
# add shipping information # add shipping information
ship = lb[2].findAll('small')[1].findAll('span')[1].text.split("->")
shipFrom.append(ship[0].replace("Ship from ", "").strip())
shipTo.append(ship[1].replace("to ", "").strip())
# ship = lb[2].findAll('small')[1].findAll('span')[1].text.split("->")
# shipFrom.append(ship[0].replace("Ship from ", "").strip())
# shipTo.append(ship[1].replace("to ", "").strip())
# Searching for CVE and MS categories # Searching for CVE and MS categories


Loading…
Cancel
Save