Browse Source

Fixed GoFish parser

main
Joshua 1 year ago
parent
commit
ac1eb6da97
1 changed file with 16 additions and 7 deletions
  1. +16
    -7
      MarketPlaces/GoFish/parser.py

+ 16
- 7
MarketPlaces/GoFish/parser.py View File

@ -36,15 +36,20 @@ def gofish_description_parser(soup):
vendor_image = "-1" # 20 Vendor_Image vendor_image = "-1" # 20 Vendor_Image
# Finding Product Name # Finding Product Name
divmb = soup.find('div', {'class': "p-3 mb-1 fs-3 fw-bold border border-2 bg-white rounded"}) divmb = soup.find('div', {'class': "p-3 mb-1 fs-3 fw-bold border border-2 bg-white rounded"})
if divmb is None:
divmb = soup.find('div', {'class': "p-3 mb-1 fs-4 fw-bold border border-2 bg-white rounded"})
name = divmb.text name = divmb.text
name = name.replace('\n', ' ') name = name.replace('\n', ' ')
name = name.replace('\r', ' ')
name = name.replace('\t', ' ')
name = name.replace(",", "") name = name.replace(",", "")
name = name.strip() name = name.strip()
# Finding Vendor # Finding Vendor
vendor = soup.find('div', {'class': 'my-1'}).text.strip()
vendor = soup.find('div', {'class': 'my-1'}).find('a').text.strip()
# Finding Vendor Rating # Finding Vendor Rating
# temp = soup.find('div', {'class': ""}).text # temp = soup.find('div', {'class': ""}).text
@ -63,7 +68,7 @@ def gofish_description_parser(soup):
# rating_item = rating.strip() # rating_item = rating.strip()
# Finding Prices # Finding Prices
precios = soup.findall('td', {'class': "text-end text-nowrap"})
precios = soup.findAll('td', {'class': "text-end text-nowrap"})
USD = precios[0].text.strip().replace('$', '') USD = precios[0].text.strip().replace('$', '')
# Finding the Product Category # Finding the Product Category
@ -85,14 +90,18 @@ def gofish_description_parser(soup):
# sold = sold.strip() # sold = sold.strip()
# Finding Shipment Information (Origin) # Finding Shipment Information (Origin)
origin = soup.findall('div', {'class': "p-3 mt-2 mb-3 border border-2 bg-white rounded"})
origin = soup.findAll('div', {'class': "p-3 mt-2 mb-3 border border-2 bg-white rounded"})
remove = origin[0].find('span').text.strip()
origin = origin[0].text.strip() origin = origin[0].text.strip()
origin = origin.replace(remove, '')
shipFrom = origin[1:].strip()
shipFrom = origin.strip()
# Finding Shipment Information (Destination) # Finding Shipment Information (Destination)
dest = soup.findall('div', {'class': 'p-3 mb-3 overflow-auto border border-2 bg-white rounded'})
dest = soup.findAll('div', {'class': 'p-3 mb-3 overflow-auto border border-2 bg-white rounded'})
dest = dest[-1].text.strip() dest = dest[-1].text.strip()
dest = dest.replace('[', '')
dest = dest.replace(']', '')
shipTo = dest[1:].strip() shipTo = dest[1:].strip()
@ -103,7 +112,7 @@ def gofish_description_parser(soup):
describe = describe.strip() describe = describe.strip()
# Finding Product Image # Finding Product Image
img = soup.findall('figure', {'class': 'image-feature'})[0]
img = soup.findAll('figure', {'class': 'image-feature'})[0]
image = img.find('img', {'class': 'image-block rounded'}) image = img.find('img', {'class': 'image-block rounded'})
image = image.get('src') image = image.get('src')
image = image.split('base64,')[-1] image = image.split('base64,')[-1]
@ -166,7 +175,7 @@ def gofish_listing_parser(soup):
image_vendor = [] # 21 Vendor_Image image_vendor = [] # 21 Vendor_Image
href = [] # 22 Product_Links href = [] # 22 Product_Links
listing = soup.find('tbody', {"class": "border border-2 align-middle"}).findall('tr')
listing = soup.find('tbody', {"class": "border border-2 align-middle"}).findAll('tr')
# Populating the Number of Products # Populating the Number of Products
nm = len(listing) nm = len(listing)


Loading…
Cancel
Save