Browse Source

finished crawler and parser

main
Joshua 1 year ago
parent
commit
72da7f2f05
2 changed files with 28 additions and 6 deletions
  1. +1
    -1
      MarketPlaces/Ares/crawler_selenium.py
  2. +27
    -5
      MarketPlaces/Ares/parser.py

+ 1
- 1
MarketPlaces/Ares/crawler_selenium.py View File

@@ -41,7 +41,7 @@ def startCrawling():
 print(driver.current_url, e)
 closeDriver(driver)
-# new_parse(mktName, baseURL, True)
+new_parse(mktName, baseURL, True)
 # Returns the name of the website


+ 27
- 5
MarketPlaces/Ares/parser.py View File

@@ -44,17 +44,18 @@ def ares_description_parser(soup):
 name = name.replace(",", "")
 name = name.strip()
-box = soup.find('div', {'class': "col-md-7"}).find('span', {'class': "text-left text-white"})
+box = soup.find('div', {'class': "col-md-7"}).find('span')
 box = box.findAll('span', {'class': "btn btn-mgray btn-sm w-100 active border-danger"})
 # Finding Vendor
-vendor = box[0].text.strip()
+vendor = soup.find('a', {'class': "btn btn-sm btn-mgray my-1 w-100 text-white"}).get('href')
+vendor = vendor.split('otherParty=')[-1].strip()
 # Finding Vendor Rating
-temp = box[2]
+temp = box[1]
 stars = len(temp.findAll('i', {"class": "fas fa-star"}))
 half_stars = len(temp.findAll('i', {'class': "fas fa-star-half-alt"}))
-rating_vendor = str((stars - half_stars)/5)
+rating_vendor = str(((stars - half_stars)/5) * 100)
 # Finding the Product Rating and Number of Product Reviews
 # reviews = temp[2].replace(" review)", "")
@@ -69,7 +70,26 @@ def ares_description_parser(soup):
 box2 = soup.find('div', {"class": "col-md-4 text-center"}).find('span', {"class": "text-left"}).findAll('span')
 # Finding Prices
-USD = box2[0].text.strip()
+price = box2[0].text
+price = price.replace("$", "")
+price = price.replace('\n', '')
+price = price.strip()
+currency = box2[2].find('i').get('class')
+if 'bitcoin' in currency:
+BTC = price
+elif 'USD' in currency:
+USD = price
+elif 'monero' in currency:
+USD = (str(int(price) * 170.97))
+USD = box2[0].text
+USD = USD.replace('\n', '')
+USD = USD.replace('$', '')
+USD = USD.strip()
+# Finding Vendor Image
+vendor_image = soup.find('img', {"class": 'img-fluid'}).get('src')
+vendor_image = vendor_image.split('base64,')[-1]
 # Finding the Product Category
 # pmb = soup.findAll('p', {'class': "mb-1"})
@@ -199,6 +219,8 @@ def ares_listing_parser(soup):
 BTC.append(price)
 elif 'USD' in currency.get('class'):
 USD.append(price)
+elif 'monero' in currency.get('class'):
+USD.append(str(int(price) * 170.97))
 # Finding the Vendor


Loading…
Cancel
Save