From 72da7f2f053055234781687f56618fc34f4219ab Mon Sep 17 00:00:00 2001 From: Joshua Date: Wed, 1 Nov 2023 15:58:39 -0700 Subject: [PATCH] finished crawler and parser --- MarketPlaces/Ares/crawler_selenium.py | 2 +- MarketPlaces/Ares/parser.py | 32 ++++++++++++++++++++++----- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/MarketPlaces/Ares/crawler_selenium.py b/MarketPlaces/Ares/crawler_selenium.py index 5120f06..2e0c677 100644 --- a/MarketPlaces/Ares/crawler_selenium.py +++ b/MarketPlaces/Ares/crawler_selenium.py @@ -41,7 +41,7 @@ def startCrawling(): print(driver.current_url, e) closeDriver(driver) - # new_parse(mktName, baseURL, True) + new_parse(mktName, baseURL, True) # Returns the name of the website diff --git a/MarketPlaces/Ares/parser.py b/MarketPlaces/Ares/parser.py index fe68579..1803233 100644 --- a/MarketPlaces/Ares/parser.py +++ b/MarketPlaces/Ares/parser.py @@ -44,17 +44,18 @@ def ares_description_parser(soup): name = name.replace(",", "") name = name.strip() - box = soup.find('div', {'class': "col-md-7"}).find('span', {'class': "text-left text-white"}) + box = soup.find('div', {'class': "col-md-7"}).find('span') box = box.findAll('span', {'class': "btn btn-mgray btn-sm w-100 active border-danger"}) # Finding Vendor - vendor = box[0].text.strip() + vendor = soup.find('a', {'class': "btn btn-sm btn-mgray my-1 w-100 text-white"}).get('href') + vendor = vendor.split('otherParty=')[-1].strip() # Finding Vendor Rating - temp = box[2] + temp = box[1] stars = len(temp.findAll('i', {"class": "fas fa-star"})) half_stars = len(temp.findAll('i', {'class': "fas fa-star-half-alt"})) - rating_vendor = str((stars - half_stars)/5) + rating_vendor = str(((stars - half_stars)/5) * 100) # Finding the Product Rating and Number of Product Reviews # reviews = temp[2].replace(" review)", "") @@ -69,7 +70,26 @@ def ares_description_parser(soup): box2 = soup.find('div', {"class": "col-md-4 text-center"}).find('span', {"class": "text-left"}).findAll('span') # Finding Prices - USD = box2[0].text.strip() + price = box2[0].text + price = price.replace("$", "") + price = price.replace('\n', '') + price = price.strip() + currency = box2[2].find('i').get('class') + if 'bitcoin' in currency: + BTC = price + elif 'USD' in currency: + USD = price + elif 'monero' in currency: + USD = (str(int(price) * 170.97)) + + USD = box2[0].text + USD = USD.replace('\n', '') + USD = USD.replace('$', '') + USD = USD.strip() + + # Finding Vendor Image + vendor_image = soup.find('img', {"class": 'img-fluid'}).get('src') + vendor_image = vendor_image.split('base64,')[-1] # Finding the Product Category # pmb = soup.findAll('p', {'class': "mb-1"}) @@ -199,6 +219,8 @@ def ares_listing_parser(soup): BTC.append(price) elif 'USD' in currency.get('class'): USD.append(price) + elif 'monero' in currency.get('class'): + USD.append(str(int(price) * 170.97)) # Finding the Vendor