|
@ -43,6 +43,10 @@ def nexus_description_parser(soup): |
|
|
name_of_product = soup.find("h1", {"class": "product_title entry-title"}).text |
|
|
name_of_product = soup.find("h1", {"class": "product_title entry-title"}).text |
|
|
name = cleanString(name_of_product.strip()) |
|
|
name = cleanString(name_of_product.strip()) |
|
|
|
|
|
|
|
|
|
|
|
# Find the BTC Price |
|
|
|
|
|
prices = soup.find('p', {"class": "price"}).findAll('span', {"class": "cs"}) |
|
|
|
|
|
BTC = prices[0].text |
|
|
|
|
|
BTC = cleanNumbers(BTC.strip()) |
|
|
|
|
|
|
|
|
# finding the description of the product |
|
|
# finding the description of the product |
|
|
description_div = soup.find("div", {"class": "woocommerce-product-details__short-description"}) |
|
|
description_div = soup.find("div", {"class": "woocommerce-product-details__short-description"}) |
|
@ -52,7 +56,7 @@ def nexus_description_parser(soup): |
|
|
describe = cleanString(description_div.text.strip()) |
|
|
describe = cleanString(description_div.text.strip()) |
|
|
|
|
|
|
|
|
# Finding Product Image |
|
|
# Finding Product Image |
|
|
image = soup.find('div', {'class': 'woocommerce-product-gallery__image'}).find('img') |
|
|
|
|
|
|
|
|
image = soup.find('div', {'class': 'woocommerce-product-gallery__wrapper'}).find('img') |
|
|
image = image.get('src') |
|
|
image = image.get('src') |
|
|
image = image.split('base64,')[-1] |
|
|
image = image.split('base64,')[-1] |
|
|
|
|
|
|
|
@ -110,56 +114,53 @@ def nexus_listing_parser(soup): |
|
|
image_vendor = [] # 21 Vendor_Image |
|
|
image_vendor = [] # 21 Vendor_Image |
|
|
href = [] # 22 Product_Links |
|
|
href = [] # 22 Product_Links |
|
|
|
|
|
|
|
|
products_list = soup.find_all('li') |
|
|
|
|
|
nm = 0 |
|
|
|
|
|
|
|
|
main = soup.find('main', {'id': 'main'}) |
|
|
|
|
|
products_list = main.find('ul', recursive=False).find_all('li', recursive=False) |
|
|
|
|
|
nm = len(products_list) |
|
|
|
|
|
|
|
|
for product in products_list: |
|
|
for product in products_list: |
|
|
|
|
|
# Finding the name of the product |
|
|
|
|
|
name_of_product = product.find("h2", {"class": "woocommerce-loop-product__title"}).find("a").text |
|
|
|
|
|
name_of_product_cleaned = cleanString(name_of_product.strip()) |
|
|
|
|
|
# print(name_of_product_cleaned) |
|
|
|
|
|
name.append(name_of_product_cleaned) |
|
|
|
|
|
#finding the URL |
|
|
try: |
|
|
try: |
|
|
# Finding the name of the product |
|
|
|
|
|
name_of_product = product.find("h2", {"class": "woocommerce-loop-product__title"}).find("a").text |
|
|
|
|
|
name_of_product_cleaned = cleanString(name_of_product.strip()) |
|
|
|
|
|
# print(name_of_product_cleaned) |
|
|
|
|
|
name.append(name_of_product_cleaned) |
|
|
|
|
|
#finding the URL |
|
|
|
|
|
try: |
|
|
|
|
|
url = product.find("a", class_="woocommerce-loop-product__link").get('href') |
|
|
|
|
|
href.append(url) |
|
|
|
|
|
except AttributeError as e: |
|
|
|
|
|
print("I can't find the link") |
|
|
|
|
|
raise e |
|
|
|
|
|
|
|
|
|
|
|
# Finding Product Image |
|
|
|
|
|
product_image = product.find('a', {'class': 'woocommerce-loop-image-link woocommerce-LoopProduct-link woocommerce-loop-product__link'}).find('img') |
|
|
|
|
|
product_image = product_image.get('src') |
|
|
|
|
|
product_image = product_image.split('base64,')[-1] |
|
|
|
|
|
image.append(product_image) |
|
|
|
|
|
|
|
|
|
|
|
BTC.append("-1") |
|
|
|
|
|
|
|
|
|
|
|
#everything else appends a -1 |
|
|
|
|
|
rating_vendor.append("-1") |
|
|
|
|
|
USD.append("-1") |
|
|
|
|
|
vendor.append(mktName) |
|
|
|
|
|
success.append("-1") |
|
|
|
|
|
CVE.append("-1") |
|
|
|
|
|
MS.append("-1") |
|
|
|
|
|
category.append("-1") |
|
|
|
|
|
describe.append("-1") |
|
|
|
|
|
views.append("-1") |
|
|
|
|
|
reviews.append("-1") |
|
|
|
|
|
addDate.append("-1") |
|
|
|
|
|
EURO.append("-1") |
|
|
|
|
|
sold.append("-1") |
|
|
|
|
|
qLeft.append("-1") |
|
|
|
|
|
shipFrom.append("-1") |
|
|
|
|
|
shipTo.append("-1") |
|
|
|
|
|
image_vendor.append("-1") |
|
|
|
|
|
# print("Done! moving onto the next product!") |
|
|
|
|
|
# print(len(shipTo)) |
|
|
|
|
|
nm += 1 |
|
|
|
|
|
|
|
|
url = product.find("a", class_="woocommerce-loop-product__link").get('href') |
|
|
|
|
|
href.append(url) |
|
|
except AttributeError as e: |
|
|
except AttributeError as e: |
|
|
print("I'm somewhere I don't belong. I'm going to leave") |
|
|
|
|
|
continue |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("I can't find the link") |
|
|
|
|
|
raise e |
|
|
|
|
|
|
|
|
|
|
|
# Finding Product Image |
|
|
|
|
|
product_image = product.find('a', {'class': 'woocommerce-loop-image-link woocommerce-LoopProduct-link woocommerce-loop-product__link'}).find('img') |
|
|
|
|
|
product_image = product_image.get('src') |
|
|
|
|
|
product_image = product_image.split('base64,')[-1] |
|
|
|
|
|
image.append(product_image) |
|
|
|
|
|
|
|
|
|
|
|
# Finding BTC Price |
|
|
|
|
|
prices = product.find('span', {"class": "price"}).findAll('span', {"class": "cs"}) |
|
|
|
|
|
price = prices[0].text |
|
|
|
|
|
BTC.append(cleanNumbers(price.strip())) |
|
|
|
|
|
|
|
|
|
|
|
#everything else appends a -1 |
|
|
|
|
|
rating_vendor.append("-1") |
|
|
|
|
|
USD.append("-1") |
|
|
|
|
|
vendor.append('-1') |
|
|
|
|
|
success.append("-1") |
|
|
|
|
|
CVE.append("-1") |
|
|
|
|
|
MS.append("-1") |
|
|
|
|
|
category.append("-1") |
|
|
|
|
|
describe.append("-1") |
|
|
|
|
|
views.append("-1") |
|
|
|
|
|
reviews.append("-1") |
|
|
|
|
|
addDate.append("-1") |
|
|
|
|
|
EURO.append("-1") |
|
|
|
|
|
sold.append("-1") |
|
|
|
|
|
qLeft.append("-1") |
|
|
|
|
|
shipFrom.append("-1") |
|
|
|
|
|
shipTo.append("-1") |
|
|
|
|
|
image_vendor.append("-1") |
|
|
|
|
|
|
|
|
# Populate the final variable (this should be a list with all fields scraped) |
|
|
# Populate the final variable (this should be a list with all fields scraped) |
|
|
return organizeProducts( |
|
|
return organizeProducts( |
|
|