|
@ -42,30 +42,21 @@ def darkmatter_description_parser(soup): |
|
|
temp = soup.find('table', {'class', 'vtable'}) |
|
|
temp = soup.find('table', {'class', 'vtable'}) |
|
|
temp = temp.findAll('tr') |
|
|
temp = temp.findAll('tr') |
|
|
temp2 = temp[3].find('a').text |
|
|
temp2 = temp[3].find('a').text |
|
|
name = cleanString(temp2.strip()) |
|
|
|
|
|
|
|
|
vendor = cleanString(temp2.strip()) |
|
|
except: |
|
|
except: |
|
|
try: |
|
|
|
|
|
temp = soup.find('table', {'class', 'vtable'}) |
|
|
|
|
|
temp = temp.findAll('tr') |
|
|
|
|
|
temp2 = temp[4].find('a').text |
|
|
|
|
|
name = cleanString(temp2.strip()) |
|
|
|
|
|
except: |
|
|
|
|
|
print("vendor") |
|
|
|
|
|
|
|
|
temp = soup.find('table', {'class', 'vtable'}) |
|
|
|
|
|
temp = temp.findAll('tr') |
|
|
|
|
|
temp2 = temp[4].find('a').text |
|
|
|
|
|
vendor = cleanString(temp2.strip()) |
|
|
|
|
|
|
|
|
# product name |
|
|
# product name |
|
|
try: |
|
|
|
|
|
name = soup.find('div', {'class', 'title-h2'}).text |
|
|
|
|
|
name = cleanString(name.strip()) |
|
|
|
|
|
except: |
|
|
|
|
|
print("name") |
|
|
|
|
|
|
|
|
name = soup.find('div', {'class', 'title-h2'}).text |
|
|
|
|
|
name = cleanString(name.strip()) |
|
|
|
|
|
|
|
|
#product description |
|
|
#product description |
|
|
try: |
|
|
|
|
|
temp = soup.find('pre', {'class', 'description'}).text |
|
|
|
|
|
temp = temp.replace('\n', ' ') |
|
|
|
|
|
describe = cleanString(temp.strip()) |
|
|
|
|
|
except: |
|
|
|
|
|
print("description") |
|
|
|
|
|
|
|
|
temp = soup.find('pre', {'class', 'description'}).text |
|
|
|
|
|
temp = temp.replace('\n', ' ') |
|
|
|
|
|
describe = cleanString(temp.strip()) |
|
|
|
|
|
|
|
|
# Finding Product Image |
|
|
# Finding Product Image |
|
|
#image = soup.find('div', {'class': 'woocommerce-product-gallery__image'}).find('img') |
|
|
#image = soup.find('div', {'class': 'woocommerce-product-gallery__image'}).find('img') |
|
@ -81,44 +72,37 @@ def darkmatter_description_parser(soup): |
|
|
temp2 = temp[4].find('a').text |
|
|
temp2 = temp[4].find('a').text |
|
|
category = cleanString(temp2.strip()) |
|
|
category = cleanString(temp2.strip()) |
|
|
except: |
|
|
except: |
|
|
try: |
|
|
|
|
|
temp = soup.find('table', {'class', 'vtable'}) |
|
|
|
|
|
temp = temp.findAll('tr') |
|
|
|
|
|
temp2 = temp[5].find('th').text |
|
|
|
|
|
temp2 = cleanString(temp2.strip) |
|
|
|
|
|
if (temp2 == "Category"): |
|
|
|
|
|
temp2 = temp[5].find('a').text |
|
|
|
|
|
category = cleanString(temp2.strip()) |
|
|
|
|
|
except: |
|
|
|
|
|
print('category') |
|
|
|
|
|
|
|
|
|
|
|
# usd |
|
|
|
|
|
try: |
|
|
|
|
|
temp = soup.find('table', {'class', 'vtable'}) |
|
|
|
|
|
temp = temp.findAll('tr') |
|
|
|
|
|
temp2 = temp[1].find('td').text |
|
|
|
|
|
temp2 = temp2.replace(' USD', '') |
|
|
|
|
|
USD = cleanString(temp2) |
|
|
|
|
|
except: |
|
|
|
|
|
print('USD') |
|
|
|
|
|
|
|
|
|
|
|
# 15 Product_QuantitySold |
|
|
|
|
|
try: |
|
|
|
|
|
temp = soup.find('table', {'class', 'vtable'}) |
|
|
temp = soup.find('table', {'class', 'vtable'}) |
|
|
temp = temp.findAll('tr') |
|
|
temp = temp.findAll('tr') |
|
|
temp2 = temp[5].find('th').text |
|
|
temp2 = temp[5].find('th').text |
|
|
temp2 = cleanString(temp2) |
|
|
|
|
|
temp3 = temp[6].find('th').text |
|
|
|
|
|
temp3 = cleanString(temp3) |
|
|
|
|
|
if (temp2 == "Sold"): |
|
|
|
|
|
temp2 = temp[5].find('td').text |
|
|
|
|
|
sold = cleanString(temp2.strip()) |
|
|
|
|
|
elif (temp3 == "Sold"): |
|
|
|
|
|
temp2 = temp[6].find('td').text |
|
|
|
|
|
sold = cleanString(temp2.strip()) |
|
|
|
|
|
except: |
|
|
|
|
|
print('sold') |
|
|
|
|
|
|
|
|
temp2 = cleanString(temp2.strip) |
|
|
|
|
|
if (temp2 == "Category"): |
|
|
|
|
|
temp2 = temp[5].find('a').text |
|
|
|
|
|
category = cleanString(temp2.strip()) |
|
|
|
|
|
|
|
|
|
|
|
# usd |
|
|
|
|
|
temp = soup.find('table', {'class', 'vtable'}) |
|
|
|
|
|
temp = temp.findAll('tr') |
|
|
|
|
|
temp2 = temp[1].find('td').text |
|
|
|
|
|
temp2 = temp2.replace(' USD', '') |
|
|
|
|
|
USD = cleanString(temp2) |
|
|
|
|
|
|
|
|
|
|
|
# 15 Product_QuantitySold |
|
|
|
|
|
temp = soup.find('table', {'class', 'vtable'}) |
|
|
|
|
|
temp = temp.findAll('tr') |
|
|
|
|
|
temp2 = temp[5].find('th').text |
|
|
|
|
|
temp2 = cleanString(temp2) |
|
|
|
|
|
temp3 = temp[6].find('th').text |
|
|
|
|
|
temp3 = cleanString(temp3) |
|
|
|
|
|
if (temp2 == "Sold"): |
|
|
|
|
|
temp2 = temp[5].find('td').text |
|
|
|
|
|
sold = cleanString(temp2.strip()) |
|
|
|
|
|
elif (temp3 == "Sold"): |
|
|
|
|
|
temp2 = temp[6].find('td').text |
|
|
|
|
|
sold = cleanString(temp2.strip()) |
|
|
|
|
|
|
|
|
|
|
|
image = soup.find('td', {"class": "vtop"}).find('img').get('src') |
|
|
|
|
|
image = image.split('base64,')[-1] |
|
|
|
|
|
|
|
|
# Populating the final variable (this should be a list with all fields scraped) |
|
|
# Populating the final variable (this should be a list with all fields scraped) |
|
|
row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate, |
|
|
row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate, |
|
@ -134,35 +118,8 @@ def darkmatter_description_parser(soup): |
|
|
#return: 'row' that contains a variety of lists that each hold info on the listing page |
|
|
#return: 'row' that contains a variety of lists that each hold info on the listing page |
|
|
def darkmatter_listing_parser(soup): |
|
|
def darkmatter_listing_parser(soup): |
|
|
|
|
|
|
|
|
""" |
|
|
|
|
|
# Fields to be parsed |
|
|
# Fields to be parsed |
|
|
nm = 0 # Total_Products (Should be Integer) |
|
|
|
|
|
mktName = "DarkMatter" # 0 Marketplace_Name |
|
|
|
|
|
name = [] # 1 Product_Name |
|
|
|
|
|
CVE = [] # 2 Product_CVE_Classification (Common Vulnerabilities and Exposures) |
|
|
|
|
|
MS = [] # 3 Product_MS_Classification (Microsoft Security) |
|
|
|
|
|
category = [] # 4 Product_Category |
|
|
|
|
|
describe = [] # 5 Product_Description |
|
|
|
|
|
escrow = [] # 6 Vendor_Warranty |
|
|
|
|
|
views = [] # 7 Product_Number_Of_Views |
|
|
|
|
|
reviews = [] # 8 Product_Number_Of_Reviews |
|
|
|
|
|
addDate = [] # 9 Product_AddDate |
|
|
|
|
|
rating_item = [] # 11 Product_Rating |
|
|
|
|
|
lastSeen = [] # 10 Product_LastViewDate |
|
|
|
|
|
BTC = [] # 11 Product_BTC_SellingPrice |
|
|
|
|
|
USD = [] # 12 Product_USD_SellingPrice |
|
|
|
|
|
EURO = [] # 13 Product_EURO_SellingPrice |
|
|
|
|
|
sold = [] # 14 Product_QuantitySold |
|
|
|
|
|
qLeft =[] # 15 Product_QuantityLeft |
|
|
|
|
|
shipFrom = [] # 16 Product_ShippedFrom |
|
|
|
|
|
shipTo = [] # 17 Product_ShippedTo |
|
|
|
|
|
vendor = [] # 18 Vendor |
|
|
|
|
|
rating = [] # 19 Vendor_Rating |
|
|
|
|
|
success = [] # 20 Vendor_Successful_Transactions |
|
|
|
|
|
href = [] # 23 Product_Links (Urls) |
|
|
|
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
# Fields to be parsed |
|
|
|
|
|
nm = 0 # *Total_Products (Should be Integer) |
|
|
nm = 0 # *Total_Products (Should be Integer) |
|
|
mktName = "DarkMatter" # 0 *Marketplace_Name |
|
|
mktName = "DarkMatter" # 0 *Marketplace_Name |
|
|
vendor = [] # 1 *Vendor y |
|
|
vendor = [] # 1 *Vendor y |
|
@ -191,6 +148,7 @@ def darkmatter_listing_parser(soup): |
|
|
names = soup.find('div', {"class": "content"}).findAll('td', {"class": "lefted", "colspan": "3"}) |
|
|
names = soup.find('div', {"class": "content"}).findAll('td', {"class": "lefted", "colspan": "3"}) |
|
|
left = soup.find('div', {"class": "content"}).findAll('table', {"class": "vtable"}) |
|
|
left = soup.find('div', {"class": "content"}).findAll('table', {"class": "vtable"}) |
|
|
right = soup.find('div', {"class": "content"}).findAll('td', {"class": "vtop centered"}) |
|
|
right = soup.find('div', {"class": "content"}).findAll('td', {"class": "vtop centered"}) |
|
|
|
|
|
images = soup.find('div', {"class": "content"}).findAll('td', {"class": "vcentered"}) |
|
|
|
|
|
|
|
|
# vtop centered |
|
|
# vtop centered |
|
|
count = 0 |
|
|
count = 0 |
|
@ -199,18 +157,15 @@ def darkmatter_listing_parser(soup): |
|
|
|
|
|
|
|
|
for a in names: |
|
|
for a in names: |
|
|
# product name |
|
|
# product name |
|
|
try: |
|
|
|
|
|
temp = a.find('a').text |
|
|
|
|
|
if ("pcs x " in temp): |
|
|
|
|
|
index = temp.index("pcs x ") |
|
|
|
|
|
result = temp[index + len("pcs x "):] |
|
|
|
|
|
name.append(cleanString(result)) |
|
|
|
|
|
elif("pks x " in temp): |
|
|
|
|
|
index = temp.index("pks x ") |
|
|
|
|
|
result = temp[index + len("pks x "):] |
|
|
|
|
|
name.append(cleanString(temp)) |
|
|
|
|
|
except Exception as e: |
|
|
|
|
|
print("product name", e) |
|
|
|
|
|
|
|
|
temp = a.find('a').text |
|
|
|
|
|
if ("pcs x " in temp): |
|
|
|
|
|
index = temp.index("pcs x ") |
|
|
|
|
|
result = temp[index + len("pcs x "):] |
|
|
|
|
|
name.append(cleanString(result)) |
|
|
|
|
|
elif("pks x " in temp): |
|
|
|
|
|
index = temp.index("pks x ") |
|
|
|
|
|
result = temp[index + len("pks x "):] |
|
|
|
|
|
name.append(cleanString(result)) |
|
|
|
|
|
|
|
|
# Finding Product Image |
|
|
# Finding Product Image |
|
|
#product_image = a.find('img', {'class': 'attachment-woocommerce_thumbnail size-woocommerce_thumbnail'}) |
|
|
#product_image = a.find('img', {'class': 'attachment-woocommerce_thumbnail size-woocommerce_thumbnail'}) |
|
@ -225,11 +180,8 @@ def darkmatter_listing_parser(soup): |
|
|
length_2 = len(temp2) - 1 |
|
|
length_2 = len(temp2) - 1 |
|
|
|
|
|
|
|
|
# category |
|
|
# category |
|
|
try: |
|
|
|
|
|
temp = temp2[1].find('td').text |
|
|
|
|
|
category.append(cleanString(temp.strip())) |
|
|
|
|
|
except: |
|
|
|
|
|
print('category') |
|
|
|
|
|
|
|
|
temp = temp2[1].find('td').text |
|
|
|
|
|
category.append(cleanString(temp.strip())) |
|
|
|
|
|
|
|
|
describe.append("-1") |
|
|
describe.append("-1") |
|
|
#escrow.append("-1") |
|
|
#escrow.append("-1") |
|
@ -238,63 +190,49 @@ def darkmatter_listing_parser(soup): |
|
|
addDate.append("-1") |
|
|
addDate.append("-1") |
|
|
#lastSeen.append("-1") |
|
|
#lastSeen.append("-1") |
|
|
BTC.append("-1") |
|
|
BTC.append("-1") |
|
|
image.append("-1") |
|
|
|
|
|
image_vendor.append("-1") |
|
|
image_vendor.append("-1") |
|
|
|
|
|
|
|
|
# usd |
|
|
# usd |
|
|
try: |
|
|
|
|
|
temp3 = right[count*2].find('span').text |
|
|
|
|
|
temp = temp3.replace(' USD', '') |
|
|
|
|
|
USD.append(cleanString(temp)) |
|
|
|
|
|
except: |
|
|
|
|
|
print('USD') |
|
|
|
|
|
|
|
|
temp3 = right[count*2].find('span').text |
|
|
|
|
|
temp = temp3.replace(' USD', '') |
|
|
|
|
|
USD.append(cleanString(temp)) |
|
|
|
|
|
|
|
|
EURO.append("-1") |
|
|
EURO.append("-1") |
|
|
|
|
|
|
|
|
# 14 Product_QuantitySold |
|
|
# 14 Product_QuantitySold |
|
|
try: |
|
|
|
|
|
temp3 = temp2[length_2].find('th').text |
|
|
|
|
|
temp3 = cleanString(temp3) |
|
|
|
|
|
if (temp3 == "Sold:"): |
|
|
|
|
|
temp = temp2[length_2].find('td').text |
|
|
|
|
|
sold.append(cleanString(temp.strip())) |
|
|
|
|
|
else: |
|
|
|
|
|
sold.append("-1") |
|
|
|
|
|
except Exception as e: |
|
|
|
|
|
|
|
|
temp3 = temp2[length_2].find('th').text |
|
|
|
|
|
temp3 = cleanString(temp3) |
|
|
|
|
|
if (temp3 == "Sold:"): |
|
|
|
|
|
temp = temp2[length_2].find('td').text |
|
|
|
|
|
sold.append(cleanString(temp.strip())) |
|
|
|
|
|
else: |
|
|
sold.append("-1") |
|
|
sold.append("-1") |
|
|
print('sold', e) |
|
|
|
|
|
|
|
|
|
|
|
qLeft.append("-1") |
|
|
qLeft.append("-1") |
|
|
shipFrom.append("-1") |
|
|
shipFrom.append("-1") |
|
|
|
|
|
|
|
|
# ship to |
|
|
# ship to |
|
|
try: |
|
|
|
|
|
temp3 = temp2[length_2].find('th').text |
|
|
|
|
|
temp3 = cleanString(temp3) |
|
|
|
|
|
if (temp3 == "Ship To:"): |
|
|
|
|
|
temp = temp2[length_2].find('td').text |
|
|
|
|
|
shipTo.append(cleanString(temp.strip())) |
|
|
|
|
|
else: |
|
|
|
|
|
shipTo.append("-1") |
|
|
|
|
|
except Exception as e: |
|
|
|
|
|
|
|
|
temp3 = temp2[length_2].find('th').text |
|
|
|
|
|
temp3 = cleanString(temp3) |
|
|
|
|
|
if (temp3 == "Ship To:"): |
|
|
|
|
|
temp = temp2[length_2].find('td').text |
|
|
|
|
|
shipTo.append(cleanString(temp.strip())) |
|
|
|
|
|
else: |
|
|
shipTo.append("-1") |
|
|
shipTo.append("-1") |
|
|
print('shopto') |
|
|
|
|
|
|
|
|
|
|
|
# vendor |
|
|
# vendor |
|
|
try: |
|
|
|
|
|
temp = temp2[0].find('a').text |
|
|
|
|
|
vendor.append(cleanString(temp.strip())) |
|
|
|
|
|
except: |
|
|
|
|
|
print('vendor') |
|
|
|
|
|
|
|
|
temp = temp2[0].find('a').text |
|
|
|
|
|
vendor.append(cleanString(temp.strip())) |
|
|
|
|
|
|
|
|
|
|
|
# add product rating (stars) |
|
|
rating.append("-1") |
|
|
rating.append("-1") |
|
|
success.append("-1") |
|
|
success.append("-1") |
|
|
|
|
|
|
|
|
try: |
|
|
|
|
|
temp = a.find('a').get('href') |
|
|
|
|
|
href.append(temp) |
|
|
|
|
|
except: |
|
|
|
|
|
print('href') |
|
|
|
|
|
|
|
|
temp = a.find('a').get('href') |
|
|
|
|
|
href.append(temp) |
|
|
|
|
|
|
|
|
|
|
|
image = images[count*2].find('img').get('src') |
|
|
|
|
|
image = image.split('base64,')[-1] |
|
|
|
|
|
|
|
|
count += 1 |
|
|
count += 1 |
|
|
|
|
|
|
|
|