|
|
@@ -49,26 +49,29 @@ def AnonMarket_description_parser(soup):
-    # Find all table rows
-    rows = table.find_all('tr')
-
-    # Parse each row to get relevant data
-    data = {}
-    for row in rows:
-        columns = row.find_all('td')
-        if len(columns) == 3:
-            key = columns[0].text.strip()
-            value = columns[2].text.strip()
-            data[key] = value
-
-    # Extract specific data from the dictionary and assign them to individual variables
-    vendor = data.get('Vendor', '-1')
-    shipFrom = data.get('Location', '-1')
-    shipTo = data.get('Ships to', '-1')
-    category = data.get('Category', '-1')
-    USD = data.get('Price', '-1').split()[0]
-    left = data.get('Stock', '-1')
+    info_div = soup.find('div', {'class': 'information'})
+    table = info_div.find('table') if info_div else None
+    if table:
+        # Find all table rows
+        rows = table.find_all('tr')
+
+        # Parse each row to get relevant data
+        data = {}
+        for row in rows:
+            columns = row.find_all('td')
+            if len(columns) == 3:
+                key = columns[0].text.strip()
+                value = columns[2].text.strip()
+                data[key] = value
+
+        # Extract specific data from the dictionary and assign them to individual variables
+        vendor = data.get('Vendor', '-1')
+        shipFrom = data.get('Location', '-1')
+        shipTo = data.get('Ships to', '-1')
+        category = data.get('Category', '-1')
+        USD = data.get('Price', '-1').split()[0]
+        left = data.get('Stock', '-1')
 
     # image
     image = soup.find('img', {"class": "bigthumbnail"})
     image = image.get('src').split('base64,')[-1]
 
     # Populating the final variable (this should be a list with all fields scraped)
     row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
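
Note: the added guarded block reduces the three-column rows of the `information` table to a flat dict before the field lookups. A minimal runnable sketch of that pattern, run against illustrative HTML rather than a captured market page:

```python
from bs4 import BeautifulSoup

html = """
<div class="information"><table>
  <tr><td>Vendor</td><td>:</td><td>acme</td></tr>
  <tr><td>Price</td><td>:</td><td>19.99 USD</td></tr>
</table></div>
"""
soup = BeautifulSoup(html, 'html.parser')

info_div = soup.find('div', {'class': 'information'})
table = info_div.find('table') if info_div else None

data = {}
if table:
    for row in table.find_all('tr'):
        columns = row.find_all('td')
        if len(columns) == 3:  # label / separator / value
            data[columns[0].text.strip()] = columns[2].text.strip()

print(data.get('Vendor', '-1'))            # acme
print(data.get('Price', '-1').split()[0])  # 19.99
```

The `data.get(..., '-1')` lookups are what let the parser fall back to the project-wide "-1" placeholder when a label is missing from the table.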
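The image handling in the context lines assumes the thumbnail is inlined as a data URI, so splitting on `'base64,'` leaves just the raw base64 payload. A small sketch, with an illustrative tag (note that if no `bigthumbnail` img exists, `image.get` would raise `AttributeError` as written):

```python
import base64
from bs4 import BeautifulSoup

html = '<img class="bigthumbnail" src="data:image/png;base64,aGVsbG8=">'
soup = BeautifulSoup(html, 'html.parser')

image = soup.find('img', {'class': 'bigthumbnail'})
payload = image.get('src').split('base64,')[-1]  # text after the data-URI header
print(base64.b64decode(payload))                 # b'hello'
```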
|
|
@@ -111,51 +114,55 @@ def AnonMarket_listing_parser(soup):
     href = []  # 22 Product_Links
     base_url = "http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion"
 
+    cat = soup.find("div", {'class': 'heading'}).text
+
     products_list = soup.find_all('div', {'class': 'item'})
     nm = 0
     for product in products_list:
-        try:
-            name_of_product = product.find("div", {"class": "title"}).text.strip()
-            name.append(name_of_product)
-
-            name_of_vendor = product.find("a", {'class': 'seller'}).text.strip()
-            vendor.append(name_of_vendor)
-
-            cat = soup.find("div", {'class': 'heading'}).text
-            category.append(cat)
-
-            product_link_element = product.find("div", {"class": "title"}).find_parent('a')
-            if product_link_element:
-                link = product_link_element['href']
-                if "/product/" in link and "/user/" not in link:
-                    full_link = base_url + link
-                    href.append(full_link)
-                else:
-                    href.append("-1")
-            else:
-                href.append("-1")
-
-            # Append '-1' for unavailable data
-            rating_vendor.append("-1")
-            success.append("-1")
-            CVE.append("-1")
-            MS.append("-1")
-            describe.append("-1")
-            views.append("-1")
-            reviews.append("-1")
-            addDate.append("-1")
-            BTC.append("-1")
-            EURO.append("-1")
-            sold.append("-1")
-            qLeft.append("-1")
-            shipFrom.append("-1")
-            shipTo.append("-1")
-
-            nm += 1
-
-        except AttributeError as e:
-            print("I'm somewhere I don't belong. I'm going to leave")
-            continue
+        name_of_product = product.find("div", {"class": "title"}).text.strip()
+        name.append(name_of_product)
+
+        name_of_vendor = product.find("a", {'class': 'seller'}).text.strip()
+        vendor.append(name_of_vendor)
+
+        category.append(cat)
+
+        tbody = product.find('div', {"class": "info"}).find('tbody')
+
+        # rating_item
+        width = tbody.find('div', {"class": "stars2"}).get('style')
+        rating_item.append(cleanNumbers(width.strip()))
+
+        tr = tbody.findAll('tr', recursive=False)
+        td = tr[2].findAll('td')
+
+        # sold
+        sold.append(td[0].text.strip())
+
+        # reviews
+        reviews.append(td[1].text.strip())
+
+        product_link_element = product.find("div", {"class": "title"}).find_parent('a')
+        link = product_link_element['href']
+        full_link = base_url + link
+        href.append(full_link)
+
+        # Append '-1' for unavailable data
+        rating_vendor.append("-1")
+        success.append("-1")
+        CVE.append("-1")
+        MS.append("-1")
+        describe.append("-1")
+        views.append("-1")
+        addDate.append("-1")
+        BTC.append("-1")
+        USD.append("-1")
+        EURO.append("-1")
+        qLeft.append("-1")
+        shipFrom.append("-1")
+        shipTo.append("-1")
+
+        nm += 1
 
     # Populate the final variable (this should be a list with all fields scraped)
     return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
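
Note: the added loop body pulls the rating from a CSS width on the `stars2` div and reads sold/reviews out of the third row of each product's info table. A runnable sketch of that extraction; `cleanNumbers` is a project helper not reproduced here, so a stand-in regex pulls the number, and the HTML is illustrative only:

```python
import re
from bs4 import BeautifulSoup

html = """
<div class="item"><div class="info"><table><tbody>
  <tr><td>header</td></tr>
  <tr><td><div class="stars2" style="width: 80%"></div></td></tr>
  <tr><td>12 sold</td><td>3 reviews</td></tr>
</tbody></table></div></div>
"""
soup = BeautifulSoup(html, 'html.parser')
product = soup.find('div', {'class': 'item'})

tbody = product.find('div', {'class': 'info'}).find('tbody')
width = tbody.find('div', {'class': 'stars2'}).get('style')
rating = re.search(r'[\d.]+', width).group()         # stand-in for cleanNumbers
td = tbody.findAll('tr', recursive=False)[2].findAll('td')  # third row: sold / reviews

print(rating, '|', td[0].text.strip(), '|', td[1].text.strip())
# 80 | 12 sold | 3 reviews
```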
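The long run of `"-1"` appends exists to keep every per-product list the same length, so `organizeProducts` can pair entries positionally; the new side also restores the `USD.append("-1")` that the removed side lacked. A minimal illustration of that alignment invariant, using stand-in rows rather than real market data:

```python
# Every per-product list must end up with exactly nm entries.
name, vendor, USD = [], [], []
nm = 0
for product in [{'title': 'Widget'}, {'title': 'Gadget'}]:  # stand-in rows
    name.append(product['title'])
    vendor.append('-1')  # unavailable on listing pages
    USD.append('-1')     # unavailable on listing pages
    nm += 1

assert all(len(lst) == nm for lst in (name, vendor, USD))
```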
|
|
|