
Images.

ericssonmarin-cpp committed 1 year ago · branch main · commit 9207c4f30d
8 changed files with 185 additions and 60 deletions
1. +23 -8    MarketPlaces/Apocalypse/parser.py
2. +44 -31   MarketPlaces/DB_Connection/db_connection.py
3. +45 -4    MarketPlaces/DarkMatter/parser.py
4. +18 -5    MarketPlaces/DigitalThriftShop/parser.py
5. +16 -3    MarketPlaces/HiddenMarket/parser.py
6. +5 -1     MarketPlaces/Initialization/prepare_parser.py
7. +25 -4    MarketPlaces/RobinhoodMarket/parser.py
8. +9 -4     MarketPlaces/Utilities/utilities.py

+23 -8  MarketPlaces/Apocalypse/parser.py

@@ -30,7 +30,9 @@ def apocalypse_description_parser(soup: Tag):
left = "-1" # 16 Product_QuantityLeft
shipFrom = "-1" # 17 Product_ShippedFrom
shipTo = "-1" # 18 Product_ShippedTo
image = "-1" # 19 Product_Image
vendor_image = "-1" # 20 Vendor_Image
content: Tag = soup.find("div", {'id': "article_page"})
product_name = content.find("p", {"class": "list-group-item text-center mb-0 box"}).text
@@ -38,7 +40,11 @@ def apocalypse_description_parser(soup: Tag):
product_description = content.find("pre").text
describe = cleanString(product_description.strip())
# Finding Product Image
image = soup.find('div', {'class': 'col-md-7 text-center'}).find('img')
image = image.get('src')
product_reviews_list: Tag = content.find("table", {"class": "table product_reviews"}) \
.find_all("li")
@@ -72,7 +78,7 @@ def apocalypse_description_parser(soup: Tag):
# Populating the final variable (this should be a list with all fields scraped)
row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
BTC, USD, EURO, sold, left, shipFrom, shipTo)
BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
# Sending the results
return row
@@ -103,15 +109,21 @@ def apocalypse_listing_parser(soup: Tag):
vendor = [] # 18 Vendor
rating = [] # 19 Vendor_Rating
success = [] # 20 Vendor_Successful_Transactions
href = [] # 23 Product_Links (Urls)
image = [] # 20 Product_Image
image_vendor = [] # 21 Vendor_Image
href = [] # 22 Product_Links
listings: ResultSet[Tag] = soup.find("div", {"class": "col-lg-9 my-4"}).find_all("div", {"class": "col-lg-4 col-md-6 mb-1"})
for prod in listings:
product_name = prod.find('h5', {"class": "art_title"}).text
name.append(cleanString(product_name.strip()))
# Finding Product Image
product_image = prod.find('img', {'class': 'customHeight'})
product_image = product_image.get('src')
image.append(product_image)
CVE.append("-1")
MS.append("-1")
@@ -124,6 +136,7 @@ def apocalypse_listing_parser(soup: Tag):
EURO.append("-1")
shipTo.append("-1")
success.append("-1")
image_vendor.append("-1")
product_price = prod.find("span", {"class": "priceP"}).text
USD.append(cleanString(product_price.strip()))
@@ -161,7 +174,7 @@ def apocalypse_listing_parser(soup: Tag):
rating.append(cleanString(product_vendor_rating.strip()))
except Exception as e:
raise e
product_href = prod.find('a').get('href')
href.append(product_href)
@@ -190,7 +203,9 @@ def apocalypse_listing_parser(soup: Tag):
qLeft=qLeft,
shipFrom=shipFrom,
shipTo=shipTo,
href=href
href=href,
image=image,
image_vendor=image_vendor
)
#called by the crawler to get description links on a listing page
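The image lookups added in this file chain find() calls that raise AttributeError whenever a node is missing. A minimal defensive variant (a sketch, not the commit's code) that preserves the parsers' "-1" sentinel:

    def safe_img_src(container):
        """Return the src of the first <img> inside container, or "-1" if absent."""
        img = container.find('img') if container is not None else None
        src = img.get('src') if img is not None else None
        return src if src else "-1"

    # usage mirroring the description parser above (selector from the source):
    # image = safe_img_src(soup.find('div', {'class': 'col-md-7 text-center'}))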


+44 -31  MarketPlaces/DB_Connection/db_connection.py

@@ -3,6 +3,7 @@ __author__ = 'DarkWeb'
import psycopg2
import traceback
import configparser
from MarketPlaces.Utilities.utilities import *
def connectDataBase():
@@ -146,7 +147,7 @@ def create_marketPlace(cur, row, url):
sql = "Insert into marketplaces (market_id, name_market, url_market, dateinserted_market) " \
"Values (%s, %s, %s, %s)"
recset = [marketId, row[0], url, row[21]]
recset = [marketId, row[0], url, row[23]]
cur.execute(sql, recset)
@@ -165,13 +166,15 @@ def create_vendor(cur, row, marketId):
if newVendor:
sql = "Insert into vendors (vendor_id, market_id, name_vendor, rating_vendor, successfultransactions_vendor, dateinserted_vendor) Values (%s, %s, %s, %s, %s, %s)"
sql = "Insert into vendors (vendor_id, market_id, name_vendor, rating_vendor, successfultransactions_vendor, image_vendor, dateinserted_vendor) " \
"Values (%s, %s, %s, %s, %s, %s, %s)"
recset = [vendorId, marketId,
row[1],
row[2] if row[2] != '-1' else None,
row[3] if row[3] != '-1' else None,
row[21]]
row[21] if row[21] != '-1' else None,
row[23]]
cur.execute(sql, recset)
@@ -183,24 +186,30 @@ def create_vendor(cur, row, marketId):
recset = cur.fetchall()
#aes_decryption(recset[0][5]) trying to decrypt the image
if (str(recset[0][3]) != str(row[2] if row[2] != '-1' else None) or # there was a change in the vendor information
str(recset[0][4]) != str(row[3] if row[3] != '-1' else None)):
str(recset[0][4]) != str(row[3] if row[3] != '-1' else None) or
str(recset[0][5]) != str(row[21] if row[21] != '-1' else None)):
sql = "Insert into vendors_history (vendor_id, market_id, name_vendor, rating_vendor, successfultransactions_vendor, dateinserted_vendor) Values (%s, %s, %s, %s, %s, %s)"
sql = "Insert into vendors_history (vendor_id, market_id, name_vendor, rating_vendor, successfultransactions_vendor, image_vendor, dateinserted_vendor) " \
"Values (%s, %s, %s, %s, %s, %s, %s)"
recset = [vendorId, marketId,
recset[0][2],
recset[0][3],
recset[0][4],
recset[0][5]]
recset[0][5],
recset[0][6]]
cur.execute(sql, recset)
sql = "Update vendors set rating_vendor = %(rating_vendor)s, successfultransactions_vendor = %(successfultransactions_vendor)s, " \
"dateinserted_vendor = %(dateinserted_vendor)s where vendor_id = %(vendorId)s"
"image_vendor = %(image_vendor)s, dateinserted_vendor = %(dateinserted_vendor)s where vendor_id = %(vendorId)s"
cur.execute(sql, {'rating_vendor': row[2] if row[2] != '-1' else None,
'successfultransactions_vendor': row[3] if row[3] != '-1' else None,
'dateinserted_vendor': row[21],
'image_vendor': row[21] if row[21] != '-1' else None,
'dateinserted_vendor': row[23],
'vendorId': vendorId})
return vendorId
@@ -220,9 +229,9 @@ def create_items(cur, row, marketId, vendorId):
sql = "Insert into items (item_id, market_id, vendor_id, name_item, description_item, cve_item, ms_item, category_item, " \
"views_item, reviews_item, rating_item, dateadded_item, btc_item, usd_item, euro_item, quantitysold_item, " \
"quantityleft_item, shippedfrom_item, shippedto_item, href_item, lastseen_item, dateinserted_item, " \
"quantityleft_item, shippedfrom_item, shippedto_item, lastseen_item, image_item, href_item, dateinserted_item, " \
"classification_item) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, " \
"%s, %s, %s, %s)"
"%s, %s, %s, %s, %s)"
recset = [itemId, marketId, vendorId,
row[4],
@@ -241,10 +250,11 @@ def create_items(cur, row, marketId, vendorId):
row[17] if row[17] != '-1' else None,
row[18] if row[18] != '-1' else None,
row[19] if row[19] != '-1' else None,
row[23],
row[20] if row[20] != '-1' else None,
row[21],
row[21],
row[22]]
row[22] if row[22] != '-1' else None,
row[23],
row[24]]
cur.execute(sql, recset)
@@ -262,13 +272,14 @@ def create_items(cur, row, marketId, vendorId):
str(recset[0][10]) != str(row[11] if row[11] != '-1' else None) or str(recset[0][11]) != str(row[12] if row[12] != '-1' else None) or
str(recset[0][12]) != str(row[13] if row[13] != '-1' else None) or str(recset[0][13]) != str(row[14] if row[14] != '-1' else None) or
str(recset[0][14]) != str(row[15] if row[15] != '-1' else None) or str(recset[0][15]) != str(row[16] if row[16] != '-1' else None) or
str(recset[0][16]) != str(row[17] if row[17] != '-1' else None) or str(recset[0][17]) != str(row[18] if row[18] != '-1' else None)):
str(recset[0][16]) != str(row[17] if row[17] != '-1' else None) or str(recset[0][17]) != str(row[18] if row[18] != '-1' else None) or
str(recset[0][18]) != str(row[19] if row[19] != '-1' else None) or str(recset[0][20]) != str(row[20] if row[20] != '-1' else None)):
sql = "Insert into items_history (item_id, market_id, vendor_id, name_item, description_item, cve_item, ms_item, category_item, " \
"views_item, reviews_item, rating_item, dateadded_item, btc_item, usd_item, euro_item, quantitysold_item, " \
"quantityleft_item, shippedfrom_item, shippedto_item, href_item, lastseen_item, dateinserted_item, " \
"quantityleft_item, shippedfrom_item, shippedto_item, lastseen_item, image_item, href_item, dateinserted_item, " \
"classification_item) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, " \
"%s, %s, %s, %s)"
"%s, %s, %s, %s, %s)"
recset = [itemId, marketId, vendorId,
recset[0][3],
@@ -290,7 +301,8 @@ def create_items(cur, row, marketId, vendorId):
recset[0][19],
recset[0][20],
recset[0][21],
recset[0][22]]
recset[0][22],
recset[0][23]]
cur.execute(sql, recset)
@@ -299,7 +311,7 @@ def create_items(cur, row, marketId, vendorId):
"rating_item = %(rating_item)s, dateadded_item = %(dateadded_item)s, btc_item = %(btc_item)s, " \
"usd_item = %(usd_item)s, euro_item = %(euro_item)s, quantitysold_item = %(quantitysold_item)s, " \
"quantityleft_item = %(quantityleft_item)s, shippedfrom_item = %(shippedfrom_item)s, shippedto_item = %(shippedto_item)s, " \
"lastseen_item = %(lastseen_item)s, dateinserted_item = %(dateinserted_item)s where item_id = %(itemId)s"
"lastseen_item = %(lastseen_item)s, image_item = %(image_item)s, dateinserted_item = %(dateinserted_item)s where item_id = %(itemId)s"
cur.execute(sql, {'description_item': row[5] if row[5] != '-1' else None,
'cve_item': row[6] if row[6] != '-1' else None,
@@ -316,8 +328,9 @@ def create_items(cur, row, marketId, vendorId):
'quantityleft_item': row[17] if row[17] != '-1' else None,
'shippedfrom_item': row[18] if row[18] != '-1' else None,
'shippedto_item': row[19] if row[19] != '-1' else None,
'dateinserted_item': row[21],
'lastseen_item': row[21],
'dateinserted_item': row[23],
'lastseen_item': row[23],
'image_item': row[20],
'itemId': itemId})
@@ -325,7 +338,7 @@ def create_items(cur, row, marketId, vendorId):
sql = "Update items set lastseen_item = %(lastseen_item)s where item_id = %(itemId)s"
cur.execute(sql, {'lastseen_item': row[21],
cur.execute(sql, {'lastseen_item': row[23],
'itemId': itemId})
return itemId
@@ -344,8 +357,8 @@ def create_database(cur, con):
sql = "create table vendors(vendor_id integer not null, market_id integer not null, name_vendor character " \
"varying(255) not null, rating_vendor character varying(255), successfultransactions_vendor integer " \
"null, dateinserted_vendor timestamp(6) with time zone not null, constraint vendors_pk primary key (" \
"vendor_id), constraint vendors_market_id_fkey foreign key (market_id) references marketplaces (" \
"null, image_vendor character varying(1000000) null, dateinserted_vendor timestamp(6) with time zone not null, " \
"constraint vendors_pk primary key (vendor_id), constraint vendors_market_id_fkey foreign key (market_id) references marketplaces (" \
"market_id))"
cur.execute(sql)
@@ -354,8 +367,8 @@ def create_database(cur, con):
sql = "create table vendors_history(vendor_id integer not null, market_id integer not null, name_vendor " \
"character varying(255) not null, rating_vendor character varying(255), successfultransactions_vendor " \
"integer null, dateinserted_vendor timestamp(6) with time zone not null, constraint vendors_history_pk " \
"primary key (vendor_id, dateinserted_vendor), constraint vendors_history_vendor_id_fkey foreign key (" \
"integer null, image_vendor character varying(1000000) null, dateinserted_vendor timestamp(6) with time zone not null, " \
"constraint vendors_history_pk primary key (vendor_id, dateinserted_vendor), constraint vendors_history_vendor_id_fkey foreign key (" \
"vendor_id) references vendors (vendor_id), constraint vendors_history_market_id_fkey foreign key (" \
"market_id) references marketplaces (market_id))"
cur.execute(sql)
@@ -367,9 +380,9 @@ def create_database(cur, con):
"character varying(25) null, btc_item character varying(255) null, usd_item character varying(255) " \
"null, euro_item character varying(255) null, quantitysold_item integer null, quantityleft_item " \
"character varying(255) null, shippedfrom_item character varying(255) null, shippedto_item character " \
"varying(255) null, href_item character varying(255) not null, lastseen_item timestamp(6) with time zone " \
"not null, dateinserted_item timestamp(6) with time zone not null, classification_item double " \
"precision not null, constraint items_pk primary key (item_id), constraint " \
"varying(255) null, lastseen_item timestamp(6) with time zone not null, image_item character varying(1000000) null, " \
"href_item character varying(255) not null, dateinserted_item timestamp(6) with time zone not null, " \
"classification_item double precision not null, constraint items_pk primary key (item_id), constraint " \
"items_market_id_fkey foreign key (market_id) references marketplaces (market_id),constraint " \
"items_vendor_id_fkey foreign key (vendor_id) references vendors (vendor_id))"
cur.execute(sql)
@@ -384,9 +397,9 @@ def create_database(cur, con):
"character varying(25) null, btc_item character varying(255) null, usd_item character varying(255) " \
"null, euro_item character varying(255) null, quantitysold_item integer null, quantityleft_item " \
"character varying(255) null, shippedfrom_item character varying(255) null, shippedto_item character " \
"varying(255) null, href_item character varying(255) not null, lastseen_item timestamp(6) with time zone " \
"not null, dateinserted_item timestamp(6) with time zone not null, classification_item double " \
"precision not null, constraint items_history_pk primary key (item_id, dateinserted_item), " \
"varying(255) null, lastseen_item timestamp(6) with time zone not null, image_item character varying(1000000) null, " \
"href_item character varying(255) not null, dateinserted_item timestamp(6) with time zone not null, " \
"classification_item double precision not null, constraint items_history_pk primary key (item_id, dateinserted_item), " \
"constraint items_history_market_id_fkey foreign key (market_id) references marketplaces (market_id), " \
"constraint items_history_vendor_id_fkey foreign key (vendor_id) references vendors (vendor_id), " \
"constraint items_history_item_id_fkey foreign key (item_id) references items (item_id))"
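For orientation, this is the merged-record layout all the row[...] indices above now assume, reconstructed from this commit (names follow the SQL column names; the list itself is not part of the diff):

    ROW_LAYOUT = [
        "name_market",                    # 0
        "name_vendor",                    # 1
        "rating_vendor",                  # 2
        "successfultransactions_vendor",  # 3
        "name_item",                      # 4
        "description_item",               # 5
        "cve_item",                       # 6
        "ms_item",                        # 7
        "category_item",                  # 8
        "views_item",                     # 9
        "reviews_item",                   # 10
        "rating_item",                    # 11
        "dateadded_item",                 # 12
        "btc_item",                       # 13
        "usd_item",                       # 14
        "euro_item",                      # 15
        "quantitysold_item",              # 16
        "quantityleft_item",              # 17
        "shippedfrom_item",               # 18
        "shippedto_item",                 # 19
        "image_item",                     # 20  new in this commit
        "image_vendor",                   # 21  new in this commit
        "href_item",                      # 22  was 20
        "dateinserted/lastseen",          # 23  was 21
        "classification_item",            # 24  was 22
    ]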


+45 -4  MarketPlaces/DarkMatter/parser.py

@@ -34,6 +34,8 @@ def darkmatter_description_parser(soup):
left = "-1" # 16 Product_QuantityLeft
shipFrom = "-1" # 17 Product_ShippedFrom
shipTo = "-1" # 18 Product_ShippedTo
image = "-1" # 19 Product_Image
vendor_image = "-1" # 20 Vendor_Image
# 0 *Vendor_Name
try:
@@ -65,6 +67,10 @@ def darkmatter_description_parser(soup):
except:
print("description")
# Finding Product Image
#image = soup.find('div', {'class': 'woocommerce-product-gallery__image'}).find('img')
#image = image.get('src')
#product category
try:
temp = soup.find('table', {'class', 'vtable'})
@@ -116,7 +122,7 @@ def darkmatter_description_parser(soup):
# Populating the final variable (this should be a list with all fields scraped)
row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
BTC, USD, EURO, sold, left, shipFrom, shipTo)
BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
# Sending the results
return row
@@ -128,6 +134,7 @@ def darkmatter_description_parser(soup):
#return: 'row' that contains a variety of lists that each hold info on the listing page
def darkmatter_listing_parser(soup):
"""
# Fields to be parsed
nm = 0 # Total_Products (Should be Integer)
mktName = "DarkMatter" # 0 Marketplace_Name
@@ -153,6 +160,33 @@ def darkmatter_listing_parser(soup):
rating = [] # 19 Vendor_Rating
success = [] # 20 Vendor_Successful_Transactions
href = [] # 23 Product_Links (Urls)
"""
# Fields to be parsed
nm = 0 # *Total_Products (Should be Integer)
mktName = "DarkMatter" # 0 *Marketplace_Name
vendor = [] # 1 *Vendor y
rating = [] # 2 Vendor_Rating
success = [] # 3 Vendor_Successful_Transactions
name = [] # 4 *Product_Name y
CVE = [] # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
MS = [] # 6 Product_MS_Classification (Microsoft Security)
category = [] # 7 Product_Category y
describe = [] # 8 Product_Description
views = [] # 9 Product_Number_Of_Views
reviews = [] # 10 Product_Number_Of_Reviews
rating_item = [] # 11 Product_Rating
addDate = [] # 12 Product_AddDate
BTC = [] # 13 Product_BTC_SellingPrice
USD = [] # 14 Product_USD_SellingPrice y
EURO = [] # 15 Product_EURO_SellingPrice
sold = [] # 16 Product_QuantitySold
qLeft =[] # 17 Product_QuantityLeft
shipFrom = [] # 18 Product_ShippedFrom
shipTo = [] # 19 Product_ShippedTo
image = [] # 20 Product_Image
image_vendor = [] # 21 Vendor_Image
href = [] # 22 Product_Links
names = soup.find('div', {"class": "content"}).findAll('td', {"class": "lefted", "colspan": "3"})
left = soup.find('div', {"class": "content"}).findAll('table', {"class": "vtable"})
@@ -178,6 +212,11 @@ def darkmatter_listing_parser(soup):
except Exception as e:
print("product name", e)
# Finding Product Image
#product_image = a.find('img', {'class': 'attachment-woocommerce_thumbnail size-woocommerce_thumbnail'})
#product_image = product_image.get('src')
#image.append(product_image)
CVE.append("-1")
MS.append("-1")
@@ -193,12 +232,14 @@ def darkmatter_listing_parser(soup):
print('category')
describe.append("-1")
escrow.append("-1")
#escrow.append("-1")
views.append("-1")
reviews.append("-1")
addDate.append("-1")
lastSeen.append("-1")
#lastSeen.append("-1")
BTC.append("-1")
image.append("-1")
image_vendor.append("-1")
# usd
try:
@@ -261,7 +302,7 @@ def darkmatter_listing_parser(soup):
# Populate the final variable (this should be a list with all fields scraped)
return organizeProducts(mktName, nm, vendor, rating, success, name, CVE, MS, category, describe, views,
reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
#called by the crawler to get description links on a listing page
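DarkMatter's image extraction stays commented out in this commit and the listing appends "-1" instead. If the WooCommerce-style markup those commented lines target does appear on the site, a guarded version (hypothetical; the commit leaves this disabled) would be:

    def darkmatter_image(soup):
        gallery = soup.find('div', {'class': 'woocommerce-product-gallery__image'})
        img = gallery.find('img') if gallery is not None else None
        return img.get('src') if img is not None else "-1"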


+18 -5  MarketPlaces/DigitalThriftShop/parser.py

@@ -34,7 +34,8 @@ def digitalThriftShop_description_parser(soup: Tag):
left = "-1" # 16 Product_QuantityLeft
shipFrom = "-1" # 17 Product_ShippedFrom
shipTo = "-1" # 18 Product_ShippedTo
image = "-1" # 19 Product_Image
vendor_image = "-1" # 20 Vendor_Image
product_name = soup.find("h1", {"class": "product_title entry-title"}).text
@@ -42,7 +43,11 @@ def digitalThriftShop_description_parser(soup: Tag):
product_description = soup.find("div", {"id": "tab-description"}).find("p").text
describe = cleanString(product_description.strip())
# Finding Product Image
image = soup.find('div', {'class': 'woocommerce-product-gallery__image'}).find('img')
image = image.get('src')
product_category = soup.find("span", {"class": "posted_in"}).find("a").text
category = cleanString(product_category.strip())
@@ -64,7 +69,7 @@ def digitalThriftShop_description_parser(soup: Tag):
# Populating the final variable (this should be a list with all fields scraped)
row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
BTC, USD, EURO, sold, left, shipFrom, shipTo)
BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
# Sending the results
return row
@@ -98,7 +103,9 @@ def digitalThriftShop_listing_parser(soup: Tag):
qLeft =[] # 17 Product_QuantityLeft
shipFrom = [] # 18 Product_ShippedFrom
shipTo = [] # 19 Product_ShippedTo
href = [] # 20 Product_Links
image = [] # 20 Product_Image
image_vendor = [] # 21 Vendor_Image
href = [] # 22 Product_Links
product_category = soup.find("h1", {"class": "woocommerce-products-header__title page-title"}).text
@@ -114,6 +121,11 @@ def digitalThriftShop_listing_parser(soup: Tag):
product_name = product.find("h2", {"class": "woocommerce-loop-product__title"}).text
name.append(cleanString(product_name.strip()))
# Finding Product Image
product_image = product.find('img', {'class': 'attachment-woocommerce_thumbnail size-woocommerce_thumbnail'})
product_image = product_image.get('src')
image.append(product_image)
CVE.append("-1")
MS.append("-1")
@@ -121,6 +133,7 @@ def digitalThriftShop_listing_parser(soup: Tag):
describe.append("-1")
views.append("-1")
reviews.append("-1")
image_vendor.append("-1")
try:
product_rating = product.find("div", {"class": "star-rating"}).find("strong", {"class": "rating"}).text
@@ -146,7 +159,7 @@ def digitalThriftShop_listing_parser(soup: Tag):
# Populate the final variable (this should be a list with all fields scraped)
return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
#called by the crawler to get description links on a listing page
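WooCommerce themes sometimes lazy-load thumbnails and keep the real URL in a data-src attribute; whether DigitalThriftShop's theme does is an assumption. A fallback sketch around the thumbnail lookup added above:

    def thumb_src(product):
        img = product.find('img', {'class': 'attachment-woocommerce_thumbnail size-woocommerce_thumbnail'})
        if img is None:
            return "-1"
        return img.get('src') or img.get('data-src') or "-1"  # data-src fallback is an assumption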


+16 -3  MarketPlaces/HiddenMarket/parser.py

@@ -30,6 +30,8 @@ def hiddenmarket_description_parser(soup):
left = "-1" # 16 Product_QuantityLeft
shipFrom = "-1" # 17 Product_ShippedFrom
shipTo = "-1" # 18 Product_ShippedTo
image = "-1" # 19 Product_Image
vendor_image = "-1" # 20 Vendor_Image
bae = soup.find('div', {'class': "main"})
@@ -84,6 +86,10 @@ def hiddenmarket_description_parser(soup):
describe = describe.replace("-", " ")
describe = describe.strip()
# Finding Product Image
image = soup.find('div', {"class": "thumbnails"}).find('img', {"class": "bigthumbnail"})
image = image.get('src')
# Finding the Product Category
category = mb[-4].text
category = category.replace("Category:", "")
@@ -115,7 +121,7 @@ def hiddenmarket_description_parser(soup):
# Populating the final variable (this should be a list with all fields scraped)
row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
BTC, USD, EURO, sold, left, shipFrom, shipTo)
BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
# Sending the results
return row
@@ -145,7 +151,9 @@ def hiddenmarket_listing_parser(soup):
qLeft = [] # 17 Product_QuantityLeft
shipFrom = [] # 18 Product_ShippedFrom
shipTo = [] # 19 Product_ShippedTo
href = [] # 20 Product_Links
image = [] # 20 Product_Image
image_vendor = [] # 21 Vendor_Image
href = [] # 22 Product_Links
listing = soup.findAll('div', {"class": "item"})
@@ -175,12 +183,17 @@ def hiddenmarket_listing_parser(soup):
product = product.strip()
name.append(product)
# Finding Product Image
image.append("-1")
# Finding Vendor
vendor_name = card.text
vendor_name = vendor_name.replace(",", "")
vendor_name = vendor_name.strip()
vendor.append(vendor_name)
image_vendor.append("-1")
# Finding USD
usd = card.next_sibling.find('div', {"class": "buttons"}).find('div', {'class': "price"}).text
usd = usd.replace("USD", "")
@@ -262,7 +275,7 @@ def hiddenmarket_listing_parser(soup):
# Populate the final variable (this should be a list with all fields scraped)
return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
def hiddenmarket_links_parser(soup):
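Because replace_image_sources (see the Utilities diff below) rewrites crawled <img> tags into base64 data URIs, a parsed image field can be sanity-checked before the "-1" placeholder is trusted. A hypothetical helper, not in the commit:

    def is_image_data_uri(src):
        return isinstance(src, str) and src.startswith("data:image/") and "base64," in src

    assert not is_image_data_uri("-1")
    assert is_image_data_uri("data:image/png;base64,iVBORw0KGgo=")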


+5 -1  MarketPlaces/Initialization/prepare_parser.py

@@ -72,6 +72,10 @@ def mergePages(rmm, rec):
rec[18] = rmm[17]
if rec[19] == "-1": # shippedto_item
rec[19] = rmm[18]
if rec[20] == "-1": # image
rec[20] = rmm[19]
if rec[21] == "-1": # image_vendor
rec[21] = rmm[20]
return rec
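The one-slot offset in mergePages comes from rec (the listing CSV row) carrying the market name at index 0 while rmm (the description-parser tuple) does not, so rec[k] is filled from rmm[k-1]. A toy run with hypothetical values:

    rec = ["-1"] * 24                        # listing row: market name at 0, image at 20
    rmm = ["-1"] * 21                        # description row: image at 19, vendor image at 20
    rmm[19] = "data:image/png;base64,AAAA"   # hypothetical scraped image
    if rec[20] == "-1":  # image
        rec[20] = rmm[19]
    print(rec[20])       # -> data:image/png;base64,AAAA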
@@ -318,7 +322,7 @@ def new_parse(marketPlace, url, createLog):
rec = rec.split(',')
descriptionPattern = cleanLink(rec[20]) + ".html"
descriptionPattern = cleanLink(rec[22]) + ".html"
# Reading the associated description Html Pages
descriptions = glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Description", descriptionPattern))


+25 -4  MarketPlaces/RobinhoodMarket/parser.py

@@ -39,6 +39,8 @@ def Robinhood_description_parser(soup):
left = "-1" # 16 Product_QuantityLeft
shipFrom = "-1" # 17 Product_ShippedFrom
shipTo = "-1" # 18 Product_ShippedTo
image = "-1" # 19 Product_Image
vendor_image = "-1" # 20 Vendor_Image
# Finding Product Name
name = soup.find('h1').text
@@ -59,12 +61,20 @@ def Robinhood_description_parser(soup):
desc = desc + para.text
describe = desc
# Finding Product Image
image = soup.find('div', {'class': 'woocommerce-product-gallery__image'}).find('img')
image = image.get('src')
# Finding Vendor
vendor = soup.find('a', {'class': 'wcfm_dashboard_item_title'}).text
vendor = vendor.replace(",", "")
vendor = vendor.replace("Sold by:", "")
vendor = vendor.strip()
# Finding Vendor Image
vendor_image = soup.find('div', {'class': 'wcfmmp_sold_by_container_left'}).find('img')
vendor_image = vendor_image.get('src')
# Finding Category
catSpan = soup.find('span', {'class': 'posted_in'})
category = catSpan.find('a').text
@@ -93,7 +103,7 @@ def Robinhood_description_parser(soup):
# Populating the final variable (this should be a list with all fields scraped)
row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
BTC, USD, EURO, sold, left, shipFrom, shipTo)
BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
# Sending the results
return row
@@ -124,7 +134,9 @@ def Robinhood_listing_parser(soup):
qLeft =[] # 17 Product_QuantityLeft
shipFrom = [] # 18 Product_ShippedFrom
shipTo = [] # 19 Product_ShippedTo
href = [] # 20 Product_Links
image = [] # 20 Product_Image
image_vendor = [] # 21 Vendor_Image
href = [] # 22 Product_Links
listing = soup.find('ul', {"class": "products columns-4"})
items = listing.findAll('li')
@@ -153,6 +165,11 @@ def Robinhood_listing_parser(soup):
product = product.strip()
name.append(product)
# Finding Product Image
product_image = card.find('img', {'class': 'attachment-woocommerce_thumbnail size-woocommerce_thumbnail'})
product_image = product_image.get('src')
image.append(product_image)
info = card.find('div', {'class': 'wcfmmp_sold_by_container'})
# Finding Vendor
@@ -161,6 +178,11 @@ def Robinhood_listing_parser(soup):
vendor_name = vendor_name.strip()
vendor.append(vendor_name)
# Finding Vendor Image
vendor_icon = info.find('img', {'class', 'wcfmmp_sold_by_logo'})
vendor_icon = vendor_icon.get('src')
image_vendor.append(vendor_icon)
# Finding USD
span = card.find('span', {'class': 'price'})
if span is not None:
@@ -198,13 +220,12 @@ def Robinhood_listing_parser(soup):
MSValue=me
MS.append(MSValue)
#print(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
# reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
# Populate the final variable (this should be a list with all fields scraped)
return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
def Robinhood_links_parser(soup):
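One caveat in the vendor-icon lookup above: info.find('img', {'class', 'wcfmmp_sold_by_logo'}) passes a set literal where BeautifulSoup's find() expects an attribute dict, so it will not filter on the class as intended. A dict form with a missing-node guard (a sketch, not the commit's code):

    def vendor_icon_src(info):
        icon = info.find('img', {'class': 'wcfmmp_sold_by_logo'}) if info is not None else None
        return icon.get('src') if icon is not None else "-1"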


+9 -4  MarketPlaces/Utilities/utilities.py

@@ -242,7 +242,7 @@ def cleanLink(originalLink):
def organizeProducts(marketplace, nm, vendor, rating_vendor, success_vendor, nombre, CVE, MS, category, describe,
views, reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href):
views, reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor):
rw = []
@@ -291,9 +291,13 @@ def organizeProducts(marketplace, nm, vendor, rating_vendor, success_vendor, nom
lne += ","
lne += "-1" if len(shipTo) == 0 else shipTo[n] # 19
lne += ","
lne += "-1" if len(href) == 0 else href[n] # 20
lne += "-1" if len(image) == 0 else image[n] # 20
lne += ","
lne += day + " " + ahora # 21
lne += "-1" if len(image_vendor) == 0 else image_vendor[n] # 21
lne += ","
lne += "-1" if len(href) == 0 else href[n] # 22
lne += ","
lne += day + " " + ahora # 23
rw.append(lne)
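A standalone sketch of the tail columns this hunk now appends to each joined line (values hypothetical; the day/ahora timestamp format is an assumption). One design caveat: an RFC 2397 data URI contains a comma before its payload, so a base64 image written into this comma-separated line would be split apart when prepare_parser later calls rec.split(',').

    from datetime import datetime

    shipTo, image, image_vendor, href = ["US"], ["-1"], ["-1"], ["http://example.onion/p/1"]
    n = 0
    stamp = datetime.now().strftime("%m/%d/%Y %H:%M:%S")  # stands in for day + " " + ahora
    tail = ",".join([shipTo[n], image[n], image_vendor[n], href[n], stamp])  # columns 19-23
    print(tail)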
@@ -338,6 +342,7 @@ def aes_encryption(item):
def aes_decryption(item):
to_bytes = bytes(item)
#to_bytes = bytes(item, 'utf-8')
decrypted_bytes = decryptCipher.decrypt(to_bytes)
@@ -403,7 +408,7 @@ def replace_image_sources(driver, html_content):
string_image = encrypt_encode_image_to_base64(driver, img_xpath)
if string_image:
img_tag.set('src', f'data:image/png;base64,{string_image}')
img_tag.set('src', f'data:image/png;base64;{string_image}')
else:
img_tag.getparent().remove(img_tag)
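For reference, RFC 2397 data URIs use semicolons between the media type and the base64 marker but a comma before the payload (the pre-change form above); with a semicolon in its place, browsers will not decode the image. A minimal sketch of the expected value (payload hypothetical):

    string_image = "iVBORw0KGgoAAAANSUhEUg=="       # hypothetical base64 payload
    src = f"data:image/png;base64,{string_image}"   # comma before the payload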

