
Images.

main
ericssonmarin-cpp · 1 year ago · commit 9207c4f30d
8 changed files with 185 additions and 60 deletions
  1. MarketPlaces/Apocalypse/parser.py (+23, -8)
  2. MarketPlaces/DB_Connection/db_connection.py (+44, -31)
  3. MarketPlaces/DarkMatter/parser.py (+45, -4)
  4. MarketPlaces/DigitalThriftShop/parser.py (+18, -5)
  5. MarketPlaces/HiddenMarket/parser.py (+16, -3)
  6. MarketPlaces/Initialization/prepare_parser.py (+5, -1)
  7. MarketPlaces/RobinhoodMarket/parser.py (+25, -4)
  8. MarketPlaces/Utilities/utilities.py (+9, -4)

MarketPlaces/Apocalypse/parser.py (+23, -8)

@@ -30,7 +30,9 @@ def apocalypse_description_parser(soup: Tag):
     left = "-1"                             # 16 Product_QuantityLeft
     shipFrom = "-1"                         # 17 Product_ShippedFrom
     shipTo = "-1"                           # 18 Product_ShippedTo
+    image = "-1"                            # 19 Product_Image
+    vendor_image = "-1"                     # 20 Vendor_Image
 
     content: Tag = soup.find("div", {'id': "article_page"})
     product_name = content.find("p", {"class": "list-group-item text-center mb-0 box"}).text
@@ -38,7 +40,11 @@ def apocalypse_description_parser(soup: Tag):
     product_description = content.find("pre").text
     describe = cleanString(product_description.strip())
 
+    # Finding Product Image
+    image = soup.find('div', {'class': 'col-md-7 text-center'}).find('img')
+    image = image.get('src')
+
     product_reviews_list: Tag = content.find("table", {"class": "table product_reviews"}) \
         .find_all("li")
@@ -72,7 +78,7 @@ def apocalypse_description_parser(soup: Tag):
     # Populating the final variable (this should be a list with all fields scraped)
     row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
-           BTC, USD, EURO, sold, left, shipFrom, shipTo)
+           BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
 
     # Sending the results
     return row
@@ -103,15 +109,21 @@ def apocalypse_listing_parser(soup: Tag):
     vendor = []                             # 18 Vendor
     rating = []                             # 19 Vendor_Rating
     success = []                            # 20 Vendor_Successful_Transactions
-    href = []                               # 23 Product_Links (Urls)
+    image = []                              # 20 Product_Image
+    image_vendor = []                       # 21 Vendor_Image
+    href = []                               # 22 Product_Links
 
     listings: ResultSet[Tag] = soup.find("div", {"class": "col-lg-9 my-4"}).find_all("div", {"class": "col-lg-4 col-md-6 mb-1"})
 
     for prod in listings:
 
         product_name = prod.find('h5', {"class": "art_title"}).text
         name.append(cleanString(product_name.strip()))
 
+        # Finding Product Image
+        product_image = prod.find('img', {'class': 'customHeight'})
+        product_image = product_image.get('src')
+        image.append(product_image)
+
         CVE.append("-1")
         MS.append("-1")
@@ -124,6 +136,7 @@ def apocalypse_listing_parser(soup: Tag):
         EURO.append("-1")
         shipTo.append("-1")
         success.append("-1")
+        image_vendor.append("-1")
 
         product_price = prod.find("span", {"class": "priceP"}).text
         USD.append(cleanString(product_price.strip()))
@@ -161,7 +174,7 @@ def apocalypse_listing_parser(soup: Tag):
             rating.append(cleanString(product_vendor_rating.strip()))
         except Exception as e:
             raise e
 
         product_href = prod.find('a').get('href')
         href.append(product_href)
@@ -190,7 +203,9 @@ def apocalypse_listing_parser(soup: Tag):
         qLeft=qLeft,
         shipFrom=shipFrom,
         shipTo=shipTo,
-        href=href
+        href=href,
+        image=image,
+        image_vendor=image_vendor
     )
 
 #called by the crawler to get description links on a listing page
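
Note on the pattern above: the chained find(...).find('img').get('src') calls raise AttributeError whenever an expected element is missing. A minimal defensive sketch (hypothetical helper, assuming a BeautifulSoup 4 Tag; not code from this commit):

    def safe_img_src(container, *find_args, **find_kwargs):
        # Returns the img src, or the "-1" sentinel these parsers use for
        # missing fields, instead of raising when the tag/attribute is absent.
        tag = container.find(*find_args, **find_kwargs) if container else None
        src = tag.get('src') if tag else None
        return src if src else '-1'

    # e.g.: image = safe_img_src(soup.find('div', {'class': 'col-md-7 text-center'}), 'img')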


MarketPlaces/DB_Connection/db_connection.py (+44, -31)

@@ -3,6 +3,7 @@ __author__ = 'DarkWeb'
 import psycopg2
 import traceback
 import configparser
+from MarketPlaces.Utilities.utilities import *
 
 
 def connectDataBase():
@@ -146,7 +147,7 @@ def create_marketPlace(cur, row, url):
     sql = "Insert into marketplaces (market_id, name_market, url_market, dateinserted_market) " \
           "Values (%s, %s, %s, %s)"
 
-    recset = [marketId, row[0], url, row[21]]
+    recset = [marketId, row[0], url, row[23]]
 
     cur.execute(sql, recset)
@@ -165,13 +166,15 @@ def create_vendor(cur, row, marketId):
     if newVendor:
 
-        sql = "Insert into vendors (vendor_id, market_id, name_vendor, rating_vendor, successfultransactions_vendor, dateinserted_vendor) Values (%s, %s, %s, %s, %s, %s)"
+        sql = "Insert into vendors (vendor_id, market_id, name_vendor, rating_vendor, successfultransactions_vendor, image_vendor, dateinserted_vendor) " \
+              "Values (%s, %s, %s, %s, %s, %s, %s)"
 
         recset = [vendorId, marketId,
                   row[1],
                   row[2] if row[2] != '-1' else None,
                   row[3] if row[3] != '-1' else None,
-                  row[21]]
+                  row[21] if row[21] != '-1' else None,
+                  row[23]]
 
         cur.execute(sql, recset)
@@ -183,24 +186,30 @@ def create_vendor(cur, row, marketId):
         recset = cur.fetchall()
 
+        #aes_decryption(recset[0][5]) trying to decrypt the image
+
         if (str(recset[0][3]) != str(row[2] if row[2] != '-1' else None) or   # there was a change in the vendor information
-            str(recset[0][4]) != str(row[3] if row[3] != '-1' else None)):
+            str(recset[0][4]) != str(row[3] if row[3] != '-1' else None) or
+            str(recset[0][5]) != str(row[21] if row[21] != '-1' else None)):
 
-            sql = "Insert into vendors_history (vendor_id, market_id, name_vendor, rating_vendor, successfultransactions_vendor, dateinserted_vendor) Values (%s, %s, %s, %s, %s, %s)"
+            sql = "Insert into vendors_history (vendor_id, market_id, name_vendor, rating_vendor, successfultransactions_vendor, image_vendor, dateinserted_vendor) " \
+                  "Values (%s, %s, %s, %s, %s, %s, %s)"
 
             recset = [vendorId, marketId,
                       recset[0][2],
                      recset[0][3],
                       recset[0][4],
-                      recset[0][5]]
+                      recset[0][5],
+                      recset[0][6]]
 
             cur.execute(sql, recset)
 
         sql = "Update vendors set rating_vendor = %(rating_vendor)s, successfultransactions_vendor = %(successfultransactions_vendor)s, " \
-              "dateinserted_vendor = %(dateinserted_vendor)s where vendor_id = %(vendorId)s"
+              "image_vendor = %(image_vendor)s, dateinserted_vendor = %(dateinserted_vendor)s where vendor_id = %(vendorId)s"
 
         cur.execute(sql, {'rating_vendor': row[2] if row[2] != '-1' else None,
                           'successfultransactions_vendor': row[3] if row[3] != '-1' else None,
-                          'dateinserted_vendor': row[21],
+                          'image_vendor': row[21] if row[21] != '-1' else None,
+                          'dateinserted_vendor': row[23],
                           'vendorId': vendorId})
 
     return vendorId
@@ -220,9 +229,9 @@ def create_items(cur, row, marketId, vendorId):
         sql = "Insert into items (item_id, market_id, vendor_id, name_item, description_item, cve_item, ms_item, category_item, " \
               "views_item, reviews_item, rating_item, dateadded_item, btc_item, usd_item, euro_item, quantitysold_item, " \
-              "quantityleft_item, shippedfrom_item, shippedto_item, href_item, lastseen_item, dateinserted_item, " \
+              "quantityleft_item, shippedfrom_item, shippedto_item, lastseen_item, image_item, href_item, dateinserted_item, " \
               "classification_item) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, " \
-              "%s, %s, %s, %s)"
+              "%s, %s, %s, %s, %s)"
 
         recset = [itemId, marketId, vendorId,
                   row[4],
@@ -241,10 +250,11 @@ def create_items(cur, row, marketId, vendorId):
                   row[17] if row[17] != '-1' else None,
                   row[18] if row[18] != '-1' else None,
                   row[19] if row[19] != '-1' else None,
+                  row[23],
                   row[20] if row[20] != '-1' else None,
-                  row[21],
-                  row[21],
-                  row[22]]
+                  row[22] if row[22] != '-1' else None,
+                  row[23],
+                  row[24]]
 
         cur.execute(sql, recset)
@@ -262,13 +272,14 @@ def create_items(cur, row, marketId, vendorId):
             str(recset[0][10]) != str(row[11] if row[11] != '-1' else None) or str(recset[0][11]) != str(row[12] if row[12] != '-1' else None) or
             str(recset[0][12]) != str(row[13] if row[13] != '-1' else None) or str(recset[0][13]) != str(row[14] if row[14] != '-1' else None) or
             str(recset[0][14]) != str(row[15] if row[15] != '-1' else None) or str(recset[0][15]) != str(row[16] if row[16] != '-1' else None) or
-            str(recset[0][16]) != str(row[17] if row[17] != '-1' else None) or str(recset[0][17]) != str(row[18] if row[18] != '-1' else None)):
+            str(recset[0][16]) != str(row[17] if row[17] != '-1' else None) or str(recset[0][17]) != str(row[18] if row[18] != '-1' else None) or
+            str(recset[0][18]) != str(row[19] if row[19] != '-1' else None) or str(recset[0][20]) != str(row[20] if row[20] != '-1' else None)):
 
             sql = "Insert into items_history (item_id, market_id, vendor_id, name_item, description_item, cve_item, ms_item, category_item, " \
                   "views_item, reviews_item, rating_item, dateadded_item, btc_item, usd_item, euro_item, quantitysold_item, " \
-                  "quantityleft_item, shippedfrom_item, shippedto_item, href_item, lastseen_item, dateinserted_item, " \
+                  "quantityleft_item, shippedfrom_item, shippedto_item, lastseen_item, image_item, href_item, dateinserted_item, " \
                   "classification_item) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, " \
-                  "%s, %s, %s, %s)"
+                  "%s, %s, %s, %s, %s)"
 
             recset = [itemId, marketId, vendorId,
                       recset[0][3],
@@ -290,7 +301,8 @@ def create_items(cur, row, marketId, vendorId):
                      recset[0][19],
                       recset[0][20],
                       recset[0][21],
-                      recset[0][22]]
+                      recset[0][22],
+                      recset[0][23]]
 
             cur.execute(sql, recset)
@@ -299,7 +311,7 @@ def create_items(cur, row, marketId, vendorId):
               "rating_item = %(rating_item)s, dateadded_item = %(dateadded_item)s, btc_item = %(btc_item)s, " \
               "usd_item = %(usd_item)s, euro_item = %(euro_item)s, quantitysold_item = %(quantitysold_item)s, " \
              "quantityleft_item = %(quantityleft_item)s, shippedfrom_item = %(shippedfrom_item)s, shippedto_item = %(shippedto_item)s, " \
-              "lastseen_item = %(lastseen_item)s, dateinserted_item = %(dateinserted_item)s where item_id = %(itemId)s"
+              "lastseen_item = %(lastseen_item)s, image_item = %(image_item)s, dateinserted_item = %(dateinserted_item)s where item_id = %(itemId)s"
 
         cur.execute(sql, {'description_item': row[5] if row[5] != '-1' else None,
                           'cve_item': row[6] if row[6] != '-1' else None,
@@ -316,8 +328,9 @@ def create_items(cur, row, marketId, vendorId):
                           'quantityleft_item': row[17] if row[17] != '-1' else None,
                           'shippedfrom_item': row[18] if row[18] != '-1' else None,
                           'shippedto_item': row[19] if row[19] != '-1' else None,
-                          'dateinserted_item': row[21],
-                          'lastseen_item': row[21],
+                          'dateinserted_item': row[23],
+                          'lastseen_item': row[23],
+                          'image_item': row[20],
                           'itemId': itemId})
@@ -325,7 +338,7 @@ def create_items(cur, row, marketId, vendorId):
         sql = "Update items set lastseen_item = %(lastseen_item)s where item_id = %(itemId)s"
 
-        cur.execute(sql, {'lastseen_item': row[21],
+        cur.execute(sql, {'lastseen_item': row[23],
                           'itemId': itemId})
 
     return itemId
@@ -344,8 +357,8 @@ def create_database(cur, con):
         sql = "create table vendors(vendor_id integer not null, market_id integer not null, name_vendor character " \
               "varying(255) not null, rating_vendor character varying(255), successfultransactions_vendor integer " \
-              "null, dateinserted_vendor timestamp(6) with time zone not null, constraint vendors_pk primary key (" \
-              "vendor_id), constraint vendors_market_id_fkey foreign key (market_id) references marketplaces (" \
+              "null, image_vendor character varying(1000000) null, dateinserted_vendor timestamp(6) with time zone not null, " \
+              "constraint vendors_pk primary key (vendor_id), constraint vendors_market_id_fkey foreign key (market_id) references marketplaces (" \
               "market_id))"
         cur.execute(sql)
@@ -354,8 +367,8 @@ def create_database(cur, con):
         sql = "create table vendors_history(vendor_id integer not null, market_id integer not null, name_vendor " \
               "character varying(255) not null, rating_vendor character varying(255), successfultransactions_vendor " \
-              "integer null, dateinserted_vendor timestamp(6) with time zone not null, constraint vendors_history_pk " \
-              "primary key (vendor_id, dateinserted_vendor), constraint vendors_history_vendor_id_fkey foreign key (" \
+              "integer null, image_vendor character varying(1000000) null, dateinserted_vendor timestamp(6) with time zone not null, " \
+              "constraint vendors_history_pk primary key (vendor_id, dateinserted_vendor), constraint vendors_history_vendor_id_fkey foreign key (" \
               "vendor_id) references vendors (vendor_id), constraint vendors_history_market_id_fkey foreign key (" \
               "market_id) references marketplaces (market_id))"
         cur.execute(sql)
@@ -367,9 +380,9 @@ def create_database(cur, con):
               "character varying(25) null, btc_item character varying(255) null, usd_item character varying(255) " \
               "null, euro_item character varying(255) null, quantitysold_item integer null, quantityleft_item " \
               "character varying(255) null, shippedfrom_item character varying(255) null, shippedto_item character " \
-              "varying(255) null, href_item character varying(255) not null, lastseen_item timestamp(6) with time zone " \
-              "not null, dateinserted_item timestamp(6) with time zone not null, classification_item double " \
-              "precision not null, constraint items_pk primary key (item_id), constraint " \
+              "varying(255) null, lastseen_item timestamp(6) with time zone not null, image_item character varying(1000000) null, " \
+              "href_item character varying(255) not null, dateinserted_item timestamp(6) with time zone not null, " \
+              "classification_item double precision not null, constraint items_pk primary key (item_id), constraint " \
              "items_market_id_fkey foreign key (market_id) references marketplaces (market_id),constraint " \
               "items_vendor_id_fkey foreign key (vendor_id) references vendors (vendor_id))"
         cur.execute(sql)
@@ -384,9 +397,9 @@ def create_database(cur, con):
               "character varying(25) null, btc_item character varying(255) null, usd_item character varying(255) " \
               "null, euro_item character varying(255) null, quantitysold_item integer null, quantityleft_item " \
               "character varying(255) null, shippedfrom_item character varying(255) null, shippedto_item character " \
-              "varying(255) null, href_item character varying(255) not null, lastseen_item timestamp(6) with time zone " \
-              "not null, dateinserted_item timestamp(6) with time zone not null, classification_item double " \
-              "precision not null, constraint items_history_pk primary key (item_id, dateinserted_item), " \
+              "varying(255) null, lastseen_item timestamp(6) with time zone not null, image_item character varying(1000000) null, " \
+              "href_item character varying(255) not null, dateinserted_item timestamp(6) with time zone not null, " \
+              "classification_item double precision not null, constraint items_history_pk primary key (item_id, dateinserted_item), " \
               "constraint items_history_market_id_fkey foreign key (market_id) references marketplaces (market_id), " \
               "constraint items_history_vendor_id_fkey foreign key (vendor_id) references vendors (vendor_id), " \
               "constraint items_history_item_id_fkey foreign key (item_id) references items (item_id))"


MarketPlaces/DarkMatter/parser.py (+45, -4)

@@ -34,6 +34,8 @@ def darkmatter_description_parser(soup):
     left = "-1"                             # 16 Product_QuantityLeft
     shipFrom = "-1"                         # 17 Product_ShippedFrom
     shipTo = "-1"                           # 18 Product_ShippedTo
+    image = "-1"                            # 19 Product_Image
+    vendor_image = "-1"                     # 20 Vendor_Image
 
     # 0 *Vendor_Name
     try:
@@ -65,6 +67,10 @@ def darkmatter_description_parser(soup):
     except:
         print("description")
 
+    # Finding Product Image
+    #image = soup.find('div', {'class': 'woocommerce-product-gallery__image'}).find('img')
+    #image = image.get('src')
+
     #product category
     try:
         temp = soup.find('table', {'class', 'vtable'})
@@ -116,7 +122,7 @@ def darkmatter_description_parser(soup):
     # Populating the final variable (this should be a list with all fields scraped)
     row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
-           BTC, USD, EURO, sold, left, shipFrom, shipTo)
+           BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
 
     # Sending the results
     return row
@@ -128,6 +134,7 @@ def darkmatter_description_parser(soup):
 #return: 'row' that contains a variety of lists that each hold info on the listing page
 
 def darkmatter_listing_parser(soup):
+    """
     # Fields to be parsed
     nm = 0                                  # Total_Products (Should be Integer)
     mktName = "DarkMatter"                  # 0 Marketplace_Name
@@ -153,6 +160,33 @@ def darkmatter_listing_parser(soup):
     rating = []                             # 19 Vendor_Rating
     success = []                            # 20 Vendor_Successful_Transactions
     href = []                               # 23 Product_Links (Urls)
+    """
+
+    # Fields to be parsed
+    nm = 0                                  # *Total_Products (Should be Integer)
+    mktName = "DarkMatter"                  # 0 *Marketplace_Name
+    vendor = []                             # 1 *Vendor y
+    rating = []                             # 2 Vendor_Rating
+    success = []                            # 3 Vendor_Successful_Transactions
+    name = []                               # 4 *Product_Name y
+    CVE = []                                # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
+    MS = []                                 # 6 Product_MS_Classification (Microsoft Security)
+    category = []                           # 7 Product_Category y
+    describe = []                           # 8 Product_Description
+    views = []                              # 9 Product_Number_Of_Views
+    reviews = []                            # 10 Product_Number_Of_Reviews
+    rating_item = []                        # 11 Product_Rating
+    addDate = []                            # 12 Product_AddDate
+    BTC = []                                # 13 Product_BTC_SellingPrice
+    USD = []                                # 14 Product_USD_SellingPrice y
+    EURO = []                               # 15 Product_EURO_SellingPrice
+    sold = []                               # 16 Product_QuantitySold
+    qLeft = []                              # 17 Product_QuantityLeft
+    shipFrom = []                           # 18 Product_ShippedFrom
+    shipTo = []                             # 19 Product_ShippedTo
+    image = []                              # 20 Product_Image
+    image_vendor = []                       # 21 Vendor_Image
+    href = []                               # 22 Product_Links
 
     names = soup.find('div', {"class": "content"}).findAll('td', {"class": "lefted", "colspan": "3"})
     left = soup.find('div', {"class": "content"}).findAll('table', {"class": "vtable"})
@@ -178,6 +212,11 @@ def darkmatter_listing_parser(soup):
         except Exception as e:
             print("product name", e)
 
+        # Finding Product Image
+        #product_image = a.find('img', {'class': 'attachment-woocommerce_thumbnail size-woocommerce_thumbnail'})
+        #product_image = product_image.get('src')
+        #image.append(product_image)
+
         CVE.append("-1")
         MS.append("-1")
@@ -193,12 +232,14 @@ def darkmatter_listing_parser(soup):
             print('category')
 
         describe.append("-1")
-        escrow.append("-1")
+        #escrow.append("-1")
         views.append("-1")
         reviews.append("-1")
         addDate.append("-1")
-        lastSeen.append("-1")
+        #lastSeen.append("-1")
         BTC.append("-1")
+        image.append("-1")
+        image_vendor.append("-1")
 
         # usd
         try:
@@ -261,7 +302,7 @@ def darkmatter_listing_parser(soup):
     # Populate the final variable (this should be a list with all fields scraped)
     return organizeProducts(mktName, nm, vendor, rating, success, name, CVE, MS, category, describe, views,
-                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
+                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
 
 #called by the crawler to get description links on a listing page


MarketPlaces/DigitalThriftShop/parser.py (+18, -5)

@@ -34,7 +34,8 @@ def digitalThriftShop_description_parser(soup: Tag):
     left = "-1"                             # 16 Product_QuantityLeft
     shipFrom = "-1"                         # 17 Product_ShippedFrom
     shipTo = "-1"                           # 18 Product_ShippedTo
-
+    image = "-1"                            # 19 Product_Image
+    vendor_image = "-1"                     # 20 Vendor_Image
 
     product_name = soup.find("h1", {"class": "product_title entry-title"}).text
@@ -42,7 +43,11 @@ def digitalThriftShop_description_parser(soup: Tag):
     product_description = soup.find("div", {"id": "tab-description"}).find("p").text
     describe = cleanString(product_description.strip())
 
+    # Finding Product Image
+    image = soup.find('div', {'class': 'woocommerce-product-gallery__image'}).find('img')
+    image = image.get('src')
+
     product_category = soup.find("span", {"class": "posted_in"}).find("a").text
     category = cleanString(product_category.strip())
@@ -64,7 +69,7 @@ def digitalThriftShop_description_parser(soup: Tag):
     # Populating the final variable (this should be a list with all fields scraped)
     row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
-           BTC, USD, EURO, sold, left, shipFrom, shipTo)
+           BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
 
     # Sending the results
     return row
@@ -98,7 +103,9 @@ def digitalThriftShop_listing_parser(soup: Tag):
     qLeft =[]                               # 17 Product_QuantityLeft
     shipFrom = []                           # 18 Product_ShippedFrom
     shipTo = []                             # 19 Product_ShippedTo
-    href = []                               # 20 Product_Links
+    image = []                              # 20 Product_Image
+    image_vendor = []                       # 21 Vendor_Image
+    href = []                               # 22 Product_Links
 
     product_category = soup.find("h1", {"class": "woocommerce-products-header__title page-title"}).text
@@ -114,6 +121,11 @@ def digitalThriftShop_listing_parser(soup: Tag):
         product_name = product.find("h2", {"class": "woocommerce-loop-product__title"}).text
         name.append(cleanString(product_name.strip()))
 
+        # Finding Product Image
+        product_image = product.find('img', {'class': 'attachment-woocommerce_thumbnail size-woocommerce_thumbnail'})
+        product_image = product_image.get('src')
+        image.append(product_image)
+
         CVE.append("-1")
         MS.append("-1")
@@ -121,6 +133,7 @@ def digitalThriftShop_listing_parser(soup: Tag):
         describe.append("-1")
         views.append("-1")
         reviews.append("-1")
+        image_vendor.append("-1")
 
         try:
             product_rating = product.find("div", {"class": "star-rating"}).find("strong", {"class": "rating"}).text
@@ -146,7 +159,7 @@ def digitalThriftShop_listing_parser(soup: Tag):
     # Populate the final variable (this should be a list with all fields scraped)
     return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
-                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
+                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
 
 #called by the crawler to get description links on a listing page


MarketPlaces/HiddenMarket/parser.py (+16, -3)

@@ -30,6 +30,8 @@ def hiddenmarket_description_parser(soup):
     left = "-1"                             # 16 Product_QuantityLeft
     shipFrom = "-1"                         # 17 Product_ShippedFrom
     shipTo = "-1"                           # 18 Product_ShippedTo
+    image = "-1"                            # 19 Product_Image
+    vendor_image = "-1"                     # 20 Vendor_Image
 
     bae = soup.find('div', {'class': "main"})
@@ -84,6 +86,10 @@ def hiddenmarket_description_parser(soup):
     describe = describe.replace("-", " ")
     describe = describe.strip()
 
+    # Finding Product Image
+    image = soup.find('div', {"class": "thumbnails"}).find('img', {"class": "bigthumbnail"})
+    image = image.get('src')
+
     # Finding the Product Category
     category = mb[-4].text
     category = category.replace("Category:", "")
@@ -115,7 +121,7 @@ def hiddenmarket_description_parser(soup):
     # Populating the final variable (this should be a list with all fields scraped)
     row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
-           BTC, USD, EURO, sold, left, shipFrom, shipTo)
+           BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
 
     # Sending the results
     return row
@@ -145,7 +151,9 @@ def hiddenmarket_listing_parser(soup):
     qLeft = []                              # 17 Product_QuantityLeft
     shipFrom = []                           # 18 Product_ShippedFrom
     shipTo = []                             # 19 Product_ShippedTo
-    href = []                               # 20 Product_Links
+    image = []                              # 20 Product_Image
+    image_vendor = []                       # 21 Vendor_Image
+    href = []                               # 22 Product_Links
 
     listing = soup.findAll('div', {"class": "item"})
@@ -175,12 +183,17 @@ def hiddenmarket_listing_parser(soup):
         product = product.strip()
         name.append(product)
 
+        # Finding Product Image
+        image.append("-1")
+
         # Finding Vendor
         vendor_name = card.text
         vendor_name = vendor_name.replace(",", "")
         vendor_name = vendor_name.strip()
         vendor.append(vendor_name)
 
+        image_vendor.append("-1")
+
         # Finding USD
         usd = card.next_sibling.find('div', {"class": "buttons"}).find('div', {'class': "price"}).text
         usd = usd.replace("USD", "")
@@ -262,7 +275,7 @@ def hiddenmarket_listing_parser(soup):
     # Populate the final variable (this should be a list with all fields scraped)
     return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
-                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
+                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
 
 
 def hiddenmarket_links_parser(soup):
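
The bare image.append("-1") and image_vendor.append("-1") above follow the codebase's convention: "-1" marks a field the page does not provide, and db_connection.py converts it to SQL NULL via the recurring expression row[k] if row[k] != '-1' else None. The same rule as a one-line sketch (helper name is illustrative, not from the repo):

    def sentinel_to_null(value):
        # "-1" means "not scraped"; store NULL rather than the literal sentinel
        return None if value == '-1' else value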


MarketPlaces/Initialization/prepare_parser.py (+5, -1)

@@ -72,6 +72,10 @@ def mergePages(rmm, rec):
         rec[18] = rmm[17]
     if rec[19] == "-1": # shippedto_item
         rec[19] = rmm[18]
+    if rec[20] == "-1": # image
+        rec[20] = rmm[19]
+    if rec[21] == "-1": # image_vendor
+        rec[21] = rmm[20]
 
     return rec
@@ -318,7 +322,7 @@ def new_parse(marketPlace, url, createLog):
                     rec = rec.split(',')
 
-                    descriptionPattern = cleanLink(rec[20]) + ".html"
+                    descriptionPattern = cleanLink(rec[22]) + ".html"
 
                     # Reading the associated description Html Pages
                     descriptions = glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Description", descriptionPattern))
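
For context, mergePages combines a listing-page record (rec) with a description-page row (rmm): the listing value wins unless it is the "-1" sentinel, in which case the description value fills the gap. rec is offset by one position relative to rmm because rec[0] holds the marketplace name, which is why the new image fields pair rec[20] with rmm[19] and rec[21] with rmm[20]. The same rule as a compact sketch (illustrative, not the repo's code):

    def merge_field(rec_value, rmm_value):
        # keep the listing value unless it is missing; then use the description value
        return rmm_value if rec_value == "-1" else rec_value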


MarketPlaces/RobinhoodMarket/parser.py (+25, -4)

@@ -39,6 +39,8 @@ def Robinhood_description_parser(soup):
     left = "-1"                             # 16 Product_QuantityLeft
     shipFrom = "-1"                         # 17 Product_ShippedFrom
     shipTo = "-1"                           # 18 Product_ShippedTo
+    image = "-1"                            # 19 Product_Image
+    vendor_image = "-1"                     # 20 Vendor_Image
 
     # Finding Product Name
     name = soup.find('h1').text
@@ -59,12 +61,20 @@ def Robinhood_description_parser(soup):
         desc = desc + para.text
     describe = desc
 
+    # Finding Product Image
+    image = soup.find('div', {'class': 'woocommerce-product-gallery__image'}).find('img')
+    image = image.get('src')
+
     # Finding Vendor
     vendor = soup.find('a', {'class': 'wcfm_dashboard_item_title'}).text
     vendor = vendor.replace(",", "")
     vendor = vendor.replace("Sold by:", "")
     vendor = vendor.strip()
 
+    # Finding Vendor Image
+    vendor_image = soup.find('div', {'class': 'wcfmmp_sold_by_container_left'}).find('img')
+    vendor_image = vendor_image.get('src')
+
     # Finding Category
     catSpan = soup.find('span', {'class': 'posted_in'})
     category = catSpan.find('a').text
@@ -93,7 +103,7 @@ def Robinhood_description_parser(soup):
     # Populating the final variable (this should be a list with all fields scraped)
     row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
-           BTC, USD, EURO, sold, left, shipFrom, shipTo)
+           BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
 
     # Sending the results
     return row
@@ -124,7 +134,9 @@ def Robinhood_listing_parser(soup):
     qLeft =[]                               # 17 Product_QuantityLeft
     shipFrom = []                           # 18 Product_ShippedFrom
     shipTo = []                             # 19 Product_ShippedTo
-    href = []                               # 20 Product_Links
+    image = []                              # 20 Product_Image
+    image_vendor = []                       # 21 Vendor_Image
+    href = []                               # 22 Product_Links
 
     listing = soup.find('ul', {"class": "products columns-4"})
     items = listing.findAll('li')
@@ -153,6 +165,11 @@ def Robinhood_listing_parser(soup):
         product = product.strip()
         name.append(product)
 
+        # Finding Product Image
+        product_image = card.find('img', {'class': 'attachment-woocommerce_thumbnail size-woocommerce_thumbnail'})
+        product_image = product_image.get('src')
+        image.append(product_image)
+
         info = card.find('div', {'class': 'wcfmmp_sold_by_container'})
 
         # Finding Vendor
@@ -161,6 +178,11 @@ def Robinhood_listing_parser(soup):
         vendor_name = vendor_name.strip()
         vendor.append(vendor_name)
 
+        # Finding Vendor Image
+        vendor_icon = info.find('img', {'class', 'wcfmmp_sold_by_logo'})
+        vendor_icon = vendor_icon.get('src')
+        image_vendor.append(vendor_icon)
+
         # Finding USD
         span = card.find('span', {'class': 'price'})
         if span is not None:
@@ -198,13 +220,12 @@ def Robinhood_listing_parser(soup):
         MSValue=me
         MS.append(MSValue)
-
     #print(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
     #      reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
 
     # Populate the final variable (this should be a list with all fields scraped)
     return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
-                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
+                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
 
 
 def Robinhood_links_parser(soup):
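
RobinhoodMarket is the only parser in this commit that captures a real vendor image. If replace_image_sources (utilities.py, below) has already rewritten the saved page, the src read here is an inline base64 data URI, so the encoded image itself ends up in the new image_vendor column, consistent with its character varying(1000000) sizing. A rough capacity check (standalone arithmetic, not repo code):

    # base64 encodes 3 bytes as 4 characters, so a 1,000,000-character
    # column holds roughly a 750 KB image
    max_chars = 1_000_000
    max_image_bytes = max_chars * 3 // 4   # 750000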


MarketPlaces/Utilities/utilities.py (+9, -4)

@@ -242,7 +242,7 @@ def cleanLink(originalLink):
 def organizeProducts(marketplace, nm, vendor, rating_vendor, success_vendor, nombre, CVE, MS, category, describe,
-                     views, reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href):
+                     views, reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor):
 
     rw = []
@@ -291,9 +291,13 @@ def organizeProducts(marketplace, nm, vendor, rating_vendor, success_vendor, nom
         lne += ","
         lne += "-1" if len(shipTo) == 0 else shipTo[n]                      # 19
         lne += ","
-        lne += "-1" if len(href) == 0 else href[n]                          # 20
+        lne += "-1" if len(image) == 0 else image[n]                        # 20
         lne += ","
-        lne += day + " " + ahora                                            # 21
+        lne += "-1" if len(image_vendor) == 0 else image_vendor[n]          # 21
+        lne += ","
+        lne += "-1" if len(href) == 0 else href[n]                          # 22
+        lne += ","
+        lne += day + " " + ahora                                            # 23
 
         rw.append(lne)
@@ -338,6 +342,7 @@ def aes_encryption(item):
 def aes_decryption(item):
     to_bytes = bytes(item)
+    #to_bytes = bytes(item, 'utf-8')
 
     decrypted_bytes = decryptCipher.decrypt(to_bytes)
@@ -403,7 +408,7 @@ def replace_image_sources(driver, html_content):
         string_image = encrypt_encode_image_to_base64(driver, img_xpath)
 
         if string_image:
-            img_tag.set('src', f'data:image/png;base64,{string_image}')
+            img_tag.set('src', f'data:image/png;base64;{string_image}')
         else:
             img_tag.getparent().remove(img_tag)
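
One detail worth verifying in the hunk above: per RFC 2397 a data URI separates the base64 payload from the media type with a comma (data:image/png;base64,<payload>); semicolons only delimit media-type parameters. A minimal sketch of the conventional construction (standalone example, not code from this commit):

    import base64

    png_bytes = b'\x89PNG\r\n\x1a\n'                   # placeholder payload
    b64 = base64.b64encode(png_bytes).decode('ascii')
    src = f'data:image/png;base64,{b64}'               # ';' before "base64", ',' before the payload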

