Browse Source

Image tracking and version.

main
ericssonmarin-cpp 1 year ago
parent
commit
c6589f4b82
2 changed files with 122 additions and 67 deletions
  1. +57
    -44
      MarketPlaces/AnonMarket/parser.py
  2. +65
    -23
      MarketPlaces/DB_Connection/db_connection.py

+ 57
- 44
MarketPlaces/AnonMarket/parser.py View File

@ -14,26 +14,27 @@ import re
#return: 'row' that contains a variety of lists that each hold info on the description page #return: 'row' that contains a variety of lists that each hold info on the description page
def AnonMarket_description_parser(soup): def AnonMarket_description_parser(soup):
# Fields to be parsed
vendor = "-1" # 0 *Vendor_Name
success = "-1" # 1 Vendor_Successful_Transactions
rating_vendor = "-1" # 2 Vendor_Rating
name = "-1" # 3 *Product_Name
describe = "-1" # 4 Product_Description
CVE = "-1" # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) dont worry about that much
MS = "-1" # 6 Product_MS_Classification (Microsoft Security) dont worry about that much
category = "-1" # 7 Product_Category
views = "-1" # 8 Product_Number_Of_Views
reviews = "-1" # 9 Product_Number_Of_Reviews
rating_item = "-1" # 10 Product_Rating
addDate = "-1" # 11 Product_AddedDate
BTC = "-1" # 12 Product_BTC_SellingPrice
USD = "-1" # 13 Product_USD_SellingPrice
EURO = "-1" # 14 Product_EURO_SellingPrice
sold = "-1" # 15 Product_QuantitySold
left = "-1" # 16 Product_QuantityLeft
shipFrom = "-1" # 17 Product_ShippedFrom
shipTo = "-1" # 18 Product_ShippedTo
vendor = "-1" # 0 *Vendor_Name
success = "-1" # 1 Vendor_Successful_Transactions
rating_vendor = "-1" # 2 Vendor_Rating
name = "-1" # 3 *Product_Name
describe = "-1" # 4 Product_Description
CVE = "-1" # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
MS = "-1" # 6 Product_MS_Classification (Microsoft Security)
category = "-1" # 7 Product_Category
views = "-1" # 8 Product_Number_Of_Views
reviews = "-1" # 9 Product_Number_Of_Reviews
rating_item = "-1" # 10 Product_Rating
addDate = "-1" # 11 Product_AddedDate
BTC = "-1" # 12 Product_BTC_SellingPrice
USD = "-1" # 13 Product_USD_SellingPrice
EURO = "-1" # 14 Product_EURO_SellingPrice
sold = "-1" # 15 Product_QuantitySold
left = "-1" # 16 Product_QuantityLeft
shipFrom = "-1" # 17 Product_ShippedFrom
shipTo = "-1" # 18 Product_ShippedTo
image = "-1" # 19 Product_Image
vendor_image = "-1" # 20 Vendor_Image
name_of_product = soup.find("div", {"class": "heading"}).text name_of_product = soup.find("div", {"class": "heading"}).text
name = cleanString(name_of_product.strip()) name = cleanString(name_of_product.strip())
@ -44,6 +45,11 @@ def AnonMarket_description_parser(soup):
else: else:
describe = cleanString(description_div.text.strip()) describe = cleanString(description_div.text.strip())
# Finding Product Image
image = soup.find('div', {'class': 'thumbnails'}).find('img')
image = image.get('src')
image = image.split('base64,')[-1]
info_div = soup.find('div', {'class': 'information'}) info_div = soup.find('div', {'class': 'information'})
table = info_div.find('table') if info_div else None table = info_div.find('table') if info_div else None
@ -70,7 +76,7 @@ def AnonMarket_description_parser(soup):
# Populating the final variable (this should be a list with all fields scraped) # Populating the final variable (this should be a list with all fields scraped)
row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate, row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
BTC, USD, EURO, sold, left, shipFrom, shipTo)
BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
# Sending the results # Sending the results
@ -84,28 +90,31 @@ def AnonMarket_description_parser(soup):
def AnonMarket_listing_parser(soup): def AnonMarket_listing_parser(soup):
# Fields to be parsed # Fields to be parsed
nm = 0 # *Total_Products (Should be Integer)
mktName = "AnonMarket" # 0 *Marketplace_Name
vendor = [] # 1 *Vendor y
rating_vendor = [] # 2 Vendor_Rating
success = [] # 3 Vendor_Successful_Transactions
name = [] # 4 *Product_Name y
CVE = [] # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
MS = [] # 6 Product_MS_Classification (Microsoft Security)
category = [] # 7 Product_Category y
describe = [] # 8 Product_Description
views = [] # 9 Product_Number_Of_Views
reviews = [] # 10 Product_Number_Of_Reviews
rating_item = [] # 11 Product_Rating
addDate = [] # 12 Product_AddDate
BTC = [] # 13 Product_BTC_SellingPrice
USD = [] # 14 Product_USD_SellingPrice y
EURO = [] # 15 Product_EURO_SellingPrice
sold = [] # 16 Product_QuantitySold
qLeft = [] # 17 Product_QuantityLeft
shipFrom = [] # 18 Product_ShippedFrom
shipTo = [] # 19 Product_ShippedTo
href = [] # 20 Product_Links
nm = 0 # *Total_Products (Should be Integer)
mktName = "AnonMarket" # 0 *Marketplace_Name
vendor = [] # 1 *Vendor y
rating_vendor = [] # 2 Vendor_Rating
success = [] # 3 Vendor_Successful_Transactions
name = [] # 4 *Product_Name y
CVE = [] # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
MS = [] # 6 Product_MS_Classification (Microsoft Security)
category = [] # 7 Product_Category y
describe = [] # 8 Product_Description
views = [] # 9 Product_Number_Of_Views
reviews = [] # 10 Product_Number_Of_Reviews
rating_item = [] # 11 Product_Rating
addDate = [] # 12 Product_AddDate
BTC = [] # 13 Product_BTC_SellingPrice
USD = [] # 14 Product_USD_SellingPrice y
EURO = [] # 15 Product_EURO_SellingPrice
sold = [] # 16 Product_QuantitySold
qLeft =[] # 17 Product_QuantityLeft
shipFrom = [] # 18 Product_ShippedFrom
shipTo = [] # 19 Product_ShippedTo
image = [] # 20 Product_Image
image_vendor = [] # 21 Vendor_Image
href = [] # 22 Product_Links
base_url = "http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion" base_url = "http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion"
products_list = soup.find_all('div', {'class': 'item'}) products_list = soup.find_all('div', {'class': 'item'})
@ -147,6 +156,8 @@ def AnonMarket_listing_parser(soup):
qLeft.append("-1") qLeft.append("-1")
shipFrom.append("-1") shipFrom.append("-1")
shipTo.append("-1") shipTo.append("-1")
image.append("-1")
image_vendor.append("-1")
nm += 1 nm += 1
@ -177,7 +188,9 @@ def AnonMarket_listing_parser(soup):
qLeft = qLeft, qLeft = qLeft,
shipFrom = shipFrom, shipFrom = shipFrom,
shipTo = shipTo, shipTo = shipTo,
href = href
href = href,
image = image,
image_vendor = image_vendor
) )


+ 65
- 23
MarketPlaces/DB_Connection/db_connection.py View File

@ -119,6 +119,24 @@ def getLastVendor(cur):
trace = traceback.format_exc() trace = traceback.format_exc()
print (trace) print (trace)
def getLastVendorVersion(cur, vendorId):
    """Return the highest ``version_vendor`` recorded for a vendor.

    Queries ``vendors_history`` for the rows matching ``vendorId``, ordered by
    ``version_vendor`` descending and limited to one row.

    Args:
        cur: DB-API style cursor exposing ``execute`` and ``fetchall``.
        vendorId: Value bound to the ``%(vendorId)s`` query parameter.

    Returns:
        The first column of the top row, or ``0`` when the query yields no
        rows (i.e. the vendor has no history entry yet).

    Raises:
        Exception: Any error raised by the cursor is printed as a traceback
            and then re-raised, so callers never receive an implicit ``None``
            that would break arithmetic on the returned version.
    """
    try:
        cur.execute("select version_vendor from vendors_history where vendor_id = %(vendorId)s order by version_vendor desc limit 1", {'vendorId': vendorId})
        recset = cur.fetchall()
    except Exception:
        # Keep the diagnostic output, but surface the real failure instead of
        # silently returning None.
        print(traceback.format_exc())
        raise

    if recset:
        return recset[0][0]
    return 0
def getLastItem(cur): def getLastItem(cur):
try: try:
@ -137,6 +155,25 @@ def getLastItem(cur):
trace = traceback.format_exc() trace = traceback.format_exc()
print (trace) print (trace)
def getLastItemVersion(cur, itemId):
    """Return the highest ``version_item`` recorded for an item.

    Queries ``items_history`` for the rows matching ``itemId``, ordered by
    ``version_item`` descending and limited to one row.

    Args:
        cur: DB-API style cursor exposing ``execute`` and ``fetchall``.
        itemId: Value bound to the ``%(itemId)s`` query parameter.

    Returns:
        The first column of the top row, or ``0`` when the query yields no
        rows (i.e. the item has no history entry yet).

    Raises:
        Exception: Any error raised by the cursor is printed as a traceback
            and then re-raised, so callers never receive an implicit ``None``
            that would break arithmetic on the returned version.
    """
    try:
        cur.execute("select version_item from items_history where item_id = %(itemId)s order by version_item desc limit 1",
                    {'itemId': itemId})
        recset = cur.fetchall()
    except Exception:
        # Keep the diagnostic output, but surface the real failure instead of
        # silently returning None.
        print(traceback.format_exc())
        raise

    if recset:
        return recset[0][0]
    return 0
def create_marketPlace(cur, row, url): def create_marketPlace(cur, row, url):
marketId = verifyMarketPlace(cur, row[0]) marketId = verifyMarketPlace(cur, row[0])
@ -192,10 +229,13 @@ def create_vendor(cur, row, marketId):
str(recset[0][4]) != str(row[3] if row[3] != '-1' else None) or str(recset[0][4]) != str(row[3] if row[3] != '-1' else None) or
str(recset[0][5]) != str(row[21] if row[21] != '-1' else None)): str(recset[0][5]) != str(row[21] if row[21] != '-1' else None)):
sql = "Insert into vendors_history (vendor_id, market_id, name_vendor, rating_vendor, successfultransactions_vendor, image_vendor, dateinserted_vendor) " \
"Values (%s, %s, %s, %s, %s, %s, %s)"
vendorVersionId = int(getLastVendorVersion(cur, vendorId) + 1)
sql = "Insert into vendors_history (vendor_id, version_vendor, market_id, name_vendor, rating_vendor, " \
"successfultransactions_vendor, image_vendor, dateinserted_vendor) " \
"Values (%s, %s, %s, %s, %s, %s, %s, %s)"
recset = [vendorId, marketId,
recset = [vendorId, vendorVersionId, marketId,
recset[0][2], recset[0][2],
recset[0][3], recset[0][3],
recset[0][4], recset[0][4],
@ -277,13 +317,15 @@ def create_items(cur, row, marketId, vendorId):
str(recset[0][16]) != str(row[17] if row[17] != '-1' else None) or str(recset[0][17]) != str(row[18] if row[18] != '-1' else None) or str(recset[0][16]) != str(row[17] if row[17] != '-1' else None) or str(recset[0][17]) != str(row[18] if row[18] != '-1' else None) or
str(recset[0][18]) != str(row[19] if row[19] != '-1' else None) or str(recset[0][20]) != str(row[20] if row[20] != '-1' else None)): str(recset[0][18]) != str(row[19] if row[19] != '-1' else None) or str(recset[0][20]) != str(row[20] if row[20] != '-1' else None)):
sql = "Insert into items_history (item_id, market_id, vendor_id, name_item, description_item, cve_item, ms_item, category_item, " \
itemVersionId = int(getLastItemVersion(cur, itemId) + 1)
sql = "Insert into items_history (item_id, version_item, market_id, vendor_id, name_item, description_item, cve_item, ms_item, category_item, " \
"views_item, reviews_item, rating_item, dateadded_item, btc_item, usd_item, euro_item, quantitysold_item, " \ "views_item, reviews_item, rating_item, dateadded_item, btc_item, usd_item, euro_item, quantitysold_item, " \
"quantityleft_item, shippedfrom_item, shippedto_item, lastseen_item, image_item, href_item, dateinserted_item, " \ "quantityleft_item, shippedfrom_item, shippedto_item, lastseen_item, image_item, href_item, dateinserted_item, " \
"classification_item) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, " \ "classification_item) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, " \
"%s, %s, %s, %s, %s)"
"%s, %s, %s, %s, %s, %s)"
recset = [itemId, marketId, vendorId,
recset = [itemId, itemVersionId, marketId, vendorId,
recset[0][3], recset[0][3],
recset[0][4], recset[0][4],
recset[0][5], recset[0][5],
@ -306,16 +348,16 @@ def create_items(cur, row, marketId, vendorId):
recset[0][22], recset[0][22],
recset[0][23]] recset[0][23]]
cur.execute(sql, recset)
cur.execute(sql, recset)
sql = "Update items set description_item = %(description_item)s, cve_item = %(cve_item)s, ms_item = %(ms_item)s, " \
"category_item = %(category_item)s, views_item = %(views_item)s, reviews_item = %(reviews_item)s, " \
"rating_item = %(rating_item)s, dateadded_item = %(dateadded_item)s, btc_item = %(btc_item)s, " \
"usd_item = %(usd_item)s, euro_item = %(euro_item)s, quantitysold_item = %(quantitysold_item)s, " \
"quantityleft_item = %(quantityleft_item)s, shippedfrom_item = %(shippedfrom_item)s, shippedto_item = %(shippedto_item)s, " \
"lastseen_item = %(lastseen_item)s, image_item = %(image_item)s, dateinserted_item = %(dateinserted_item)s where item_id = %(itemId)s"
sql = "Update items set description_item = %(description_item)s, cve_item = %(cve_item)s, ms_item = %(ms_item)s, " \
"category_item = %(category_item)s, views_item = %(views_item)s, reviews_item = %(reviews_item)s, " \
"rating_item = %(rating_item)s, dateadded_item = %(dateadded_item)s, btc_item = %(btc_item)s, " \
"usd_item = %(usd_item)s, euro_item = %(euro_item)s, quantitysold_item = %(quantitysold_item)s, " \
"quantityleft_item = %(quantityleft_item)s, shippedfrom_item = %(shippedfrom_item)s, shippedto_item = %(shippedto_item)s, " \
"lastseen_item = %(lastseen_item)s, image_item = %(image_item)s, dateinserted_item = %(dateinserted_item)s where item_id = %(itemId)s"
cur.execute(sql, {'description_item': row[5] if row[5] != '-1' else None,
cur.execute(sql, {'description_item': row[5] if row[5] != '-1' else None,
'cve_item': row[6] if row[6] != '-1' else None, 'cve_item': row[6] if row[6] != '-1' else None,
'ms_item': row[7] if row[7] != '-1' else None, 'ms_item': row[7] if row[7] != '-1' else None,
'category_item': row[8] if row[8] != '-1' else None, 'category_item': row[8] if row[8] != '-1' else None,
@ -338,10 +380,10 @@ def create_items(cur, row, marketId, vendorId):
else: #updating when was the last time the crawler saw that item else: #updating when was the last time the crawler saw that item
sql = "Update items set lastseen_item = %(lastseen_item)s where item_id = %(itemId)s"
sql = "Update items set lastseen_item = %(lastseen_item)s where item_id = %(itemId)s"
cur.execute(sql, {'lastseen_item': row[23],
'itemId': itemId})
cur.execute(sql, {'lastseen_item': row[23],
'itemId': itemId})
return itemId return itemId
@ -367,10 +409,10 @@ def create_database(cur, con):
sql = "create unique index unique_vendor ON vendors USING btree (market_id ASC NULLS LAST, name_vendor ASC NULLS LAST)" sql = "create unique index unique_vendor ON vendors USING btree (market_id ASC NULLS LAST, name_vendor ASC NULLS LAST)"
cur.execute(sql) cur.execute(sql)
sql = "create table vendors_history(vendor_id integer not null, market_id integer not null, name_vendor " \
sql = "create table vendors_history(vendor_id integer not null, version_vendor integer not null, market_id integer not null, name_vendor " \
"character varying(255) not null, rating_vendor character varying(255), successfultransactions_vendor " \ "character varying(255) not null, rating_vendor character varying(255), successfultransactions_vendor " \
"integer null, image_vendor character varying(1000000) null, dateinserted_vendor timestamp(6) with time zone not null, " \ "integer null, image_vendor character varying(1000000) null, dateinserted_vendor timestamp(6) with time zone not null, " \
"constraint vendors_history_pk primary key (vendor_id, dateinserted_vendor), constraint vendors_history_vendor_id_fkey foreign key (" \
"constraint vendors_history_pk primary key (vendor_id, version_vendor), constraint vendors_history_vendor_id_fkey foreign key (" \
"vendor_id) references vendors (vendor_id), constraint vendors_history_market_id_fkey foreign key (" \ "vendor_id) references vendors (vendor_id), constraint vendors_history_market_id_fkey foreign key (" \
"market_id) references marketplaces (market_id))" "market_id) references marketplaces (market_id))"
cur.execute(sql) cur.execute(sql)
@ -392,16 +434,16 @@ def create_database(cur, con):
sql = "create unique index unique_item ON items USING btree (market_id ASC NULLS LAST, vendor_id ASC NULLS LAST, name_item ASC NULLS LAST)" sql = "create unique index unique_item ON items USING btree (market_id ASC NULLS LAST, vendor_id ASC NULLS LAST, name_item ASC NULLS LAST)"
cur.execute(sql) cur.execute(sql)
sql = "create table items_history(item_id integer not null, market_id integer not null, vendor_id integer not null, name_item character " \
"varying(255) not null, description_item character varying(1000000) null, cve_item character varying(" \
"255) null, ms_item character varying(255) null, category_item character varying(255) null, views_item " \
sql = "create table items_history(item_id integer not null, version_item integer not null, market_id integer not null, " \
"vendor_id integer not null, name_item character varying(255) not null, description_item character varying(1000000) null, " \
"cve_item character varying(255) null, ms_item character varying(255) null, category_item character varying(255) null, views_item " \
"integer null, reviews_item integer null, rating_item character varying(255) null, dateadded_item " \ "integer null, reviews_item integer null, rating_item character varying(255) null, dateadded_item " \
"character varying(25) null, btc_item character varying(255) null, usd_item character varying(255) " \ "character varying(25) null, btc_item character varying(255) null, usd_item character varying(255) " \
"null, euro_item character varying(255) null, quantitysold_item integer null, quantityleft_item " \ "null, euro_item character varying(255) null, quantitysold_item integer null, quantityleft_item " \
"character varying(255) null, shippedfrom_item character varying(255) null, shippedto_item character " \ "character varying(255) null, shippedfrom_item character varying(255) null, shippedto_item character " \
"varying(255) null, lastseen_item timestamp(6) with time zone not null, image_item character varying(1000000) null, " \ "varying(255) null, lastseen_item timestamp(6) with time zone not null, image_item character varying(1000000) null, " \
"href_item character varying(255) not null, dateinserted_item timestamp(6) with time zone not null, " \ "href_item character varying(255) not null, dateinserted_item timestamp(6) with time zone not null, " \
"classification_item double precision not null, constraint items_history_pk primary key (item_id, dateinserted_item), " \
"classification_item double precision not null, constraint items_history_pk primary key (item_id, version_item), " \
"constraint items_history_market_id_fkey foreign key (market_id) references marketplaces (market_id), " \ "constraint items_history_market_id_fkey foreign key (market_id) references marketplaces (market_id), " \
"constraint items_history_vendor_id_fkey foreign key (vendor_id) references vendors (vendor_id), " \ "constraint items_history_vendor_id_fkey foreign key (vendor_id) references vendors (vendor_id), " \
"constraint items_history_item_id_fkey foreign key (item_id) references items (item_id))" "constraint items_history_item_id_fkey foreign key (item_id) references items (item_id))"


Loading…
Cancel
Save