|
|
@ -67,14 +67,14 @@ def verifyVendor(cur, nameVendor, marketId): |
|
|
|
trace = traceback.format_exc() |
|
|
|
print (trace) |
|
|
|
|
|
|
|
def verifyItem(cur, marketId, vendorId, nameItem): |
|
|
|
def verifyItem(cur, marketId, hrefItem): |
|
|
|
|
|
|
|
try: |
|
|
|
|
|
|
|
cur.execute("lock table items IN ACCESS EXCLUSIVE MODE") |
|
|
|
|
|
|
|
cur.execute("select item_id from items where market_id = %(marketId)s and vendor_id = %(vendorId)s and name_item = %(nameItem)s" |
|
|
|
"limit 1", {'marketId': marketId, 'vendorId': vendorId, 'nameItem': nameItem}) |
|
|
|
cur.execute("select item_id from items where market_id = %(marketId)s and href_item = %(hrefItem)s limit 1", |
|
|
|
{'marketId': marketId, 'hrefItem': hrefItem}) |
|
|
|
|
|
|
|
recset = cur.fetchall() |
|
|
|
|
|
|
@ -263,7 +263,9 @@ def create_vendor(cur, row, marketId): |
|
|
|
|
|
|
|
def create_items(cur, row, marketId, vendorId): |
|
|
|
|
|
|
|
itemId = verifyItem(cur, marketId, vendorId, row[4]) |
|
|
|
hrefItem = get_relative_url(row[22]) |
|
|
|
|
|
|
|
itemId = verifyItem(cur, marketId, hrefItem) |
|
|
|
|
|
|
|
if not itemId: |
|
|
|
itemId = int(getLastItem(cur) + 1) |
|
|
@ -277,7 +279,7 @@ def create_items(cur, row, marketId, vendorId): |
|
|
|
|
|
|
|
sql = "Insert into items (item_id, market_id, vendor_id, name_item, description_item, cve_item, ms_item, category_item, " \ |
|
|
|
"views_item, reviews_item, rating_item, dateadded_item, btc_item, usd_item, euro_item, quantitysold_item, " \ |
|
|
|
"quantityleft_item, shippedfrom_item, shippedto_item, lastseen_item, image_item, href_item, dateinserted_item, " \ |
|
|
|
"quantityleft_item, shippedfrom_item, shippedto_item, image_item, href_item, lastseen_item, dateinserted_item, " \ |
|
|
|
"classification_item) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, " \ |
|
|
|
"%s, %s, %s, %s, %s)" |
|
|
|
|
|
|
@ -298,11 +300,11 @@ def create_items(cur, row, marketId, vendorId): |
|
|
|
row[17] if row[17] != '-1' else None, |
|
|
|
row[18] if row[18] != '-1' else None, |
|
|
|
row[19] if row[19] != '-1' else None, |
|
|
|
row[23], |
|
|
|
row[20] if row[20] != '-1' else None, |
|
|
|
row[22] if row[22] != '-1' else None, |
|
|
|
hrefItem, |
|
|
|
row[23], |
|
|
|
row[23], |
|
|
|
row[24]] |
|
|
|
row[24] if row[24] != '-1' else None] |
|
|
|
|
|
|
|
cur.execute(sql, recset) |
|
|
|
|
|
|
@ -316,7 +318,9 @@ def create_items(cur, row, marketId, vendorId): |
|
|
|
|
|
|
|
# decode_decrypt_image_in_base64(recset[0]['image_item']) |
|
|
|
|
|
|
|
if (str(recset[0]['description_item']) != str(row[5] if row[5] != '-1' else None) or |
|
|
|
if (str(recset[0]['vendor_id']) != str(vendorId) or |
|
|
|
str(recset[0]['name_item']) != str(row[4] if row[4] != '-1' else None) or |
|
|
|
str(recset[0]['description_item']) != str(row[5] if row[5] != '-1' else None) or |
|
|
|
str(recset[0]['cve_item']) != str(row[6] if row[6] != '-1' else None) or |
|
|
|
str(recset[0]['ms_item']) != str(row[7] if row[7] != '-1' else None) or |
|
|
|
str(recset[0]['category_item']) != str(row[8] if row[8] != '-1' else None) or |
|
|
@ -331,7 +335,8 @@ def create_items(cur, row, marketId, vendorId): |
|
|
|
str(recset[0]['quantityleft_item']) != str(row[17] if row[17] != '-1' else None) or |
|
|
|
str(recset[0]['shippedfrom_item']) != str(row[18] if row[18] != '-1' else None) or |
|
|
|
str(recset[0]['shippedto_item']) != str(row[19] if row[19] != '-1' else None) or |
|
|
|
str(recset[0]['image_item']) != str(row[20] if row[20] != '-1' else None)): |
|
|
|
str(recset[0]['image_item']) != str(row[20] if row[20] != '-1' else None) or |
|
|
|
str(recset[0]['classification_item']) != str(row[24] if row[24] != '-1' else None)): |
|
|
|
|
|
|
|
itemVersionId = int(getLastItemVersion(cur, itemId) + 1) |
|
|
|
|
|
|
@ -341,7 +346,8 @@ def create_items(cur, row, marketId, vendorId): |
|
|
|
"classification_item) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, " \ |
|
|
|
"%s, %s, %s, %s, %s, %s)" |
|
|
|
|
|
|
|
recset = [itemId, itemVersionId, marketId, vendorId, |
|
|
|
recset = [itemId, itemVersionId, marketId, |
|
|
|
recset[0]['vendor_id'], |
|
|
|
recset[0]['name_item'], |
|
|
|
recset[0]['description_item'], |
|
|
|
recset[0]['cve_item'], |
|
|
@ -358,22 +364,26 @@ def create_items(cur, row, marketId, vendorId): |
|
|
|
recset[0]['quantityleft_item'], |
|
|
|
recset[0]['shippedfrom_item'], |
|
|
|
recset[0]['shippedto_item'], |
|
|
|
recset[0]['lastseen_item'], |
|
|
|
recset[0]['image_item'], |
|
|
|
recset[0]['href_item'], |
|
|
|
recset[0]['lastseen_item'], |
|
|
|
recset[0]['dateinserted_item'], |
|
|
|
recset[0]['classification_item']] |
|
|
|
|
|
|
|
cur.execute(sql, recset) |
|
|
|
|
|
|
|
sql = "Update items set description_item = %(description_item)s, cve_item = %(cve_item)s, ms_item = %(ms_item)s, " \ |
|
|
|
sql = "Update items set vendor_id = %(vendor_id)s, name_item = %(name_item)s, " \ |
|
|
|
"description_item = %(description_item)s, cve_item = %(cve_item)s, ms_item = %(ms_item)s, " \ |
|
|
|
"category_item = %(category_item)s, views_item = %(views_item)s, reviews_item = %(reviews_item)s, " \ |
|
|
|
"rating_item = %(rating_item)s, dateadded_item = %(dateadded_item)s, btc_item = %(btc_item)s, " \ |
|
|
|
"usd_item = %(usd_item)s, euro_item = %(euro_item)s, quantitysold_item = %(quantitysold_item)s, " \ |
|
|
|
"quantityleft_item = %(quantityleft_item)s, shippedfrom_item = %(shippedfrom_item)s, shippedto_item = %(shippedto_item)s, " \ |
|
|
|
"lastseen_item = %(lastseen_item)s, image_item = %(image_item)s, dateinserted_item = %(dateinserted_item)s where item_id = %(itemId)s" |
|
|
|
"image_item = %(image_item)s, lastseen_item = %(lastseen_item)s, dateinserted_item = %(dateinserted_item)s, " \ |
|
|
|
"classification_item = %(classification_item)s where item_id = %(itemId)s" |
|
|
|
|
|
|
|
cur.execute(sql, {'description_item': row[5] if row[5] != '-1' else None, |
|
|
|
cur.execute(sql, {'vendor_id': vendorId, |
|
|
|
'name_item': row[4] if row[4] != '-1' else None, |
|
|
|
'description_item': row[5] if row[5] != '-1' else None, |
|
|
|
'cve_item': row[6] if row[6] != '-1' else None, |
|
|
|
'ms_item': row[7] if row[7] != '-1' else None, |
|
|
|
'category_item': row[8] if row[8] != '-1' else None, |
|
|
@ -388,12 +398,12 @@ def create_items(cur, row, marketId, vendorId): |
|
|
|
'quantityleft_item': row[17] if row[17] != '-1' else None, |
|
|
|
'shippedfrom_item': row[18] if row[18] != '-1' else None, |
|
|
|
'shippedto_item': row[19] if row[19] != '-1' else None, |
|
|
|
'dateinserted_item': row[23], |
|
|
|
'lastseen_item': row[23], |
|
|
|
'image_item': row[20] if row[20] != '-1' else None, |
|
|
|
'lastseen_item': row[23], |
|
|
|
'dateinserted_item': row[23], |
|
|
|
'classification_item': row[24] if row[24] != '-1' else None, |
|
|
|
'itemId': itemId}) |
|
|
|
|
|
|
|
|
|
|
|
else: #updating when was the last time the crawler saw that item |
|
|
|
|
|
|
|
sql = "Update items set lastseen_item = %(lastseen_item)s where item_id = %(itemId)s" |
|
|
@ -438,15 +448,15 @@ def create_database(cur, con): |
|
|
|
|
|
|
|
sql = "Create table marketplaces_status (market_id integer NOT NULL, date_inserted date NOT NULL, " \ |
|
|
|
"listings integer NOT NULL, descriptions integer NOT NULL, status bit(1) NOT NULL, date_reference date NOT NULL, " \ |
|
|
|
"CONSTRAINT marketplaces_log_pkey PRIMARY KEY (market_id, date_inserted), " \ |
|
|
|
"CONSTRAINT marketplaces_fk FOREIGN KEY (market_id) REFERENCES marketplaces (market_id))" |
|
|
|
"CONSTRAINT marketplaces_status_pk PRIMARY KEY (market_id, date_inserted), " \ |
|
|
|
"CONSTRAINT marketplaces_status_fk FOREIGN KEY (market_id) REFERENCES marketplaces (market_id))" |
|
|
|
cur.execute(sql) |
|
|
|
|
|
|
|
sql = "create table vendors(vendor_id integer not null, market_id integer not null, name_vendor character " \ |
|
|
|
"varying(255) not null, rating_vendor character varying(255), successfultransactions_vendor integer " \ |
|
|
|
"null, image_vendor character varying(10000000) null, dateinserted_vendor timestamp(6) with time zone not null, " \ |
|
|
|
"constraint vendors_pk primary key (vendor_id), constraint vendors_market_id_fkey foreign key (market_id) references marketplaces (" \ |
|
|
|
"market_id))" |
|
|
|
"constraint vendors_pk primary key (vendor_id), " \ |
|
|
|
"constraint vendors_market_id_fk foreign key (market_id) references marketplaces (market_id))" |
|
|
|
cur.execute(sql) |
|
|
|
|
|
|
|
sql = "create unique index unique_vendor ON vendors USING btree (market_id ASC NULLS LAST, name_vendor ASC NULLS LAST)" |
|
|
@ -467,14 +477,14 @@ def create_database(cur, con): |
|
|
|
"character varying(25) null, btc_item character varying(255) null, usd_item character varying(255) " \ |
|
|
|
"null, euro_item character varying(255) null, quantitysold_item integer null, quantityleft_item " \ |
|
|
|
"character varying(255) null, shippedfrom_item character varying(255) null, shippedto_item character " \ |
|
|
|
"varying(255) null, lastseen_item timestamp(6) with time zone not null, image_item character varying(10000000) null, " \ |
|
|
|
"href_item character varying(255) not null, dateinserted_item timestamp(6) with time zone not null, " \ |
|
|
|
"classification_item double precision not null, constraint items_pk primary key (item_id), constraint " \ |
|
|
|
"varying(255) null, image_item character varying(10000000) null, href_item character varying(255) not null, " \ |
|
|
|
"lastseen_item timestamp(6) with time zone not null, dateinserted_item timestamp(6) with time zone not null, " \ |
|
|
|
"classification_item double precision null, constraint items_pk primary key (item_id), constraint " \ |
|
|
|
"items_market_id_fkey foreign key (market_id) references marketplaces (market_id),constraint " \ |
|
|
|
"items_vendor_id_fkey foreign key (vendor_id) references vendors (vendor_id))" |
|
|
|
cur.execute(sql) |
|
|
|
|
|
|
|
sql = "create unique index unique_item ON items USING btree (market_id ASC NULLS LAST, vendor_id ASC NULLS LAST, name_item ASC NULLS LAST)" |
|
|
|
sql = "create unique index unique_item ON items USING btree (market_id ASC NULLS LAST, href_item ASC NULLS LAST)" |
|
|
|
cur.execute(sql) |
|
|
|
|
|
|
|
sql = "create table items_history(item_id integer not null, version_item integer not null, market_id integer not null, " \ |
|
|
@ -484,9 +494,9 @@ def create_database(cur, con): |
|
|
|
"character varying(25) null, btc_item character varying(255) null, usd_item character varying(255) " \ |
|
|
|
"null, euro_item character varying(255) null, quantitysold_item integer null, quantityleft_item " \ |
|
|
|
"character varying(255) null, shippedfrom_item character varying(255) null, shippedto_item character " \ |
|
|
|
"varying(255) null, lastseen_item timestamp(6) with time zone not null, image_item character varying(10000000) null, " \ |
|
|
|
"href_item character varying(255) not null, dateinserted_item timestamp(6) with time zone not null, " \ |
|
|
|
"classification_item double precision not null, constraint items_history_pk primary key (item_id, version_item), " \ |
|
|
|
"varying(255) null, image_item character varying(10000000) null, href_item character varying(255) not null, " \ |
|
|
|
"lastseen_item timestamp(6) with time zone not null, dateinserted_item timestamp(6) with time zone not null, " \ |
|
|
|
"classification_item double precision null, constraint items_history_pk primary key (item_id, version_item), " \ |
|
|
|
"constraint items_history_market_id_fkey foreign key (market_id) references marketplaces (market_id), " \ |
|
|
|
"constraint items_history_vendor_id_fkey foreign key (vendor_id) references vendors (vendor_id), " \ |
|
|
|
"constraint items_history_item_id_fkey foreign key (item_id) references items (item_id))" |
|
|
@ -502,4 +512,5 @@ def create_database(cur, con): |
|
|
|
|
|
|
|
if (trace.find("already exists")==-1): |
|
|
|
print ("There was a problem during the database creation." ) |
|
|
|
traceback.print_exc() |
|
|
|
raise SystemExit |