diff --git a/Forums/DB_Connection/db_connection.py b/Forums/DB_Connection/db_connection.py
index 3fb53ff..fc29c07 100644
--- a/Forums/DB_Connection/db_connection.py
+++ b/Forums/DB_Connection/db_connection.py
@@ -271,7 +271,8 @@ def create_topic(cur, forumId, row, authorId):
 
         recset = cur.fetchall()
 
-        if (str(recset[0][4]) != str(row[1]) or str(recset[0][5]) != str(row[4]) or str(recset[0][6]) != str(row[5])): # there was a change in the topic information
+        if (str(recset[0][4]) != str(row[1]) or str(recset[0][5]) != str(row[4] if row[4] != '-1' else None) or # there was a change in the topic information
+            str(recset[0][6]) != str(row[5] if row[5] != '-1' else None)):
 
             sql = "Insert into topics_history (topic_id, forum_id, author_id, title_topic, board_topic, views_topic, posts_topic, " \
                   "href_topic, dateadded_topic, dateinserted_topic, classification_topic) Values (%s, %s, %s, %s, %s, " \
@@ -352,7 +353,7 @@ def create_user(cur, row, forumId, index):
 
         recset = cur.fetchall()
 
-        if (str(recset[0][3]) != str(row[10][index]) or str(recset[0][4]) != str(row[11][index]) or
+        if (str(recset[0][3]) != str(row[10][index] if row[10][index] != '-1' else None) or str(recset[0][4]) != str(row[11][index] if row[11][index] != '-1' else None) or
             str(recset[0][5]) != str(row[12][index] if row[12][index] != '-1' else None) or str(recset[0][6]) != str(row[13][index] if row[13][index] != '-1' else None)): # there was a change in the user information
 
             if (str(recset[0][3]) != 'Nan' or str(recset[0][4]) != 'Nan' or str(recset[0][5]) != 'Nan' or str(recset[0][6]) != 'Nan'):
diff --git a/MarketPlaces/DB_Connection/db_connection.py b/MarketPlaces/DB_Connection/db_connection.py
index a1b27ff..0032e59 100644
--- a/MarketPlaces/DB_Connection/db_connection.py
+++ b/MarketPlaces/DB_Connection/db_connection.py
@@ -7,9 +7,9 @@ import configparser
 
 def connectDataBase():
 
-    try:
+    from MarketPlaces.Initialization.markets_mining import config
 
-        from MarketPlaces.Initialization.markets_mining import config
+    try:
 
         ip = config.get('PostgreSQL', 'ip')
         username = config.get('PostgreSQL', 'username')
@@ -20,7 +20,7 @@ def connectDataBase():
 
     except:
 
-        print ("Data base (darkweb_marketplaces) not found.")
+        print("Data base " + config.get('PostgreSQL', 'database') + " not found.")
 
         raise SystemExit
 
@@ -62,6 +62,24 @@ def verifyVendor(cur, nameVendor, marketId):
 
         trace = traceback.format_exc()
         print (trace)
+def verifyItem(cur, marketId, vendorId, nameItem):
+    """Return the item_id stored for (marketId, vendorId, nameItem), or 0 when no row matches; a failed query prints its traceback."""
+    try:
+
+        cur.execute("select item_id from items where market_id = %(marketId)s and vendor_id = %(vendorId)s and name_item = %(nameItem)s"
+                    " limit 1", {'marketId': marketId, 'vendorId': vendorId, 'nameItem': nameItem})
+
+        recset = cur.fetchall()
+
+        if recset:
+            return recset[0][0]
+        else:
+            return 0
+
+    except:
+
+        trace = traceback.format_exc()
+        print (trace)
 
 
 def getLastMarketPlace(cur):
@@ -100,6 +118,23 @@ def getLastVendor(cur):
 
         trace = traceback.format_exc()
         print (trace)
+def getLastItem(cur):
+    """Return the highest item_id in the items table, or 0 when the table is empty; a failed query prints its traceback."""
+    try:
+
+        cur.execute("select item_id from items order by item_id desc limit 1")
+
+        recset = cur.fetchall()
+
+        if recset:
+            return recset[0][0]
+        else:
+            return 0
+
+    except:
+
+        trace = traceback.format_exc()
+        print (trace)
 
 
 def create_marketPlace(cur, row, url):
@@ -124,6 +159,11 @@ def create_vendor(cur, row, marketId):
 
     if not vendorId:
         vendorId = int(getLastVendor(cur) + 1)
+        newVendor = True
+    else:
+        newVendor = False
+
+    if newVendor:
 
         sql = "Insert into vendors (vendor_id, market_id, name_vendor, rating_vendor, successfultransactions_vendor, dateinserted_vendor) Values (%s, %s, %s, %s, %s, %s)"
 
@@ -135,51 +175,164 @@ def create_vendor(cur, row, marketId):
 
         cur.execute(sql, recset)
 
+    else:
+
+        # Tracking potential vendor changes
+        sql = "select * from vendors where vendor_id = %(vendorId)s"
+        cur.execute(sql, {'vendorId': vendorId})
+
+        recset = cur.fetchall()
+
+        if (str(recset[0][3]) != str(row[2] if row[2] != '-1' else None) or # there was a change in the vendor information
+            str(recset[0][4]) != str(row[3] if row[3] != '-1' else None)):
+
+            sql = "Insert into vendors_history (vendor_id, market_id, name_vendor, rating_vendor, successfultransactions_vendor, dateinserted_vendor) Values (%s, %s, %s, %s, %s, %s)"
+
+            recset = [vendorId, marketId,
+                      recset[0][2],
+                      recset[0][3],
+                      recset[0][4],
+                      recset[0][5]]
+
+            cur.execute(sql, recset)
+
+            sql = "Update vendors set rating_vendor = %(rating_vendor)s, successfultransactions_vendor = %(successfultransactions_vendor)s, " \
+                  "dateinserted_vendor = %(dateinserted_vendor)s where vendor_id = %(vendorId)s"
+            cur.execute(sql, {'rating_vendor': row[2] if row[2] != '-1' else None,
+                              'successfultransactions_vendor': row[3] if row[3] != '-1' else None,
+                              'dateinserted_vendor': row[21],
+                              'vendorId': vendorId})
+
     return vendorId
 
 
 def create_items(cur, row, marketId, vendorId):
 
-    sql = "Insert into items (market_id, vendor_id, name_item, description_item, cve_item, ms_item, category_item, " \
-          "views_item, reviews_item, rating_item, dateadded_item, btc_item, usd_item, euro_item, quantitysold_item, " \
-          "quantityleft_item, shippedfrom_item, shippedto_item, href_item, lastseen_item, dateinserted_item, " \
-          "classification_item) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, " \
-          "%s, %s, %s)"
-
-    recset = [marketId, vendorId,
-              row[4],
-              row[5] if row[5] != '-1' else None,
-              row[6] if row[6] != '-1' else None,
-              row[7] if row[7] != '-1' else None,
-              row[8] if row[8] != '-1' else None,
-              row[9] if row[9] != '-1' else None,
-              row[10] if row[10] != '-1' else None,
-              row[11] if row[11] != '-1' else None,
-              row[12] if row[12] != '-1' else None,
-              row[13] if row[13] != '-1' else None,
-              row[14] if row[14] != '-1' else None,
-              row[15] if row[15] != '-1' else None,
-              row[16] if row[16] != '-1' else None,
-              row[17] if row[17] != '-1' else None,
-              row[18] if row[18] != '-1' else None,
-              row[19] if row[19] != '-1' else None,
-              row[20] if row[20] != '-1' else None,
-              row[21],
-              row[21],
-              row[22]]
-
-    cur.execute(sql, recset)
-
+    itemId = verifyItem(cur, marketId, vendorId, row[4])
+
+    if not itemId:
+        itemId = int(getLastItem(cur) + 1)
+        newItem = True
+    else:
+        newItem = False
+
+    if newItem:
+
+        sql = "Insert into items (item_id, market_id, vendor_id, name_item, description_item, cve_item, ms_item, category_item, " \
+              "views_item, reviews_item, rating_item, dateadded_item, btc_item, usd_item, euro_item, quantitysold_item, " \
+              "quantityleft_item, shippedfrom_item, shippedto_item, href_item, lastseen_item, dateinserted_item, " \
+              "classification_item) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, " \
+              "%s, %s, %s, %s)"
+
+        recset = [itemId, marketId, vendorId,
+                  row[4],
+                  row[5] if row[5] != '-1' else None,
+                  row[6] if row[6] != '-1' else None,
+                  row[7] if row[7] != '-1' else None,
+                  row[8] if row[8] != '-1' else None,
+                  row[9] if row[9] != '-1' else None,
+                  row[10] if row[10] != '-1' else None,
+                  row[11] if row[11] != '-1' else None,
+                  row[12] if row[12] != '-1' else None,
+                  row[13] if row[13] != '-1' else None,
+                  row[14] if row[14] != '-1' else None,
+                  row[15] if row[15] != '-1' else None,
+                  row[16] if row[16] != '-1' else None,
+                  row[17] if row[17] != '-1' else None,
+                  row[18] if row[18] != '-1' else None,
+                  row[19] if row[19] != '-1' else None,
+                  row[20] if row[20] != '-1' else None,
+                  row[21],
+                  row[21],
+                  row[22]]
+
+        cur.execute(sql, recset)
+
+    else:
+
+        # Tracking potential item changes
+        sql = "select * from items where item_id = %(itemId)s"
+        cur.execute(sql, {'itemId': itemId})
+
+        recset = cur.fetchall()
+
+        if (str(recset[0][4]) != str(row[5] if row[5] != '-1' else None) or str(recset[0][5]) != str(row[6] if row[6] != '-1' else None) or # there was a change in the item information
+            str(recset[0][6]) != str(row[7] if row[7] != '-1' else None) or str(recset[0][7]) != str(row[8] if row[8] != '-1' else None) or
+            str(recset[0][8]) != str(row[9] if row[9] != '-1' else None) or str(recset[0][9]) != str(row[10] if row[10] != '-1' else None) or
+            str(recset[0][10]) != str(row[11] if row[11] != '-1' else None) or str(recset[0][11]) != str(row[12] if row[12] != '-1' else None) or
+            str(recset[0][12]) != str(row[13] if row[13] != '-1' else None) or str(recset[0][13]) != str(row[14] if row[14] != '-1' else None) or
+            str(recset[0][14]) != str(row[15] if row[15] != '-1' else None) or str(recset[0][15]) != str(row[16] if row[16] != '-1' else None) or
+            str(recset[0][16]) != str(row[17] if row[17] != '-1' else None) or str(recset[0][17]) != str(row[18] if row[18] != '-1' else None) or str(recset[0][18]) != str(row[19] if row[19] != '-1' else None)): # shippedto_item is written by the Update below, so it must be compared too
+
+            sql = "Insert into items_history (item_id, market_id, vendor_id, name_item, description_item, cve_item, ms_item, category_item, " \
+                  "views_item, reviews_item, rating_item, dateadded_item, btc_item, usd_item, euro_item, quantitysold_item, " \
+                  "quantityleft_item, shippedfrom_item, shippedto_item, href_item, lastseen_item, dateinserted_item, " \
+                  "classification_item) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, " \
+                  "%s, %s, %s, %s)"
+
+            recset = [itemId, marketId, vendorId,
+                      recset[0][3],
+                      recset[0][4],
+                      recset[0][5],
+                      recset[0][6],
+                      recset[0][7],
+                      recset[0][8],
+                      recset[0][9],
+                      recset[0][10],
+                      recset[0][11],
+                      recset[0][12],
+                      recset[0][13],
+                      recset[0][14],
+                      recset[0][15],
+                      recset[0][16],
+                      recset[0][17],
+                      recset[0][18],
+                      recset[0][19],
+                      recset[0][20],
+                      recset[0][21],
+                      recset[0][22]]
+
+            cur.execute(sql, recset)
+
+            sql = "Update items set description_item = %(description_item)s, cve_item = %(cve_item)s, ms_item = %(ms_item)s, " \
+                  "category_item = %(category_item)s, views_item = %(views_item)s, reviews_item = %(reviews_item)s, " \
+                  "rating_item = %(rating_item)s, dateadded_item = %(dateadded_item)s, btc_item = %(btc_item)s, " \
+                  "usd_item = %(usd_item)s, euro_item = %(euro_item)s, quantitysold_item = %(quantitysold_item)s, " \
+                  "quantityleft_item = %(quantityleft_item)s, shippedfrom_item = %(shippedfrom_item)s, shippedto_item = %(shippedto_item)s, " \
+                  "dateinserted_item = %(dateinserted_item)s where item_id = %(itemId)s"
+
+            cur.execute(sql, {'description_item': row[5] if row[5] != '-1' else None,
+                              'cve_item': row[6] if row[6] != '-1' else None,
+                              'ms_item': row[7] if row[7] != '-1' else None,
+                              'category_item': row[8] if row[8] != '-1' else None,
+                              'views_item': row[9] if row[9] != '-1' else None,
+                              'reviews_item': row[10] if row[10] != '-1' else None,
+                              'rating_item': row[11] if row[11] != '-1' else None,
+                              'dateadded_item': row[12] if row[12] != '-1' else None,
+                              'btc_item': row[13] if row[13] != '-1' else None,
+                              'usd_item': row[14] if row[14] != '-1' else None,
+                              'euro_item': row[15] if row[15] != '-1' else None,
+                              'quantitysold_item': row[16] if row[16] != '-1' else None,
+                              'quantityleft_item': row[17] if row[17] != '-1' else None,
+                              'shippedfrom_item': row[18] if row[18] != '-1' else None,
+                              'shippedto_item': row[19] if row[19] != '-1' else None,
+                              'dateinserted_item': row[21],
+                              'itemId': itemId})
+
+    return itemId
 
 def create_database(cur, con):
 
     try:
 
         sql = "create table marketplaces(market_id integer not null, name_market character varying(255) not null, " \
-              "url_market character varying(255) null, dateinserted_market timestamp(6) with time zone not null, " \
+              "url_market character varying(255) not null, dateinserted_market timestamp(6) with time zone not null, " \
               "constraint markets_pk primary key (market_id))"
         cur.execute(sql)
 
+        sql = "create unique index unique_market ON marketplaces USING btree (name_market ASC NULLS LAST)"
+        cur.execute(sql)
+
         sql = "create table vendors(vendor_id integer not null, market_id integer not null, name_vendor character " \
               "varying(255) not null, rating_vendor character varying(255), successfultransactions_vendor integer " \
               "null, dateinserted_vendor timestamp(6) with time zone not null, constraint vendors_pk primary key (" \
@@ -187,6 +340,9 @@ def create_database(cur, con):
               "market_id))"
         cur.execute(sql)
 
+        sql = "create unique index unique_vendor ON vendors USING btree (market_id ASC NULLS LAST, name_vendor ASC NULLS LAST)"
+        cur.execute(sql)
+
         sql = "create table vendors_history(vendor_id integer not null, market_id integer not null, name_vendor " \
               "character varying(255) not null, rating_vendor character varying(255), successfultransactions_vendor " \
               "integer null, dateinserted_vendor timestamp(6) with time zone not null, constraint vendors_history_pk " \
@@ -195,33 +351,36 @@ def create_database(cur, con):
               "market_id) references marketplaces (market_id))"
         cur.execute(sql)
 
-        sql = "create table items(market_id integer not null, vendor_id integer not null, name_item character " \
+        sql = "create table items(item_id integer not null, market_id integer not null, vendor_id integer not null, name_item character " \
               "varying(255) not null, description_item character varying(1000000) null, cve_item character varying(" \
               "255) null, ms_item character varying(255) null, category_item character varying(255) null, views_item " \
               "integer null, reviews_item integer null, rating_item character varying(255) null, dateadded_item " \
               "character varying(25) null, btc_item character varying(255) null, usd_item character varying(255) " \
               "null, euro_item character varying(255) null, quantitysold_item integer null, quantityleft_item " \
               "character varying(255) null, shippedfrom_item character varying(255) null, shippedto_item character " \
-              "varying(255) null, href_item character varying(255) null, lastseen_item timestamp(6) with time zone " \
+              "varying(255) null, href_item character varying(255) not null, lastseen_item timestamp(6) with time zone " \
               "not null, dateinserted_item timestamp(6) with time zone not null, classification_item double " \
-              "precision not null, constraint items_pk primary key (market_id, vendor_id, name_item), constraint " \
+              "precision not null, constraint items_pk primary key (item_id), constraint " \
               "items_market_id_fkey foreign key (market_id) references marketplaces (market_id),constraint " \
               "items_vendor_id_fkey foreign key (vendor_id) references vendors (vendor_id))"
         cur.execute(sql)
 
-        sql = "create table items_history(market_id integer not null, vendor_id integer not null, name_item character " \
+        sql = "create unique index unique_item ON items USING btree (market_id ASC NULLS LAST, vendor_id ASC NULLS LAST, name_item ASC NULLS LAST)"
+        cur.execute(sql)
+
+        sql = "create table items_history(item_id integer not null, market_id integer not null, vendor_id integer not null, name_item character " \
               "varying(255) not null, description_item character varying(1000000) null, cve_item character varying(" \
               "255) null, ms_item character varying(255) null, category_item character varying(255) null, views_item " \
               "integer null, reviews_item integer null, rating_item character varying(255) null, dateadded_item " \
               "character varying(25) null, btc_item character varying(255) null, usd_item character varying(255) " \
               "null, euro_item character varying(255) null, quantitysold_item integer null, quantityleft_item " \
               "character varying(255) null, shippedfrom_item character varying(255) null, shippedto_item character " \
-              "varying(255) null, href_item character varying(255) null, lastseen_item timestamp(6) with time zone " \
+              "varying(255) null, href_item character varying(255) not null, lastseen_item timestamp(6) with time zone " \
               "not null, dateinserted_item timestamp(6) with time zone not null, classification_item double " \
-              "precision not null, constraint items_history_pk primary key (market_id, vendor_id, name_item, " \
-              "dateinserted_item), constraint items_history_market_id_fkey foreign key (market_id) references " \
-              "marketplaces (market_id), constraint items_history_vendor_id_fkey foreign key (vendor_id) references " \
-              "vendors (vendor_id))"
+              "precision not null, constraint items_history_pk primary key (item_id, dateinserted_item), " \
+              "constraint items_history_market_id_fkey foreign key (market_id) references marketplaces (market_id), " \
+              "constraint items_history_vendor_id_fkey foreign key (vendor_id) references vendors (vendor_id), " \
+              "constraint items_history_item_id_fkey foreign key (item_id) references items (item_id))"
         cur.execute(sql)
 
         con.commit()