diff --git a/MarketPlaces/Apocalypse/parser.py b/MarketPlaces/Apocalypse/parser.py
index 469241a..bc139fe 100644
--- a/MarketPlaces/Apocalypse/parser.py
+++ b/MarketPlaces/Apocalypse/parser.py
@@ -30,7 +30,9 @@ def apocalypse_description_parser(soup: Tag):
     left = "-1"                             # 16 Product_QuantityLeft
     shipFrom = "-1"                         # 17 Product_ShippedFrom
     shipTo = "-1"                           # 18 Product_ShippedTo
-
+    image = "-1"                            # 19 Product_Image
+    vendor_image = "-1"                     # 20 Vendor_Image
+
     content: Tag = soup.find("div", {'id': "article_page"})
 
     product_name = content.find("p", {"class": "list-group-item text-center mb-0 box"}).text
@@ -38,7 +40,11 @@ def apocalypse_description_parser(soup: Tag):
 
     product_description = content.find("pre").text
     describe = cleanString(product_description.strip())
-
+
+    # Finding Product Image
+    image = soup.find('div', {'class': 'col-md-7 text-center'}).find('img')
+    image = image.get('src')
+
     product_reviews_list: Tag = content.find("table", {"class": "table product_reviews"}) \
         .find_all("li")
@@ -72,7 +78,7 @@ def apocalypse_description_parser(soup: Tag):
 
     # Populating the final variable (this should be a list with all fields scraped)
     row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
-           BTC, USD, EURO, sold, left, shipFrom, shipTo)
+           BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
 
     # Sending the results
     return row
@@ -103,15 +109,21 @@ def apocalypse_listing_parser(soup: Tag):
     vendor = []                             # 18 Vendor
     rating = []                             # 19 Vendor_Rating
     success = []                            # 20 Vendor_Successful_Transactions
-    href = []                               # 23 Product_Links (Urls)
-
-
+    image = []                              # 20 Product_Image
+    image_vendor = []                       # 21 Vendor_Image
+    href = []                               # 22 Product_Links
+
     listings: ResultSet[Tag] = soup.find("div", {"class": "col-lg-9 my-4"}).find_all("div", {"class": "col-lg-4 col-md-6 mb-1"})
 
     for prod in listings:
 
         product_name = prod.find('h5', {"class": "art_title"}).text
         name.append(cleanString(product_name.strip()))
+
+        # Finding Product Image
+        product_image = prod.find('img', {'class': 'customHeight'})
+        product_image = product_image.get('src')
+        image.append(product_image)
 
         CVE.append("-1")
         MS.append("-1")
@@ -124,6 +136,7 @@ def apocalypse_listing_parser(soup: Tag):
         EURO.append("-1")
         shipTo.append("-1")
         success.append("-1")
+        image_vendor.append("-1")
 
         product_price = prod.find("span", {"class": "priceP"}).text
         USD.append(cleanString(product_price.strip()))
@@ -161,7 +174,7 @@ def apocalypse_listing_parser(soup: Tag):
             rating.append(cleanString(product_vendor_rating.strip()))
         except Exception as e:
             raise e
-
+
         product_href = prod.find('a').get('href')
         href.append(product_href)
@@ -190,7 +203,9 @@ def apocalypse_listing_parser(soup: Tag):
         qLeft=qLeft,
         shipFrom=shipFrom,
         shipTo=shipTo,
-        href=href
+        href=href,
+        image=image,
+        image_vendor=image_vendor
     )
 
 
 #called by the crawler to get description links on a listing page
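Reviewer note: every parser now returns image and vendor_image, so the shared record layout shifts, and the index changes below in DB_Connection and prepare_parser all assume the same column order. A sketch for reference (the constant names are illustrative, not code from this commit; classification sits at index 24 by the time create_items() reads the row):

    # Illustrative map of the listing-record columns emitted by organizeProducts()
    MARKET, VENDOR, VENDOR_RATING, VENDOR_SUCCESS, NAME = 0, 1, 2, 3, 4
    DESCRIBE, CVE, MS, CATEGORY, VIEWS, REVIEWS = 5, 6, 7, 8, 9, 10
    ITEM_RATING, ADD_DATE, BTC, USD, EURO, SOLD = 11, 12, 13, 14, 15, 16
    QTY_LEFT, SHIP_FROM, SHIP_TO = 17, 18, 19
    IMAGE, IMAGE_VENDOR, HREF, DATE_INSERTED, CLASSIFICATION = 20, 21, 22, 23, 24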
diff --git a/MarketPlaces/DB_Connection/db_connection.py b/MarketPlaces/DB_Connection/db_connection.py
index 5c29e03..81e3a1c 100644
--- a/MarketPlaces/DB_Connection/db_connection.py
+++ b/MarketPlaces/DB_Connection/db_connection.py
@@ -3,6 +3,7 @@ __author__ = 'DarkWeb'
 import psycopg2
 import traceback
 import configparser
+from MarketPlaces.Utilities.utilities import *
 
 
 def connectDataBase():
@@ -146,7 +147,7 @@ def create_marketPlace(cur, row, url):
     sql = "Insert into marketplaces (market_id, name_market, url_market, dateinserted_market) " \
           "Values (%s, %s, %s, %s)"
 
-    recset = [marketId, row[0], url, row[21]]
+    recset = [marketId, row[0], url, row[23]]
 
     cur.execute(sql, recset)
@@ -165,13 +166,15 @@ def create_vendor(cur, row, marketId):
 
     if newVendor:
 
-        sql = "Insert into vendors (vendor_id, market_id, name_vendor, rating_vendor, successfultransactions_vendor, dateinserted_vendor) Values (%s, %s, %s, %s, %s, %s)"
+        sql = "Insert into vendors (vendor_id, market_id, name_vendor, rating_vendor, successfultransactions_vendor, image_vendor, dateinserted_vendor) " \
+              "Values (%s, %s, %s, %s, %s, %s, %s)"
 
         recset = [vendorId, marketId,
                   row[1],
                   row[2] if row[2] != '-1' else None,
                   row[3] if row[3] != '-1' else None,
-                  row[21]]
+                  row[21] if row[21] != '-1' else None,
+                  row[23]]
 
         cur.execute(sql, recset)
@@ -183,24 +186,30 @@ def create_vendor(cur, row, marketId):
 
         recset = cur.fetchall()
 
+        #aes_decryption(recset[0][5]) trying to decrypt the image
+
        if (str(recset[0][3]) != str(row[2] if row[2] != '-1' else None) or  # there was a change in the vendor information
-            str(recset[0][4]) != str(row[3] if row[3] != '-1' else None)):
+            str(recset[0][4]) != str(row[3] if row[3] != '-1' else None) or
+            str(recset[0][5]) != str(row[21] if row[21] != '-1' else None)):
 
-            sql = "Insert into vendors_history (vendor_id, market_id, name_vendor, rating_vendor, successfultransactions_vendor, dateinserted_vendor) Values (%s, %s, %s, %s, %s, %s)"
+            sql = "Insert into vendors_history (vendor_id, market_id, name_vendor, rating_vendor, successfultransactions_vendor, image_vendor, dateinserted_vendor) " \
+                  "Values (%s, %s, %s, %s, %s, %s, %s)"
 
             recset = [vendorId, marketId,
                       recset[0][2],
                       recset[0][3],
                       recset[0][4],
-                      recset[0][5]]
+                      recset[0][5],
+                      recset[0][6]]
 
             cur.execute(sql, recset)
 
             sql = "Update vendors set rating_vendor = %(rating_vendor)s, successfultransactions_vendor = %(successfultransactions_vendor)s, " \
-                  "dateinserted_vendor = %(dateinserted_vendor)s where vendor_id = %(vendorId)s"
+                  "image_vendor = %(image_vendor)s, dateinserted_vendor = %(dateinserted_vendor)s where vendor_id = %(vendorId)s"
 
             cur.execute(sql, {'rating_vendor': row[2] if row[2] != '-1' else None,
                               'successfultransactions_vendor': row[3] if row[3] != '-1' else None,
-                              'dateinserted_vendor': row[21],
+                              'image_vendor': row[21] if row[21] != '-1' else None,
+                              'dateinserted_vendor': row[23],
                               'vendorId': vendorId})
 
     return vendorId
@@ -220,9 +229,9 @@ def create_items(cur, row, marketId, vendorId):
 
         sql = "Insert into items (item_id, market_id, vendor_id, name_item, description_item, cve_item, ms_item, category_item, " \
               "views_item, reviews_item, rating_item, dateadded_item, btc_item, usd_item, euro_item, quantitysold_item, " \
-              "quantityleft_item, shippedfrom_item, shippedto_item, href_item, lastseen_item, dateinserted_item, " \
+              "quantityleft_item, shippedfrom_item, shippedto_item, lastseen_item, image_item, href_item, dateinserted_item, " \
               "classification_item) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, " \
-              "%s, %s, %s, %s)"
+              "%s, %s, %s, %s, %s)"
 
         recset = [itemId, marketId, vendorId,
                   row[4],
@@ -241,10 +250,11 @@ def create_items(cur, row, marketId, vendorId):
                   row[17] if row[17] != '-1' else None,
                   row[18] if row[18] != '-1' else None,
                   row[19] if row[19] != '-1' else None,
+                  row[23],
                   row[20] if row[20] != '-1' else None,
-                  row[21],
-                  row[21],
-                  row[22]]
+                  row[22] if row[22] != '-1' else None,
+                  row[23],
+                  row[24]]
 
         cur.execute(sql, recset)
@@ -262,13 +272,14 @@ def create_items(cur, row, marketId, vendorId):
             str(recset[0][10]) != str(row[11] if row[11] != '-1' else None) or str(recset[0][11]) != str(row[12] if row[12] != '-1' else None) or
             str(recset[0][12]) != str(row[13] if row[13] != '-1' else None) or str(recset[0][13]) != str(row[14] if row[14] != '-1' else None) or
             str(recset[0][14]) != str(row[15] if row[15] != '-1' else None) or str(recset[0][15]) != str(row[16] if row[16] != '-1' else None) or
-            str(recset[0][16]) != str(row[17] if row[17] != '-1' else None) or str(recset[0][17]) != str(row[18] if row[18] != '-1' else None)):
+            str(recset[0][16]) != str(row[17] if row[17] != '-1' else None) or str(recset[0][17]) != str(row[18] if row[18] != '-1' else None) or
+            str(recset[0][18]) != str(row[19] if row[19] != '-1' else None) or str(recset[0][20]) != str(row[20] if row[20] != '-1' else None)):
 
             sql = "Insert into items_history (item_id, market_id, vendor_id, name_item, description_item, cve_item, ms_item, category_item, " \
                   "views_item, reviews_item, rating_item, dateadded_item, btc_item, usd_item, euro_item, quantitysold_item, " \
-                  "quantityleft_item, shippedfrom_item, shippedto_item, href_item, lastseen_item, dateinserted_item, " \
+                  "quantityleft_item, shippedfrom_item, shippedto_item, lastseen_item, image_item, href_item, dateinserted_item, " \
                   "classification_item) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, " \
-                  "%s, %s, %s, %s)"
+                  "%s, %s, %s, %s, %s)"
 
             recset = [itemId, marketId, vendorId,
                       recset[0][3],
@@ -290,7 +301,8 @@ def create_items(cur, row, marketId, vendorId):
                       recset[0][19],
                       recset[0][20],
                       recset[0][21],
-                      recset[0][22]]
+                      recset[0][22],
+                      recset[0][23]]
 
             cur.execute(sql, recset)
@@ -299,7 +311,7 @@ def create_items(cur, row, marketId, vendorId):
                   "rating_item = %(rating_item)s, dateadded_item = %(dateadded_item)s, btc_item = %(btc_item)s, " \
                   "usd_item = %(usd_item)s, euro_item = %(euro_item)s, quantitysold_item = %(quantitysold_item)s, " \
                   "quantityleft_item = %(quantityleft_item)s, shippedfrom_item = %(shippedfrom_item)s, shippedto_item = %(shippedto_item)s, " \
-                  "lastseen_item = %(lastseen_item)s, dateinserted_item = %(dateinserted_item)s where item_id = %(itemId)s"
+                  "lastseen_item = %(lastseen_item)s, image_item = %(image_item)s, dateinserted_item = %(dateinserted_item)s where item_id = %(itemId)s"
 
             cur.execute(sql, {'description_item': row[5] if row[5] != '-1' else None,
                               'cve_item': row[6] if row[6] != '-1' else None,
@@ -316,8 +328,9 @@ def create_items(cur, row, marketId, vendorId):
                               'quantityleft_item': row[17] if row[17] != '-1' else None,
                               'shippedfrom_item': row[18] if row[18] != '-1' else None,
                               'shippedto_item': row[19] if row[19] != '-1' else None,
-                              'dateinserted_item': row[21],
-                              'lastseen_item': row[21],
+                              'dateinserted_item': row[23],
+                              'lastseen_item': row[23],
+                              'image_item': row[20] if row[20] != '-1' else None,
                               'itemId': itemId})
 
@@ -325,7 +338,7 @@ def create_items(cur, row, marketId, vendorId):
 
         sql = "Update items set lastseen_item = %(lastseen_item)s where item_id = %(itemId)s"
 
-        cur.execute(sql, {'lastseen_item': row[21],
+        cur.execute(sql, {'lastseen_item': row[23],
                           'itemId': itemId})
 
     return itemId
@@ -344,8 +357,8 @@ def create_database(cur, con):
 
         sql = "create table vendors(vendor_id integer not null, market_id integer not null, name_vendor character " \
               "varying(255) not null, rating_vendor character varying(255), successfultransactions_vendor integer " \
-              "null, dateinserted_vendor timestamp(6) with time zone not null, constraint vendors_pk primary key (" \
-              "vendor_id), constraint vendors_market_id_fkey foreign key (market_id) references marketplaces (" \
+              "null, image_vendor character varying(1000000) null, dateinserted_vendor timestamp(6) with time zone not null, " \
+              "constraint vendors_pk primary key (vendor_id), constraint vendors_market_id_fkey foreign key (market_id) references marketplaces (" \
               "market_id))"
         cur.execute(sql)
@@ -354,8 +367,8 @@ def create_database(cur, con):
 
         sql = "create table vendors_history(vendor_id integer not null, market_id integer not null, name_vendor " \
               "character varying(255) not null, rating_vendor character varying(255), successfultransactions_vendor " \
-              "integer null, dateinserted_vendor timestamp(6) with time zone not null, constraint vendors_history_pk " \
-              "primary key (vendor_id, dateinserted_vendor), constraint vendors_history_vendor_id_fkey foreign key (" \
+              "integer null, image_vendor character varying(1000000) null, dateinserted_vendor timestamp(6) with time zone not null, " \
+              "constraint vendors_history_pk primary key (vendor_id, dateinserted_vendor), constraint vendors_history_vendor_id_fkey foreign key (" \
               "vendor_id) references vendors (vendor_id), constraint vendors_history_market_id_fkey foreign key (" \
               "market_id) references marketplaces (market_id))"
         cur.execute(sql)
@@ -367,9 +380,9 @@ def create_database(cur, con):
               "character varying(25) null, btc_item character varying(255) null, usd_item character varying(255) " \
               "null, euro_item character varying(255) null, quantitysold_item integer null, quantityleft_item " \
               "character varying(255) null, shippedfrom_item character varying(255) null, shippedto_item character " \
-              "varying(255) null, href_item character varying(255) not null, lastseen_item timestamp(6) with time zone " \
-              "not null, dateinserted_item timestamp(6) with time zone not null, classification_item double " \
-              "precision not null, constraint items_pk primary key (item_id), constraint " \
+              "varying(255) null, lastseen_item timestamp(6) with time zone not null, image_item character varying(1000000) null, " \
+              "href_item character varying(255) not null, dateinserted_item timestamp(6) with time zone not null, " \
+              "classification_item double precision not null, constraint items_pk primary key (item_id), constraint " \
               "items_market_id_fkey foreign key (market_id) references marketplaces (market_id),constraint " \
               "items_vendor_id_fkey foreign key (vendor_id) references vendors (vendor_id))"
         cur.execute(sql)
@@ -384,9 +397,9 @@ def create_database(cur, con):
               "character varying(25) null, btc_item character varying(255) null, usd_item character varying(255) " \
               "null, euro_item character varying(255) null, quantitysold_item integer null, quantityleft_item " \
               "character varying(255) null, shippedfrom_item character varying(255) null, shippedto_item character " \
-              "varying(255) null, href_item character varying(255) not null, lastseen_item timestamp(6) with time zone " \
-              "not null, dateinserted_item timestamp(6) with time zone not null, classification_item double " \
-              "precision not null, constraint items_history_pk primary key (item_id, dateinserted_item), " \
+              "varying(255) null, lastseen_item timestamp(6) with time zone not null, image_item character varying(1000000) null, " \
+              "href_item character varying(255) not null, dateinserted_item timestamp(6) with time zone not null, " \
+              "classification_item double precision not null, constraint items_history_pk primary key (item_id, dateinserted_item), " \
               "constraint items_history_market_id_fkey foreign key (market_id) references marketplaces (market_id), " \
               "constraint items_history_vendor_id_fkey foreign key (vendor_id) references vendors (vendor_id), " \
               "constraint items_history_item_id_fkey foreign key (item_id) references items (item_id))"
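The vendors changes above extend the table's existing history pattern: whenever a tracked field differs from the stored row, the old row is copied into vendors_history before vendors is updated. A minimal sketch of the change test with the new image_vendor column included (function name hypothetical, not part of the commit):

    def vendor_changed(stored, row):
        # stored: a vendors row as (vendor_id, market_id, name, rating,
        # success, image_vendor, dateinserted); row: the parsed record.
        new_rating = row[2] if row[2] != '-1' else None
        new_success = row[3] if row[3] != '-1' else None
        new_image = row[21] if row[21] != '-1' else None
        # Any difference sends the old row to vendors_history before updating.
        return (str(stored[3]) != str(new_rating)
                or str(stored[4]) != str(new_success)
                or str(stored[5]) != str(new_image))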
diff --git a/MarketPlaces/DarkMatter/parser.py b/MarketPlaces/DarkMatter/parser.py
index 1482034..17d775c 100644
--- a/MarketPlaces/DarkMatter/parser.py
+++ b/MarketPlaces/DarkMatter/parser.py
@@ -34,6 +34,8 @@ def darkmatter_description_parser(soup):
     left = "-1"                             # 16 Product_QuantityLeft
     shipFrom = "-1"                         # 17 Product_ShippedFrom
     shipTo = "-1"                           # 18 Product_ShippedTo
+    image = "-1"                            # 19 Product_Image
+    vendor_image = "-1"                     # 20 Vendor_Image
 
     # 0 *Vendor_Name
     try:
@@ -65,6 +67,10 @@ def darkmatter_description_parser(soup):
     except:
         print("description")
 
+    # Finding Product Image
+    #image = soup.find('div', {'class': 'woocommerce-product-gallery__image'}).find('img')
+    #image = image.get('src')
+
     #product category
     try:
         temp = soup.find('table', {'class', 'vtable'})
@@ -116,7 +122,7 @@ def darkmatter_description_parser(soup):
 
     # Populating the final variable (this should be a list with all fields scraped)
     row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
-           BTC, USD, EURO, sold, left, shipFrom, shipTo)
+           BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
 
     # Sending the results
     return row
@@ -128,6 +134,7 @@ def darkmatter_description_parser(soup):
 #return: 'row' that contains a variety of lists that each hold info on the listing page
 def darkmatter_listing_parser(soup):
 
+    """
     # Fields to be parsed
     nm = 0                                  # Total_Products (Should be Integer)
     mktName = "DarkMatter"                  # 0 Marketplace_Name
@@ -153,6 +160,33 @@ def darkmatter_listing_parser(soup):
     rating = []                             # 19 Vendor_Rating
     success = []                            # 20 Vendor_Successful_Transactions
     href = []                               # 23 Product_Links (Urls)
+    """
+
+    # Fields to be parsed
+    nm = 0                                  # *Total_Products (Should be Integer)
+    mktName = "DarkMatter"                  # 0 *Marketplace_Name
+    vendor = []                             # 1 *Vendor y
+    rating = []                             # 2 Vendor_Rating
+    success = []                            # 3 Vendor_Successful_Transactions
+    name = []                               # 4 *Product_Name y
+    CVE = []                                # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures)
+    MS = []                                 # 6 Product_MS_Classification (Microsoft Security)
+    category = []                           # 7 Product_Category y
+    describe = []                           # 8 Product_Description
+    views = []                              # 9 Product_Number_Of_Views
+    reviews = []                            # 10 Product_Number_Of_Reviews
+    rating_item = []                        # 11 Product_Rating
+    addDate = []                            # 12 Product_AddDate
+    BTC = []                                # 13 Product_BTC_SellingPrice
+    USD = []                                # 14 Product_USD_SellingPrice y
+    EURO = []                               # 15 Product_EURO_SellingPrice
+    sold = []                               # 16 Product_QuantitySold
+    qLeft = []                              # 17 Product_QuantityLeft
+    shipFrom = []                           # 18 Product_ShippedFrom
+    shipTo = []                             # 19 Product_ShippedTo
+    image = []                              # 20 Product_Image
+    image_vendor = []                       # 21 Vendor_Image
+    href = []                               # 22 Product_Links
 
     names = soup.find('div', {"class": "content"}).findAll('td', {"class": "lefted", "colspan": "3"})
     left = soup.find('div', {"class": "content"}).findAll('table', {"class": "vtable"})
@@ -178,6 +212,11 @@ def darkmatter_listing_parser(soup):
         except Exception as e:
             print("product name", e)
 
+        # Finding Product Image
+        #product_image = a.find('img', {'class': 'attachment-woocommerce_thumbnail size-woocommerce_thumbnail'})
+        #product_image = product_image.get('src')
+        #image.append(product_image)
+
         CVE.append("-1")
         MS.append("-1")
@@ -193,12 +232,14 @@ def darkmatter_listing_parser(soup):
             print('category')
 
         describe.append("-1")
-        escrow.append("-1")
+        #escrow.append("-1")
         views.append("-1")
         reviews.append("-1")
         addDate.append("-1")
-        lastSeen.append("-1")
+        #lastSeen.append("-1")
         BTC.append("-1")
+        image.append("-1")
+        image_vendor.append("-1")
 
         # usd
         try:
@@ -261,7 +302,7 @@ def darkmatter_listing_parser(soup):
 
     # Populate the final variable (this should be a list with all fields scraped)
     return organizeProducts(mktName, nm, vendor, rating, success, name, CVE, MS, category, describe, views,
-                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
+                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
 
 
 #called by the crawler to get description links on a listing page
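The DarkMatter image selectors stay commented out, presumably until the right markup for that market is confirmed. The live parsers all repeat the same find-then-get steps; a small guard like the following could factor that out and also cover the missing-element case (a sketch only, helper name hypothetical):

    def safe_img_src(tag, css_class=None):
        # Return the src of the first matching <img>, or the "-1"
        # placeholder the pipeline uses when the element is missing.
        attrs = {'class': css_class} if css_class else {}
        img = tag.find('img', attrs)
        return img.get('src') if img is not None else "-1"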
diff --git a/MarketPlaces/DigitalThriftShop/parser.py b/MarketPlaces/DigitalThriftShop/parser.py
index b45c3b9..f55c1e6 100644
--- a/MarketPlaces/DigitalThriftShop/parser.py
+++ b/MarketPlaces/DigitalThriftShop/parser.py
@@ -34,7 +34,8 @@ def digitalThriftShop_description_parser(soup: Tag):
     left = "-1"                             # 16 Product_QuantityLeft
     shipFrom = "-1"                         # 17 Product_ShippedFrom
     shipTo = "-1"                           # 18 Product_ShippedTo
-
+    image = "-1"                            # 19 Product_Image
+    vendor_image = "-1"                     # 20 Vendor_Image
 
     product_name = soup.find("h1", {"class": "product_title entry-title"}).text
@@ -42,7 +43,11 @@ def digitalThriftShop_description_parser(soup: Tag):
 
     product_description = soup.find("div", {"id": "tab-description"}).find("p").text
     describe = cleanString(product_description.strip())
-
+
+    # Finding Product Image
+    image = soup.find('div', {'class': 'woocommerce-product-gallery__image'}).find('img')
+    image = image.get('src')
+
     product_category = soup.find("span", {"class": "posted_in"}).find("a").text
     category = cleanString(product_category.strip())
@@ -64,7 +69,7 @@ def digitalThriftShop_description_parser(soup: Tag):
 
     # Populating the final variable (this should be a list with all fields scraped)
     row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
-           BTC, USD, EURO, sold, left, shipFrom, shipTo)
+           BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
 
     # Sending the results
     return row
@@ -98,7 +103,9 @@ def digitalThriftShop_listing_parser(soup: Tag):
     qLeft = []                              # 17 Product_QuantityLeft
     shipFrom = []                           # 18 Product_ShippedFrom
     shipTo = []                             # 19 Product_ShippedTo
-    href = []                               # 20 Product_Links
+    image = []                              # 20 Product_Image
+    image_vendor = []                       # 21 Vendor_Image
+    href = []                               # 22 Product_Links
 
     product_category = soup.find("h1", {"class": "woocommerce-products-header__title page-title"}).text
@@ -114,6 +121,11 @@ def digitalThriftShop_listing_parser(soup: Tag):
 
         product_name = product.find("h2", {"class": "woocommerce-loop-product__title"}).text
         name.append(cleanString(product_name.strip()))
+
+        # Finding Product Image
+        product_image = product.find('img', {'class': 'attachment-woocommerce_thumbnail size-woocommerce_thumbnail'})
+        product_image = product_image.get('src')
+        image.append(product_image)
 
         CVE.append("-1")
         MS.append("-1")
@@ -121,6 +133,7 @@ def digitalThriftShop_listing_parser(soup: Tag):
         describe.append("-1")
         views.append("-1")
         reviews.append("-1")
+        image_vendor.append("-1")
 
         try:
             product_rating = product.find("div", {"class": "star-rating"}).find("strong", {"class": "rating"}).text
@@ -146,7 +159,7 @@ def digitalThriftShop_listing_parser(soup: Tag):
 
     # Populate the final variable (this should be a list with all fields scraped)
     return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
-                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
 
 
 #called by the crawler to get description links on a listing page
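The lone image_vendor.append("-1") above is load-bearing: organizeProducts() indexes every per-product list by the same product number, so each list must either stay empty or grow exactly once per product. An illustrative guard, not part of the commit:

    def check_columns(*columns):
        # All non-empty per-product lists must be equally long; an empty
        # list is fine because organizeProducts() maps it to "-1".
        lengths = {len(col) for col in columns if col}
        if len(lengths) > 1:
            raise ValueError(f"misaligned listing columns: {sorted(lengths)}")

For example, check_columns(name, image, image_vendor, href) at the end of a listing loop would catch a parser that skips an append on some products.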
diff --git a/MarketPlaces/HiddenMarket/parser.py b/MarketPlaces/HiddenMarket/parser.py
index 8f77f9d..0dc4bc9 100644
--- a/MarketPlaces/HiddenMarket/parser.py
+++ b/MarketPlaces/HiddenMarket/parser.py
@@ -30,6 +30,8 @@ def hiddenmarket_description_parser(soup):
     left = "-1"                             # 16 Product_QuantityLeft
     shipFrom = "-1"                         # 17 Product_ShippedFrom
     shipTo = "-1"                           # 18 Product_ShippedTo
+    image = "-1"                            # 19 Product_Image
+    vendor_image = "-1"                     # 20 Vendor_Image
 
     bae = soup.find('div', {'class': "main"})
@@ -84,6 +86,10 @@ def hiddenmarket_description_parser(soup):
     describe = describe.replace("-", " ")
     describe = describe.strip()
 
+    # Finding Product Image
+    image = soup.find('div', {"class": "thumbnails"}).find('img', {"class": "bigthumbnail"})
+    image = image.get('src')
+
     # Finding the Product Category
     category = mb[-4].text
     category = category.replace("Category:", "")
@@ -115,7 +121,7 @@ def hiddenmarket_description_parser(soup):
 
     # Populating the final variable (this should be a list with all fields scraped)
     row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
-           BTC, USD, EURO, sold, left, shipFrom, shipTo)
+           BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
 
     # Sending the results
     return row
@@ -145,7 +151,9 @@ def hiddenmarket_listing_parser(soup):
     qLeft = []                              # 17 Product_QuantityLeft
     shipFrom = []                           # 18 Product_ShippedFrom
     shipTo = []                             # 19 Product_ShippedTo
-    href = []                               # 20 Product_Links
+    image = []                              # 20 Product_Image
+    image_vendor = []                       # 21 Vendor_Image
+    href = []                               # 22 Product_Links
 
     listing = soup.findAll('div', {"class": "item"})
@@ -175,12 +183,17 @@ def hiddenmarket_listing_parser(soup):
         product = product.strip()
         name.append(product)
 
+        # Finding Product Image
+        image.append("-1")
+
         # Finding Vendor
         vendor_name = card.text
         vendor_name = vendor_name.replace(",", "")
         vendor_name = vendor_name.strip()
         vendor.append(vendor_name)
 
+        image_vendor.append("-1")
+
         # Finding USD
         usd = card.next_sibling.find('div', {"class": "buttons"}).find('div', {'class': "price"}).text
         usd = usd.replace("USD", "")
@@ -262,7 +275,7 @@ def hiddenmarket_listing_parser(soup):
 
     # Populate the final variable (this should be a list with all fields scraped)
     return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
-                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
+                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
 
 
 def hiddenmarket_links_parser(soup):
diff --git a/MarketPlaces/Initialization/prepare_parser.py b/MarketPlaces/Initialization/prepare_parser.py
index c626b6a..a460a18 100644
--- a/MarketPlaces/Initialization/prepare_parser.py
+++ b/MarketPlaces/Initialization/prepare_parser.py
@@ -72,6 +72,10 @@ def mergePages(rmm, rec):
         rec[18] = rmm[17]
     if rec[19] == "-1":  # shippedto_item
         rec[19] = rmm[18]
+    if rec[20] == "-1":  # image
+        rec[20] = rmm[19]
+    if rec[21] == "-1":  # image_vendor
+        rec[21] = rmm[20]
 
     return rec
@@ -318,7 +322,7 @@ def new_parse(marketPlace, url, createLog):
 
                         rec = rec.split(',')
 
-                        descriptionPattern = cleanLink(rec[20]) + ".html"
+                        descriptionPattern = cleanLink(rec[22]) + ".html"
 
                         # Reading the associated description Html Pages
                         descriptions = glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Description", descriptionPattern))
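The mergePages() additions keep to the file's offset rule: rec is the listing record with the marketplace name at index 0, rmm is the description tuple without it, so rec[i] pairs with rmm[i - 1], and the description value only fills a "-1" gap left by the listing page. The same logic as a standalone sketch (function name hypothetical):

    def merge_image_fields(rec, rmm):
        # 20 is image, 21 is image_vendor in the listing record; the
        # description tuple is shifted down by one (no marketplace column).
        for i in (20, 21):
            if rec[i] == "-1":          # listing page had no value
                rec[i] = rmm[i - 1]     # fall back to the description page
        return rec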
diff --git a/MarketPlaces/RobinhoodMarket/parser.py b/MarketPlaces/RobinhoodMarket/parser.py
index 4973a1b..1a3bdb8 100644
--- a/MarketPlaces/RobinhoodMarket/parser.py
+++ b/MarketPlaces/RobinhoodMarket/parser.py
@@ -39,6 +39,8 @@ def Robinhood_description_parser(soup):
     left = "-1"                             # 16 Product_QuantityLeft
     shipFrom = "-1"                         # 17 Product_ShippedFrom
     shipTo = "-1"                           # 18 Product_ShippedTo
+    image = "-1"                            # 19 Product_Image
+    vendor_image = "-1"                     # 20 Vendor_Image
 
     # Finding Product Name
     name = soup.find('h1').text
@@ -59,12 +61,20 @@ def Robinhood_description_parser(soup):
         desc = desc + para.text
     describe = desc
 
+    # Finding Product Image
+    image = soup.find('div', {'class': 'woocommerce-product-gallery__image'}).find('img')
+    image = image.get('src')
+
     # Finding Vendor
     vendor = soup.find('a', {'class': 'wcfm_dashboard_item_title'}).text
     vendor = vendor.replace(",", "")
     vendor = vendor.replace("Sold by:", "")
     vendor = vendor.strip()
 
+    # Finding Vendor Image
+    vendor_image = soup.find('div', {'class': 'wcfmmp_sold_by_container_left'}).find('img')
+    vendor_image = vendor_image.get('src')
+
     # Finding Category
     catSpan = soup.find('span', {'class': 'posted_in'})
     category = catSpan.find('a').text
@@ -93,7 +103,7 @@ def Robinhood_description_parser(soup):
 
     # Populating the final variable (this should be a list with all fields scraped)
     row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
-           BTC, USD, EURO, sold, left, shipFrom, shipTo)
+           BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)
 
     # Sending the results
     return row
@@ -124,7 +134,9 @@ def Robinhood_listing_parser(soup):
     qLeft = []                              # 17 Product_QuantityLeft
     shipFrom = []                           # 18 Product_ShippedFrom
     shipTo = []                             # 19 Product_ShippedTo
-    href = []                               # 20 Product_Links
+    image = []                              # 20 Product_Image
+    image_vendor = []                       # 21 Vendor_Image
+    href = []                               # 22 Product_Links
 
     listing = soup.find('ul', {"class": "products columns-4"})
     items = listing.findAll('li')
@@ -153,6 +165,11 @@ def Robinhood_listing_parser(soup):
         product = product.strip()
         name.append(product)
 
+        # Finding Product Image
+        product_image = card.find('img', {'class': 'attachment-woocommerce_thumbnail size-woocommerce_thumbnail'})
+        product_image = product_image.get('src')
+        image.append(product_image)
+
         info = card.find('div', {'class': 'wcfmmp_sold_by_container'})
 
         # Finding Vendor
@@ -161,6 +178,11 @@ def Robinhood_listing_parser(soup):
         vendor_name = vendor_name.strip()
         vendor.append(vendor_name)
 
+        # Finding Vendor Image
+        vendor_icon = info.find('img', {'class': 'wcfmmp_sold_by_logo'})
+        vendor_icon = vendor_icon.get('src')
+        image_vendor.append(vendor_icon)
+
         # Finding USD
         span = card.find('span', {'class': 'price'})
         if span is not None:
@@ -198,13 +220,12 @@ def Robinhood_listing_parser(soup):
             MSValue = me
         MS.append(MSValue)
 
-
     #print(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
     #      reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
 
     # Populate the final variable (this should be a list with all fields scraped)
     return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
-                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)
+                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
 
 
 def Robinhood_links_parser(soup):
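A note on the vendor-logo selector: BeautifulSoup expects the attrs argument as a dict mapping attribute names to values (a set such as {'class', 'wcfmmp_sold_by_logo'} is instead treated as a bag of class names and only matches by accident), hence the dict form above. A self-contained check:

    from bs4 import BeautifulSoup

    html = '<div class="wcfmmp_sold_by_container">' \
           '<img class="wcfmmp_sold_by_logo" src="/logo.png"></div>'
    info = BeautifulSoup(html, 'html.parser')
    vendor_icon = info.find('img', {'class': 'wcfmmp_sold_by_logo'})
    assert vendor_icon.get('src') == '/logo.png'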
diff --git a/MarketPlaces/Utilities/utilities.py b/MarketPlaces/Utilities/utilities.py
index 120997e..df74e92 100644
--- a/MarketPlaces/Utilities/utilities.py
+++ b/MarketPlaces/Utilities/utilities.py
@@ -242,7 +242,7 @@ def cleanLink(originalLink):
 
 
 def organizeProducts(marketplace, nm, vendor, rating_vendor, success_vendor, nombre, CVE, MS, category, describe,
-                     views, reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href):
+                     views, reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor):
 
     rw = []
@@ -291,9 +291,13 @@ def organizeProducts(marketplace, nm, vendor, rating_vendor, success_vendor, nom
         lne += ","
         lne += "-1" if len(shipTo) == 0 else shipTo[n]              # 19
         lne += ","
-        lne += "-1" if len(href) == 0 else href[n]                  # 20
+        lne += "-1" if len(image) == 0 else image[n]                # 20
         lne += ","
-        lne += day + " " + ahora                                    # 21
+        lne += "-1" if len(image_vendor) == 0 else image_vendor[n]  # 21
+        lne += ","
+        lne += "-1" if len(href) == 0 else href[n]                  # 22
+        lne += ","
+        lne += day + " " + ahora                                    # 23
 
         rw.append(lne)
@@ -338,6 +342,7 @@ def aes_encryption(item):
 def aes_decryption(item):
 
     to_bytes = bytes(item)
+    #to_bytes = bytes(item, 'utf-8')
 
     decrypted_bytes = decryptCipher.decrypt(to_bytes)
@@ -403,7 +408,7 @@ def replace_image_sources(driver, html_content):
         string_image = encrypt_encode_image_to_base64(driver, img_xpath)
 
         if string_image:
-            img_tag.set('src', f'data:image/png;base64,{string_image}')
+            img_tag.set('src', f'data:image/png;base64;{string_image}')
         else:
             img_tag.getparent().remove(img_tag)
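Worth double-checking in the last hunk: in the data-URI scheme (RFC 2397), parameters after the mediatype are separated by semicolons, but the payload itself is introduced by a comma, so a src written as data:image/png;base64;… is not a well-formed data URI. A sketch of the standard form (the PNG bytes are dummy values):

    import base64

    payload = base64.b64encode(b'\x89PNG\r\n\x1a\n').decode()  # dummy header bytes
    src = f'data:image/png;base64,{payload}'
    # ';base64' is a parameter of the mediatype; ',' starts the data itself.
    assert src.startswith('data:image/png;base64,')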