
finished fully running completed markets

Branch: main · westernmeadow · 1 year ago · commit e12abc1fa5
23 changed files with 438 additions and 409 deletions
  1. MarketPlaces/AnonMarket/crawler_selenium.py (+40, -40)
  2. MarketPlaces/DB_Connection/db_connection.py (+57, -49)
  3. MarketPlaces/DarkMatter/crawler_selenium.py (+14, -14)
  4. MarketPlaces/DarkMatter/parser.py (+9, -2)
  5. MarketPlaces/DigitalThriftShop/crawler_selenium.py (+19, -17)
  6. MarketPlaces/DigitalThriftShop/parser.py (+5, -12)
  7. MarketPlaces/HiddenMarket/crawler_selenium.py (+26, -26)
  8. MarketPlaces/HiddenMarket/parser.py (+7, -13)
  9. MarketPlaces/Initialization/prepare_parser.py (+6, -2)
  10. MarketPlaces/LionMarketplace/crawler_selenium.py (+27, -17)
  11. MarketPlaces/LionMarketplace/parser.py (+32, -50)
  12. MarketPlaces/MetaVerseMarket/crawler_selenium.py (+15, -15)
  13. MarketPlaces/MetaVerseMarket/parser.py (+39, -53)
  14. MarketPlaces/Nexus/crawler_selenium.py (+34, -27)
  15. MarketPlaces/Nexus/parser.py (+32, -5)
  16. MarketPlaces/RobinhoodMarket/parser.py (+1, -1)
  17. MarketPlaces/ThiefWorld/crawler_selenium.py (+15, -14)
  18. MarketPlaces/Tor2door/crawler_selenium.py (+15, -15)
  19. MarketPlaces/TorBay/crawler_selenium.py (+8, -8)
  20. MarketPlaces/TorBay/parser.py (+2, -0)
  21. MarketPlaces/TorMarket/crawler_selenium.py (+10, -10)
  22. MarketPlaces/TorMarket/parser.py (+24, -16)
  23. MarketPlaces/Utilities/utilities.py (+1, -3)

MarketPlaces/AnonMarket/crawler_selenium.py (+40, -40)

@@ -162,40 +162,40 @@ def getInterestedLinks():
    # Malware
    links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/malware')
-   # # Bootkits
-   # links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/bootkits')
-   # # Backdoors
-   # links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/backdoors')
-   # # Keyloggers
-   # links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/keyloggers')
-   # # Wireless Trackers
-   # links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/wireless_trackers')
-   # # Screen Scrapers
-   # links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/screen_scrapers')
-   # # Mobile Forensic Tools
-   # links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/mobile_forensics_tools')
-   # # Wifi Jammers
-   # links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/wifi_jammers')
-   # # Carding
-   # links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/carding')
-   # # Worms
-   # links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/worms')
-   # # Viruses
-   # links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/viruses')
-   # # Trojans
-   # links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/trojans')
-   # # Botnets
-   # links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/botnets')
-   # # Security Technology
-   # links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/security_technology')
-   # # Hacks
-   # links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/hacks')
-   # # Exploit kits
-   # links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/exploit_kit')
-   # # Security
-   # links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/security')
-   # # Ransomware
-   # links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/ransomware')
+   # Bootkits
+   links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/bootkits')
+   # Backdoors
+   links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/backdoors')
+   # Keyloggers
+   links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/keyloggers')
+   # Wireless Trackers
+   links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/wireless_trackers')
+   # Screen Scrapers
+   links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/screen_scrapers')
+   # Mobile Forensic Tools
+   links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/mobile_forensics_tools')
+   # Wifi Jammers
+   links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/wifi_jammers')
+   # Carding
+   links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/carding')
+   # Worms
+   links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/worms')
+   # Viruses
+   links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/viruses')
+   # Trojans
+   links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/trojans')
+   # Botnets
+   links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/botnets')
+   # Security Technology
+   links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/security_technology')
+   # Hacks
+   links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/hacks')
+   # Exploit kits
+   links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/exploit_kit')
+   # Security
+   links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/security')
+   # Ransomware
+   links.append('http://2r7wa5og3ly4umqhmmqqytae6bufl5ql5kz7sorndpqtrkc2ri7tohad.onion/category/ransomware')
    return links

@@ -235,12 +235,12 @@ def crawlForum(driver):
    savePage(driver, driver.page_source, item)
    driver.back()  # Go back to listing after visiting each product
-   # comment out
-   # break
-
-   # comment out
-   if count == 1:
-       break
+   # # comment out
+   # break
+   #
+   # # comment out
+   # if count == 1:
+   #     break

    # Locate the next page link
    try:


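Note: the crawlForum change above repeats in almost every crawler in this commit. The one-page test guard (`if count == 1: break`) is commented out so each category listing is crawled through to its final page, which is what the commit message means by "fully running" the markets. A minimal sketch of the pagination loop these crawlers share; `save_page` and `get_next_page_link` are hypothetical stand-ins for the repository's savePage and per-market "next" link lookups:

def crawl_category(driver, start_url, save_page, get_next_page_link):
    driver.get(start_url)
    count = 0
    while True:
        save_page(driver, driver.page_source, driver.current_url)
        count += 1
        # The guard this commit comments out stopped every test run here:
        # if count == 1:
        #     break
        next_link = get_next_page_link(driver)
        if not next_link:
            break  # no "next" link means the last listing page was reached
        driver.get(next_link)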
MarketPlaces/DB_Connection/db_connection.py (+57, -49)

@@ -34,7 +34,7 @@ def verifyMarketPlace(cur, nameMarket):
    recset = cur.fetchall()

    if recset:
-       return recset[0][0]
+       return recset[0]['market_id']
    else:
        return 0

@@ -54,7 +54,7 @@ def verifyVendor(cur, nameVendor, marketId):
    recset = cur.fetchall()

    if recset:
-       return recset[0][0]
+       return recset[0]['vendor_id']
    else:
        return 0

@@ -73,7 +73,7 @@ def verifyItem(cur, marketId, vendorId, nameItem):
    recset = cur.fetchall()

    if recset:
-       return recset[0][0]
+       return recset[0]['item_id']
    else:
        return 0

@@ -91,7 +91,7 @@ def getLastMarketPlace(cur):
    recset = cur.fetchall()

    if recset:
-       return recset[0][0]
+       return recset[0]['market_id']
    else:
        return 0

@@ -110,7 +110,7 @@ def getLastVendor(cur):
    recset = cur.fetchall()

    if recset:
-       return recset[0][0]
+       return recset[0]['vendor_id']
    else:
        return 0

@@ -128,7 +128,7 @@ def getLastVendorVersion(cur, vendorId):
    recset = cur.fetchall()

    if recset:
-       return recset[0][0]
+       return recset[0]['version_vendor']
    else:
        return 0

@@ -146,7 +146,7 @@ def getLastItem(cur):
    recset = cur.fetchall()

    if recset:
-       return recset[0][0]
+       return recset[0]['item_id']
    else:
        return 0

@@ -165,7 +165,7 @@ def getLastItemVersion(cur, itemId):
    recset = cur.fetchall()

    if recset:
-       return recset[0][0]
+       return recset[0]['version_item']
    else:
        return 0

@@ -225,9 +225,9 @@ def create_vendor(cur, row, marketId):
        # decode_decrypt_image_in_base64(recset[0][5])

-       if (str(recset[0][3]) != str(row[2] if row[2] != '-1' else None) or  # there was a change in the vendor information
-           str(recset[0][4]) != str(row[3] if row[3] != '-1' else None) or
-           str(recset[0][5]) != str(row[21] if row[21] != '-1' else None)):
+       if (str(recset[0]['rating_vendor']) != str(row[2] if row[2] != '-1' else None) or  # there was a change in the vendor information
+           str(recset[0]['successfultransactions_vendor']) != str(row[3] if row[3] != '-1' else None) or
+           str(recset[0]['image_vendor']) != str(row[21] if row[21] != '-1' else None)):

            vendorVersionId = int(getLastVendorVersion(cur, vendorId) + 1)

@@ -236,11 +236,11 @@ def create_vendor(cur, row, marketId):
                  "Values (%s, %s, %s, %s, %s, %s, %s, %s)"

            recset = [vendorId, vendorVersionId, marketId,
-                     recset[0][2],
-                     recset[0][3],
-                     recset[0][4],
-                     recset[0][5],
-                     recset[0][6]]
+                     recset[0]['name_vendor'],
+                     recset[0]['rating_vendor'],
+                     recset[0]['successfultransactions_vendor'],
+                     recset[0]['image_vendor'],
+                     recset[0]['dateinserted_vendor']]

            cur.execute(sql, recset)

@@ -308,14 +308,22 @@ def create_items(cur, row, marketId, vendorId):
        # decode_decrypt_image_in_base64(recset[0][20])

-       if (str(recset[0][4]) != str(row[5] if row[5] != '-1' else None) or str(recset[0][5]) != str(row[6] if row[6] != '-1' else None) or
-           str(recset[0][6]) != str(row[7] if row[7] != '-1' else None) or str(recset[0][7]) != str(row[8] if row[8] != '-1' else None) or
-           str(recset[0][8]) != str(row[9] if row[9] != '-1' else None) or str(recset[0][9]) != str(row[10] if row[10] != '-1' else None) or
-           str(recset[0][10]) != str(row[11] if row[11] != '-1' else None) or str(recset[0][11]) != str(row[12] if row[12] != '-1' else None) or
-           str(recset[0][12]) != str(row[13] if row[13] != '-1' else None) or str(recset[0][13]) != str(row[14] if row[14] != '-1' else None) or
-           str(recset[0][14]) != str(row[15] if row[15] != '-1' else None) or str(recset[0][15]) != str(row[16] if row[16] != '-1' else None) or
-           str(recset[0][16]) != str(row[17] if row[17] != '-1' else None) or str(recset[0][17]) != str(row[18] if row[18] != '-1' else None) or
-           str(recset[0][18]) != str(row[19] if row[19] != '-1' else None) or str(recset[0][20]) != str(row[20] if row[20] != '-1' else None)):
+       if (str(recset[0]['description_item']) != str(row[5] if row[5] != '-1' else None) or
+           str(recset[0]['cve_item']) != str(row[6] if row[6] != '-1' else None) or
+           str(recset[0]['ms_item']) != str(row[7] if row[7] != '-1' else None) or
+           str(recset[0]['category_item']) != str(row[8] if row[8] != '-1' else None) or
+           str(recset[0]['views_item']) != str(row[9] if row[9] != '-1' else None) or
+           str(recset[0]['reviews_item']) != str(row[10] if row[10] != '-1' else None) or
+           str(recset[0]['rating_item']) != str(row[11] if row[11] != '-1' else None) or
+           str(recset[0]['dateadded_item']) != str(row[12] if row[12] != '-1' else None) or
+           str(recset[0]['btc_item']) != str(row[13] if row[13] != '-1' else None) or
+           str(recset[0]['usd_item']) != str(row[14] if row[14] != '-1' else None) or
+           str(recset[0]['euro_item']) != str(row[15] if row[15] != '-1' else None) or
+           str(recset[0]['quantitysold_item']) != str(row[16] if row[16] != '-1' else None) or
+           str(recset[0]['quantityleft_item']) != str(row[17] if row[17] != '-1' else None) or
+           str(recset[0]['shippedfrom_item']) != str(row[18] if row[18] != '-1' else None) or
+           str(recset[0]['shippedto_item']) != str(row[19] if row[19] != '-1' else None) or
+           str(recset[0]['image_item']) != str(row[20] if row[20] != '-1' else None)):

            itemVersionId = int(getLastItemVersion(cur, itemId) + 1)

@@ -326,27 +334,27 @@ def create_items(cur, row, marketId, vendorId):
                  "%s, %s, %s, %s, %s, %s)"

            recset = [itemId, itemVersionId, marketId, vendorId,
-                     recset[0][3],
-                     recset[0][4],
-                     recset[0][5],
-                     recset[0][6],
-                     recset[0][7],
-                     recset[0][8],
-                     recset[0][9],
-                     recset[0][10],
-                     recset[0][11],
-                     recset[0][12],
-                     recset[0][13],
-                     recset[0][14],
-                     recset[0][15],
-                     recset[0][16],
-                     recset[0][17],
-                     recset[0][18],
-                     recset[0][19],
-                     recset[0][20],
-                     recset[0][21],
-                     recset[0][22],
-                     recset[0][23]]
+                     recset[0]['name_item'],
+                     recset[0]['description_item'],
+                     recset[0]['cve_item'],
+                     recset[0]['ms_item'],
+                     recset[0]['category_item'],
+                     recset[0]['views_item'],
+                     recset[0]['reviews_item'],
+                     recset[0]['rating_item'],
+                     recset[0]['dateadded_item'],
+                     recset[0]['btc_item'],
+                     recset[0]['usd_item'],
+                     recset[0]['euro_item'],
+                     recset[0]['quantitysold_item'],
+                     recset[0]['quantityleft_item'],
+                     recset[0]['shippedfrom_item'],
+                     recset[0]['shippedto_item'],
+                     recset[0]['lastseen_item'],
+                     recset[0]['image_item'],
+                     recset[0]['href_item'],
+                     recset[0]['dateinserted_item'],
+                     recset[0]['classification_item']]

            cur.execute(sql, recset)

@@ -401,7 +409,7 @@ def create_database(cur, con):
        sql = "create table vendors(vendor_id integer not null, market_id integer not null, name_vendor character " \
              "varying(255) not null, rating_vendor character varying(255), successfultransactions_vendor integer " \
-             "null, image_vendor character varying(1000000) null, dateinserted_vendor timestamp(6) with time zone not null, " \
+             "null, image_vendor character varying(10000000) null, dateinserted_vendor timestamp(6) with time zone not null, " \
              "constraint vendors_pk primary key (vendor_id), constraint vendors_market_id_fkey foreign key (market_id) references marketplaces (" \
              "market_id))"
        cur.execute(sql)

@@ -411,7 +419,7 @@ def create_database(cur, con):
        sql = "create table vendors_history(vendor_id integer not null, version_vendor integer not null, market_id integer not null, name_vendor " \
              "character varying(255) not null, rating_vendor character varying(255), successfultransactions_vendor " \
-             "integer null, image_vendor character varying(1000000) null, dateinserted_vendor timestamp(6) with time zone not null, " \
+             "integer null, image_vendor character varying(10000000) null, dateinserted_vendor timestamp(6) with time zone not null, " \
              "constraint vendors_history_pk primary key (vendor_id, version_vendor), constraint vendors_history_vendor_id_fkey foreign key (" \
              "vendor_id) references vendors (vendor_id), constraint vendors_history_market_id_fkey foreign key (" \
              "market_id) references marketplaces (market_id))"

@@ -424,7 +432,7 @@ def create_database(cur, con):
              "character varying(25) null, btc_item character varying(255) null, usd_item character varying(255) " \
              "null, euro_item character varying(255) null, quantitysold_item integer null, quantityleft_item " \
              "character varying(255) null, shippedfrom_item character varying(255) null, shippedto_item character " \
-             "varying(255) null, lastseen_item timestamp(6) with time zone not null, image_item character varying(1000000) null, " \
+             "varying(255) null, lastseen_item timestamp(6) with time zone not null, image_item character varying(10000000) null, " \
              "href_item character varying(255) not null, dateinserted_item timestamp(6) with time zone not null, " \
              "classification_item double precision not null, constraint items_pk primary key (item_id), constraint " \
              "items_market_id_fkey foreign key (market_id) references marketplaces (market_id),constraint " \

@@ -441,7 +449,7 @@ def create_database(cur, con):
              "character varying(25) null, btc_item character varying(255) null, usd_item character varying(255) " \
              "null, euro_item character varying(255) null, quantitysold_item integer null, quantityleft_item " \
              "character varying(255) null, shippedfrom_item character varying(255) null, shippedto_item character " \
-             "varying(255) null, lastseen_item timestamp(6) with time zone not null, image_item character varying(1000000) null, " \
+             "varying(255) null, lastseen_item timestamp(6) with time zone not null, image_item character varying(10000000) null, " \
              "href_item character varying(255) not null, dateinserted_item timestamp(6) with time zone not null, " \
              "classification_item double precision not null, constraint items_history_pk primary key (item_id, version_item), " \
              "constraint items_history_market_id_fkey foreign key (market_id) references marketplaces (market_id), " \


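Note: the change running through this whole file is mechanical, every positional row access (recset[0][0], recset[0][3], ...) becomes a lookup by column name. That only works if the cursor yields mapping-like rows, which is why prepare_parser.py below switches to psycopg2's RealDictCursor. A minimal sketch of the difference, using a placeholder DSN and only the market_id column from the schema above:

import psycopg2
from psycopg2.extras import RealDictCursor

con = psycopg2.connect("dbname=markets user=postgres")  # placeholder DSN

cur = con.cursor()                               # default: rows are tuples
cur.execute("select market_id from marketplaces")
row = cur.fetchone()
market_id = row[0]                               # positional access only

cur = con.cursor(cursor_factory=RealDictCursor)  # rows are dicts
cur.execute("select market_id from marketplaces")
row = cur.fetchone()
market_id = row['market_id']                     # access by column name

Named access keeps the comparison and insert code in create_vendor/create_items readable and immune to column reordering.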
MarketPlaces/DarkMatter/crawler_selenium.py (+14, -14)

@@ -171,14 +171,14 @@ def getNameFromURL(url):
 def getInterestedLinks():
    links = []

-   # # digital fraud software
-   # links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=76')
-   # # legit
-   # links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=78')
-   # # hack guides
-   # links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=94')
-   # # services
-   # links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=117')
+   # digital fraud software
+   links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=76')
+   # legit
+   links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=78')
+   # hack guides
+   links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=94')
+   # services
+   links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=117')
    # software/malware
    links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=121')

@@ -221,12 +221,12 @@ def crawlForum(driver):
    time.sleep(3)  # to keep from detecting click speed
    driver.back()
-   # comment out
-   # break
-
-   # comment out
-   if count == 1:
-       break
+   # # comment out
+   # break
+   #
+   # # comment out
+   # if count == 1:
+   #     break

    try:
        link = driver.find_element(by=By.LINK_TEXT, value=">").get_attribute('href')


MarketPlaces/DarkMatter/parser.py (+9, -2)

@@ -98,8 +98,11 @@ def darkmatter_description_parser(soup):
    sold = cleanString(temp2.strip())

    # Finding Product Image
-   image = soup.find('td', {"class": "vtop"}).find('img').get('src')
-   image = image.split('base64,')[-1]
+   image = soup.find('td', {"class": "vtop"}).find('img')
+   if image is not None:
+       image = image.get('src').split('base64,')[-1]
+   else:
+       image = '-1'

    # Populating the final variable (this should be a list with all fields scraped)
    row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,

@@ -163,6 +166,10 @@ def darkmatter_listing_parser(soup):
        index = temp.index("pks x ")
        result = temp[index + len("pks x "):]
        name.append(cleanString(result))
+   elif ("job x " in temp):
+       index = temp.index("job x ")
+       result = temp[index + len("job x "):]
+       name.append(cleanString(result))

    CVE.append("-1")
    MS.append("-1")


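Note: the description fix above is a null guard. find('img') returns None when a listing has no image, so calling .get('src') directly raised before this commit. A standalone sketch of the pattern with example HTML, not repository data:

from bs4 import BeautifulSoup

html = '<td class="vtop"><p>no image here</p></td>'
soup = BeautifulSoup(html, 'html.parser')

tag = soup.find('td', {'class': 'vtop'}).find('img')
if tag is not None:
    image = tag.get('src').split('base64,')[-1]
else:
    image = '-1'  # these parsers use '-1' as the sentinel for missing fields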
MarketPlaces/DigitalThriftShop/crawler_selenium.py (+19, -17)

@@ -89,7 +89,7 @@ def createFFDriver():
    ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
    ff_prof.set_preference("network.dns.disablePrefetch", True)
    ff_prof.set_preference("network.http.sendRefererHeader", 0)
-   ff_prof.set_preference("permissions.default.image", 2)
+   ff_prof.set_preference("permissions.default.image", 3)
    ff_prof.set_preference("browser.download.folderList", 2)
    ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
    ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")

@@ -172,16 +172,18 @@ def getNameFromURL(url):
 def getInterestedLinks():
    links = []

+   # Apps
+   links.append('http://kw4zlnfhxje7top26u57iosg55i7dzuljjcyswo2clgc3mdliviswwyd.onion/product-category/apps/')
+   # Books
+   links.append('http://kw4zlnfhxje7top26u57iosg55i7dzuljjcyswo2clgc3mdliviswwyd.onion/product-category/books/')
    # Bot nets
    links.append('http://kw4zlnfhxje7top26u57iosg55i7dzuljjcyswo2clgc3mdliviswwyd.onion/product-category/botnets/')
-   # # data leak
-   # links.append('http://kw4zlnfhxje7top26u57iosg55i7dzuljjcyswo2clgc3mdliviswwyd.onion/product-category/dataleak/')
-   # # databases
-   # links.append('http://kw4zlnfhxje7top26u57iosg55i7dzuljjcyswo2clgc3mdliviswwyd.onion/product-category/databases/')
-   # # ransomware
-   # links.append('http://kw4zlnfhxje7top26u57iosg55i7dzuljjcyswo2clgc3mdliviswwyd.onion/product-category/ransomware/')
-   # # rats
-   # links.append('http://kw4zlnfhxje7top26u57iosg55i7dzuljjcyswo2clgc3mdliviswwyd.onion/product-category/rats/')
+   # ransomware
+   links.append('http://kw4zlnfhxje7top26u57iosg55i7dzuljjcyswo2clgc3mdliviswwyd.onion/product-category/ransomware/')
+   # rats
+   links.append('http://kw4zlnfhxje7top26u57iosg55i7dzuljjcyswo2clgc3mdliviswwyd.onion/product-category/rats/')
+   # scripts
+   links.append('http://kw4zlnfhxje7top26u57iosg55i7dzuljjcyswo2clgc3mdliviswwyd.onion/product-category/scripts/')

    return links

@@ -220,16 +222,16 @@ def crawlForum(driver):
    savePage(driver, driver.page_source, item)
    driver.back()
-   # comment out
-   # break
-
-   # comment out
-   if count == 1:
-       break
+   # # comment out
+   # break
+   #
+   # # comment out
+   # if count == 1:
+   #     break

    try:
-       link = driver.find_element(by=By.XPATH, value=
-           '/html/body/div[1]/div[2]/div/div[2]/main/div[1]/nav/ul/li[5]/a').get_attribute('href')
+       nav = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div[2]/div/div[2]/main/div[1]/nav')
+       link = nav.find_element(by=By.PARTIAL_LINK_TEXT, value='').get_attribute('href')
        if link == "":
            raise NoSuchElementException
        count += 1


MarketPlaces/DigitalThriftShop/parser.py (+5, -12)

@@ -50,22 +50,17 @@ def digitalThriftShop_description_parser(soup: Tag):
    product_category = soup.find("span", {"class": "posted_in"}).find("a").text
    category = cleanString(product_category.strip())

-   try:
-       product_rating: Tag = soup.find("div", {"class": "woocommerce-product-rating"})
+   product_rating: Tag = soup.find("div", {"class": "woocommerce-product-rating"})
+   if product_rating is not None:
        rating_item = product_rating.find("strong", {"class": "rating"}).text
-       reviews = product_rating.find("span", {"Class": "rating"}).text
-   except Exception as e:
-       pass
+       reviews = product_rating.find("span", {"class": "rating"}).text

    product_BTC = soup.find("div", {"id": "price-BTC"}).find("span", {"class": "priceinfo cw-noselect"}).text
    BTC = cleanString(product_BTC.strip())

    product_USD = soup.find("span", {"class": "woocommerce-Price-amount amount"}).text
    USD = cleanString(product_USD.replace("$", "").strip())

    # Populating the final variable (this should be a list with all fields scraped)
    row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,

@@ -110,9 +105,7 @@ def digitalThriftShop_listing_parser(soup: Tag):
    product_category = soup.find("h1", {"class": "woocommerce-products-header__title page-title"}).text

    products_list: ResultSet[Tag] = soup.find("ul", {"class": "products columns-5"}).find_all("li")

    for product in products_list:
        nm += 1
        vendor.append(mktName)


MarketPlaces/HiddenMarket/crawler_selenium.py (+26, -26)

@@ -198,28 +198,28 @@ def getNameFromURL(url):
 def getInterestedLinks():
    links = []

-   # # Civil Software
-   # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/civil_softwares')
-   # # Tutorials - Carding
-   # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/carding')
+   # Civil Software
+   links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/civil_softwares')
+   # Tutorials - Carding
+   links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/carding')
    # Digital - Hacks
    links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/hacks')
-   # # Digital - Exploit Kit
-   # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/exploit_kit')
-   # # 0Day
-   # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/0day')
-   # # Digital Forensics
-   # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/digital_forensics')
-   # # Tutorials - Mining
-   # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/mining')
-   # # Tutorials - Worms
-   # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/worms')
-   # # Tutorials - Viruses
-   # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/viruses')
-   # # Tutorials - Trojans
-   # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/trojans')
-   # # Tutorials - Botnets
-   # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/botnets')
+   # Digital - Exploit Kit
+   links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/exploit_kit')
+   # 0Day
+   links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/0day')
+   # Digital Forensics
+   links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/digital_forensics')
+   # Tutorials - Mining
+   links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/mining')
+   # Tutorials - Worms
+   links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/worms')
+   # Tutorials - Viruses
+   links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/viruses')
+   # Tutorials - Trojans
+   links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/trojans')
+   # Tutorials - Botnets
+   links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/botnets')

    return links

@@ -262,12 +262,12 @@ def crawlForum(driver):
    savePage(driver, driver.page_source, item)
    driver.back()
-   # comment out
-   # break
-
-   # comment out
-   if count == 1:
-       break
+   # # comment out
+   # break
+   #
+   # # comment out
+   # if count == 1:
+   #     break

    try:
        pageCount += 1


MarketPlaces/HiddenMarket/parser.py (+7, -13)

@@ -81,10 +81,7 @@ def hiddenmarket_description_parser(soup):
    # Finding the Product description
    describe = bae.find('div', {"class": "twotabs"}).find('div', {'class': "tab1"}).text
-   describe = describe.replace("\n", " ")
-   describe = describe.replace("\r", " ")
-   describe = describe.replace("-", " ")
-   describe = describe.strip()
+   describe = cleanString(describe.strip())

    # Finding Product Image
    image = soup.find('div', {"class": "thumbnails"}).find('img', {"class": "bigthumbnail"})

@@ -169,7 +166,6 @@ def hiddenmarket_listing_parser(soup):
        category.append(cat)

        # Adding the url to the list of urls
        link = card.find_all('a')
        link = link[1].get('href')

@@ -177,7 +173,7 @@ def hiddenmarket_listing_parser(soup):
        href.append(link)

        # Finding Product Name
-       product = card.next_sibling.find('div', {'class': "title"})
+       product = card.find('div', {'class': "title"})
        product = product.text
        product = product.replace('\n', ' ')
        product = product.replace(",", "")

@@ -188,7 +184,7 @@ def hiddenmarket_listing_parser(soup):
        image.append("-1")

        # Finding Vendor
-       vendor_name = card.text
+       vendor_name = card.find('div', {"class": "seller"}).text
        vendor_name = vendor_name.replace(",", "")
        vendor_name = vendor_name.strip()
        vendor.append(vendor_name)

@@ -196,12 +192,12 @@ def hiddenmarket_listing_parser(soup):
        image_vendor.append("-1")

        # Finding USD
-       usd = card.next_sibling.find('div', {"class": "buttons"}).find('div', {'class': "price"}).text
+       usd = card.find('div', {"class": "buttons"}).find('div', {'class': "price"}).text
        usd = usd.replace("USD", "")
        usd = usd.strip()
        USD.append(usd)

-       tb = card.next_sibling.find("div", {"class": "stats"})
+       tb = card.find("div", {"class": "stats"})
        tb = tb.find_all('td')

        # Finding Reviews

@@ -221,17 +217,15 @@ def hiddenmarket_listing_parser(soup):
        if num == '0':
            item_rating = '-1'
        else:
-           item_rating = card.next_sibling.find('div', {'class': 'stats'}).find('div', {'class': "stars2"})
+           item_rating = card.find('div', {'class': 'stats'}).find('div', {'class': "stars2"})
            item_rating = item_rating.get('style')
            item_rating = item_rating.replace("width:", "")
            item_rating = item_rating.replace("%", "")
-           item_rating = (float(item_rating) * 5.0) / 100.0
-           item_rating = "{:.{}f}".format(item_rating, 2)
        rating_item.append(item_rating)

        # Finding shipping info
-       shipping = card.next_sibling.find('div', {'class': "shipping"}).text.split('>')
+       shipping = card.find('div', {'class': "shipping"}).text.split('>')
        # SHip from
        origin = shipping[0].strip()
        shipFrom.append(origin)


MarketPlaces/Initialization/prepare_parser.py (+6, -2)

@@ -5,6 +5,7 @@ import os
 import codecs
 import shutil
 import traceback
+from psycopg2.extras import RealDictCursor

 from MarketPlaces.DB_Connection.db_connection import *
 from MarketPlaces.DarkFox.parser import *

@@ -298,7 +299,7 @@ def new_parse(marketPlace, url, createLog):
    # Connecting to the database
    con = connectDataBase()
-   cur = con.cursor()
+   cur = con.cursor(cursor_factory=RealDictCursor)

    # Creating the tables (The database should be created manually)
    create_database(cur, con)

@@ -420,7 +421,7 @@ def new_parse(marketPlace, url, createLog):
                if createLog:
                    logFile.write(
                        str(nError) + f". There was a problem to locate the file(s) for {listingFile}"
-                                     f" in the Description section!\n")
+                                     f" in the Description section!\n\n")

            if not (readDescriptionError or parseDescriptionError or persistDescriptionError
                    or moveDescriptionError or findDescriptionError):

@@ -430,4 +431,7 @@ def new_parse(marketPlace, url, createLog):
    if createLog:
        logFile.close()

+   cur.close()
+   con.close()
+
    print("Parsing the " + marketPlace + " market and data classification done.")

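Note: the new cur.close() / con.close() calls run only when new_parse reaches its end. A sketch of a stricter variant that guarantees the cleanup even if parsing raises; this is a suggested pattern, not the repository's code:

from psycopg2.extras import RealDictCursor
from MarketPlaces.DB_Connection.db_connection import connectDataBase

con = connectDataBase()
cur = con.cursor(cursor_factory=RealDictCursor)
try:
    pass  # read, parse, and persist the scraped pages here
finally:
    # mirrors the cleanup this commit appends to new_parse()
    cur.close()
    con.close()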
MarketPlaces/LionMarketplace/crawler_selenium.py (+27, -17)

@@ -128,7 +128,21 @@ def getAccess():
 def login(driver):
    # wait for page to show up (This Xpath may need to change based on different seed url)
    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
-       (By.XPATH, "/html/body/div[2]/div[2]/div[2]/div[1]/div/div[2]/div")))
+       (By.XPATH, '//*[@id="username"]')))
+
+   # entering username and password into input boxes
+   usernameBox = driver.find_element(by=By.XPATH, value='//*[@id="username"]')
+   # Username here
+   usernameBox.send_keys('blabri')
+   passwordBox = driver.find_element(by=By.XPATH, value='//*[@id="password"]')
+   # Password here
+   passwordBox.send_keys('fishowal')
+
+   input("Press ENTER when CAPTCHA is completed\n")
+
+   # wait for listing page show up (This Xpath may need to change based on different seed url)
+   WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
+       (By.XPATH, "/html/body/nav/div/div/ul[2]/form/button")))

 # Saves the crawled html page, makes the directory path for html pages if not made

@@ -172,14 +186,10 @@ def getNameFromURL(url):
 def getInterestedLinks():
    links = []

-   # Software/Malware
-   links.append('http://lionznqc2hg2wsp5vgruqait4cpknihwlje6hkjyi52lcl5ivyf7bcad.onion/category/16')
-   # # Carding
-   # links.append('http://lionznqc2hg2wsp5vgruqait4cpknihwlje6hkjyi52lcl5ivyf7bcad.onion/category/20')
-   # # Hacking
-   # links.append('http://lionznqc2hg2wsp5vgruqait4cpknihwlje6hkjyi52lcl5ivyf7bcad.onion/category/ba142ac0-c7e7-11ec-9bd1-fdd89c3d3f91')
-   # # tutorial
-   # links.append('http://lionznqc2hg2wsp5vgruqait4cpknihwlje6hkjyi52lcl5ivyf7bcad.onion/category/19')
+   # Hacking
+   links.append('http://lionznqc2hg2wsp5vgruqait4cpknihwlje6hkjyi52lcl5ivyf7bcad.onion/category/ba142ac0-c7e7-11ec-9bd1-fdd89c3d3f91')
+   # Digital
+   links.append('http://lionznqc2hg2wsp5vgruqait4cpknihwlje6hkjyi52lcl5ivyf7bcad.onion/category/12')

    return links

@@ -218,16 +228,16 @@ def crawlForum(driver):
    savePage(driver, driver.page_source, item)
    driver.back()
-   # comment out
-   # break
-
-   # comment out
-   if count == 1:
-       break
+   # # comment out
+   # break
+   #
+   # # comment out
+   # if count == 1:
+   #     break

    try:
-       link = driver.find_element(by=By.XPATH, value=
-           '/html/body/div[2]/div[2]/div/div[2]/nav/ul/li[5]/a').get_attribute('href')
+       nav = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div[2]/div/div[2]/nav')
+       link = nav.find_element(by=By.PARTIAL_LINK_TEXT, value='Next').get_attribute('href')
        if link == "":
            raise NoSuchElementException
        count += 1


MarketPlaces/LionMarketplace/parser.py (+32, -50)

@@ -45,11 +45,16 @@ def lionmarketplace_description_parser(soup):
    table = soup.find('table')
    rows = table.findAll('tr')

-   # successful transaction
-   success = "-1"
-
-   # vendor rating 5
-   rating_vendor = '-1'
+   # vendor rating
+   pos = soup.find('span', {"class": "fas fa-plus-circle text-success"}).parent.text
+   pos = int(pos.strip())
+   neu = soup.find('span', {"class": "fas fa-stop-circle text-secondary"}).parent.text
+   neu = int(neu.strip())
+   neg = soup.find('span', {"class": "fas fa-minus-circle text-danger"}).parent.text
+   neg = int(neg.strip())
+   total = pos + neu + neg
+   if total > 0:
+       rating_vendor = str((pos + 0.5*neu) / total)

    # product name
    temp = soup.find('div', {'class', 'row'}).find('h2').text

@@ -64,20 +69,12 @@ def lionmarketplace_description_parser(soup):
    image = image.get('src')
    image = image.split('base64,')[-1]

-   CVE = "-1"  # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) dont worry about that much
-   MS = "-1"  # 6 Product_MS_Classification (Microsoft Security) dont worry about that much
-
-   # product category
-   temp = rows[1].find('strong').text
-   category = cleanString(temp.strip())
-
-   # product number of views
-   views = "-1"
-   reviews = "-1"  # 9 Product_Number_Of_Reviews
-   rating_item = "-1"  # 10 Product_Rating
-   addDate = "-1"  # 11 Product_AddedDate
-
-   # BTC selling price box box-rounded mt-2
-   BTC = "-1"
+   full = rows[0].findAll('i', {"class": "fas fa-star"})
+   half = rows[0].find('i', {"class": "fas fa-star-half-alt"})
+   rating_item = len(full)
+   if half is not None:
+       rating_item += 0.5
+   rating_item = str(rating_item)

    # USD selling price
    temp = rows[2].find('strong').text

@@ -87,37 +84,22 @@ def lionmarketplace_description_parser(soup):
    temp = temp.replace("$", "")
    USD = cleanString((temp.strip()))

-   EURO = "-1"  # 14 Product_EURO_SellingPrice
-
    # product sold
-   if (len(rows) <= 5):
-       temp = rows[4].find('td').text
-       string = cleanString(temp)
-       if (string == 'Left/Sold'):
-           temp = rows[4].findAll('td')
-           temp = temp[1].findAll('span')
-
-           # left
-           temp2 = temp[1].text
-           temp3 = temp[1].text
-
-           if(" items" in temp2):
-               temp2 = temp2.replace(" items", "")
-           if(" items" in temp3):
-               temp3 = temp3.replace(" items", "")
-
-           sold = (cleanString(temp2.strip()))
-           left = cleanString(temp3.strip())
-       else:
-           sold = '-1'
-           left = "-1"
+   temp = rows[4].find('td')
+   if temp is not None and cleanString(temp.text.strip()) == 'Left/Sold':
+       temp = rows[4].findAll('td')
+       temp = temp[1].findAll('span')
+
+       # left
+       sold = temp[1].text
+       left = temp[0].text
+
+       sold = cleanNumbers(sold.strip())
+       left = cleanNumbers(left.strip())
    else:
        sold = '-1'
        left = "-1"

-   shipFrom = "-1"  # 17 Product_ShippedFrom
-   shipTo = "-1"  # 18 Product_ShippedTo
-
    # Populating the final variable (this should be a list with all fields scraped)
    row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
           BTC, USD, EURO, sold, left, shipFrom, shipTo, image, vendor_image)

@@ -195,14 +177,16 @@ def lionmarketplace_listing_parser(soup):
        MS.append('-1')

        # product category
-       temp = row[2].text
+       temp = row[1].text
        temp = temp.replace("Category: ", "")
        category.append(cleanString(temp.strip()))

        describe.append('-1')

        # product views
-       views.append("-1")
+       vnum = listing.find('p', {"class": "position-absolute bg-primary opacity-60 text-white mt-4 mr-5 pr-1"}).text
+       views.append(cleanNumbers(vnum.strip()))
        reviews.append('-1')  # 10 Product_Number_Of_Reviews
        rating_item.append('-1')  # 11 Product_Rating
        addDate.append('-1')  # 12 Product_AddDate

@@ -212,9 +196,7 @@ def lionmarketplace_listing_parser(soup):
        # USD
        temp = row[0].find('strong').text
-       if ' $' in temp:
-           temp = temp.replace(" $", "")
-
-       USD.append(cleanString(temp.strip()))  # 14 Product_USD_SellingPrice
+       USD.append(cleanNumbers(temp.strip()))  # 14 Product_USD_SellingPrice

        EURO.append("-1")  # 15 Product_EURO_SellingPrice


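Note: the rewritten description parser above derives the vendor rating from the market's positive/neutral/negative feedback counters, counting each neutral as half a positive. A worked example with made-up counts:

# rating = (positives + 0.5 * neutrals) / total feedback
pos, neu, neg = 18, 4, 2   # example feedback counts, not real data
total = pos + neu + neg
rating_vendor = str((pos + 0.5*neu) / total) if total > 0 else '-1'
print(rating_vendor)       # '0.8333333333333334'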
MarketPlaces/MetaVerseMarket/crawler_selenium.py (+15, -15)

@@ -2,8 +2,6 @@ __author__ = 'Helium'
 '''
 MetaVerseMarket Marketplace Crawler (Selenium)
-not complete
-need to go through multiple pages...
 '''

 from selenium import webdriver

@@ -128,6 +126,8 @@ def getAccess():
 # then allows for manual solving of captcha in the terminal
 #@param: current selenium web driver
 def login(driver):
+   WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
+       (By.XPATH, '//*[@id="username"]')))

    # entering username and password into input boxes
    usernameBox = driver.find_element(by=By.XPATH, value='//*[@id="username"]')

@@ -184,12 +184,12 @@ def getNameFromURL(url):
 def getInterestedLinks():
    links = []

-   # hacking
-   links.append('http://mdbvvcfwl3fpckiraucv7gio57yoslnhfjxzpoihf4fgdkdd7bwyv7id.onion/products/hacking')
-   # # hosting
-   # links.append('http://mdbvvcfwl3fpckiraucv7gio57yoslnhfjxzpoihf4fgdkdd7bwyv7id.onion/products/hosting')
-   # # hacking guides and tutorials
-   # links.append('http://mdbvvcfwl3fpckiraucv7gio57yoslnhfjxzpoihf4fgdkdd7bwyv7id.onion/products/hacking-guides-and-tutorials')
+   # software and malware
+   links.append('http://mdbvvcfwl3fpckiraucv7gio57yoslnhfjxzpoihf4fgdkdd7bwyv7id.onion/products/softwares-and-malwares')
+   # guides and tutorials
+   links.append('http://mdbvvcfwl3fpckiraucv7gio57yoslnhfjxzpoihf4fgdkdd7bwyv7id.onion/products/guides-and-tutorials')
+   # services
+   links.append('http://mdbvvcfwl3fpckiraucv7gio57yoslnhfjxzpoihf4fgdkdd7bwyv7id.onion/products/services')

    return links

@@ -228,16 +228,16 @@ def crawlForum(driver):
    savePage(driver, driver.page_source, item)
    driver.back()
-   # comment out
-   break
-
-   # comment out
-   if count == 1:
-       break
+   # # comment out
+   # break
+   #
+   # # comment out
+   # if count == 1:
+   #     break

    try:
        link = driver.find_element(by=By.PARTIAL_LINK_TEXT, value='Next').get_attribute('href')
-       if link == "":
+       if link.endswith('#') or link == "":
            raise NoSuchElementException
        count += 1


MarketPlaces/MetaVerseMarket/parser.py (+39, -53)

@@ -40,40 +40,48 @@ def metaversemarket_description_parser(soup):
    name = soup.find('div', {'class': "panel-heading"}).text
    name = cleanString(name.strip())

-   # Finding Vendor
    temp = soup.findAll('div', {'class': "col-xs-12 col-sm-6 mt-5"})
+
+   # Finding Product Image
+   image = temp[0].find('img')
+   image = image.get('src')
+   image = image.split('base64,')[-1]
+
+   # Finding Vendor
    temp = temp[1].findAll('span')
    vendor = temp[1].find('b').text
    vendor = cleanString(vendor.strip())

-   # Finding Product Reviews
-   reviews = soup.find('span', {'class': "badge bg-success fs-12px"}).text.strip()
+   # Finding Successful Transactions
+   # NA
+
+   # Finding Vendor Rating
+   pos = soup.find('span', {'class': "badge bg-success fs-12px"}).text
+   pos = int(cleanNumbers(pos).strip())
+   neg = soup.find('span', {'class': "badge bg-danger fs-12px"}).text
+   neg = int(cleanNumbers(neg).strip())
+   total = pos + neg
+   if total > 0:
+       rating_vendor = str(pos / total)

    # Finding Prices
    USD = soup.find('h3', {'class': "mb-2"}).text
-   USD = USD.replace("Price: $", "").strip()
+   USD = cleanNumbers(USD).strip()

    # Finding the Product Category
    temp = soup.select('div[class="mt-2"]')[1].text
    temp = temp.replace("Category:", "")
    category = temp.strip()

+   # Finding Number of Views
+   views = soup.find('button', {"class": "btn btn-secondary text-center w-33 fw-bold"}).text
+   views = views.strip()
+
    # Finding the Product Quantity Available
-   # temp = soup.find('em', {'class': "icon ni ni-layers-fill"}).parent.parent.parent
-   # left = temp.text
-   # left = left.replace("Supply:", "")
-   # left = left.strip()
-
-   temp = soup.findAll('span', {'class': "badge bg-success"})
-   temp = temp[1].text.split("/")
+   temp = soup.find('button', {"class": "btn btn-success text-center w-33 fw-bold"}).text
+   temp = temp.split("/")
    left = temp[1].strip()

    # Finding Number Sold
    sold = temp[0].strip()

    # Finding Shipment Information (Origin)
    temp = soup.find('div', {'class': "alert alert-info"}).text
    temp = temp.split("to")

@@ -85,21 +93,7 @@ def metaversemarket_description_parser(soup):
    # Finding the Product description
    describe = soup.find('p', {'class': "card-text"}).text
-   describe = describe.replace("\n", " ")
-   describe = describe.strip()
-
-   '''# Finding the Number of Product Reviews
-   tag = soup.findAll(text=re.compile('Reviews'))
-   for index in tag:
-       reviews = index
-       par = reviews.find('(')
-       if par >= 0:
-           reviews = reviews.replace("Reviews (", "")
-           reviews = reviews.replace(")", "")
-           reviews = reviews.split(",")
-           review = str(abs(int(reviews[0])) + abs(int(reviews[1])))
-       else:
-           review = "-1"'''
+   describe = cleanString(describe.strip())

    # Searching for CVE and MS categories
    cve = soup.findAll(text=re.compile('CVE-\d{4}-\d{4}'))

@@ -177,10 +171,7 @@ def metaversemarket_listing_parser(soup):
        # Finding Prices
        price = a.find('strong').text
-       price = price.replace("Buy for $", "")
-       price = price.strip()
-
-       USD.append(price)
+       USD.append(cleanNumbers(price).strip())

        # Finding the Vendor
        temp = a.find('div', {'class': "mt-1 fs-12px"})

@@ -194,35 +185,30 @@ def metaversemarket_listing_parser(soup):
        cat = cat.strip()
        category.append(cat)

-       badge = a.find('span', {'class': "badge bg-success"})
+       ul = a.find('ul', {"class": "product-actions"})

        # Finding Number Sold and Quantity Left
-       temp = badge.text
+       temp = ul.find('span', {'class': "badge bg-success"}).text
        temp = temp.split("/")
        num = temp[0]
-       num = num.strip()
-       sold.append(num)
+       num = num.replace('k', '000')
+       sold.append(cleanNumbers(num).strip())

        quant = temp[1]
-       quant = quant.strip()
-       qLeft.append(quant)
-
-       # Finding Successful Transactions
-       # NA
-
-       # Finding Product review
-       review = a.find('span', {'class': "badge bg-success fs-12px"}).text
-       review = review.replace("+ ", "")
-       reviews.append(review)
+       quant = quant.replace('k', '000')
+       qLeft.append(cleanNumbers(quant).strip())

        # Finding Descrption
-       description = a.find('p', {'class': "alert alert-light text-ssbold p-1"}).text
-       description = description.replace("\n", " ")
-       description = description.strip()
-       describe.append(cleanString(description))
+       # description = a.find('p', {'class': "alert alert-light text-ssbold p-1"}).text
+       # description = description.replace("\n", " ")
+       # description = description.strip()
+       # describe.append(cleanString(description))

        # Finding Number of Views
-       view = a.find('span', {'class': "badge bg-primary"}).text.strip()
-       views.append(view)
+       view = ul.find('span', {'class': "badge bg-primary"}).text
+       view = view.replace('.', '')
+       view = view.replace('K', '000')
+       views.append(view.strip())

        # Find where ships from
        ships = a.find('div', {'class': "alert alert-info item_alert fs-12px p-1"})


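Note: the sold/left counts and the view counter in the rewritten listing parser expand an abbreviated suffix before cleanNumbers runs. A standalone sketch of that expansion; note the order of operations means a fractional count like '1.2K' expands to 12000, since the dot is stripped first:

def expand_count(text: str) -> str:
    # drop the dot, then expand the thousands suffix, as the parser does
    text = text.strip().replace('.', '')
    text = text.replace('K', '000').replace('k', '000')
    return text

print(expand_count('12K'))   # '12000'
print(expand_count('1.2K'))  # '12000'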
MarketPlaces/Nexus/crawler_selenium.py (+34, -27)

@@ -36,6 +36,7 @@ def startCrawling():
    if driver != 'down':
        try:
+           input("Press ENTER when page loads after DDOS protection")
            crawlForum(driver)
        except Exception as e:
            print(driver.current_url, e)

@@ -163,22 +164,22 @@ def getInterestedLinks():
    # malware
    links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/malware/')
-   # # hacking-spam
-   # links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/hacking-spam/')
-   # # hacking services
-   # links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/servicos/hacking/')
-   # # programming services
-   # links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/servicos/programacao/')
-   # # remote admin services
-   # links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/servicos/administracao-remota/')
-   # # hacking guides
-   # links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/guias-tutoriais/guia-de-hacking/')
-   # # malware guides
-   # links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/guias-tutoriais/guia-de-malware/')
-   # # fraud guides
-   # links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/guias-tutoriais/guia-de-fraudes/')
-   # # fraud software
-   # links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/fraudes/software-de-fraude/')
+   # hacking-spam
+   links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/hacking-spam/')
+   # hacking services
+   links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/servicos/hacking/')
+   # programming services
+   links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/servicos/programacao/')
+   # remote admin services
+   links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/servicos/administracao-remota/')
+   # hacking guides
+   links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/guias-tutoriais/guia-de-hacking/')
+   # malware guides
+   links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/guias-tutoriais/guia-de-malware/')
+   # fraud guides
+   links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/guias-tutoriais/guia-de-fraudes/')
+   # fraud software
+   links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/fraudes/software-de-fraude/')

    return links

@@ -206,9 +207,12 @@ def crawlForum(driver):
    driver.refresh()

    # waiting for btc price to load
-   WebDriverWait(driver, 30).until(EC.visibility_of_element_located(
-       (By.XPATH, "/html/body/div[1]/div[2]/div/div/main/ul/li[1]/div/span/span[3]")))
-   time.sleep(5)
+   try:
+       WebDriverWait(driver, 1).until(EC.visibility_of_element_located(
+           (By.XPATH, "/html/body/div[1]/div[2]/div/div/main/ul/li[1]/div/span/span[3]")))
+       time.sleep(5)
+   except:
+       pass

    html = driver.page_source
    savePage(driver, html, link)

@@ -222,18 +226,21 @@ def crawlForum(driver):
    driver.refresh()

    # waiting for btc price to load
-   WebDriverWait(driver, 30).until(EC.visibility_of_element_located(
-       (By.XPATH, "/html/body/div[1]/div[2]/div/div/main/div[3]/div[2]/p/span[3]")))
+   try:
+       WebDriverWait(driver, 1).until(EC.visibility_of_element_located(
+           (By.XPATH, "/html/body/div[1]/div[2]/div/div/main/div[3]/div[2]/p/span[3]")))
+   except:
+       pass

    savePage(driver, driver.page_source, item)
    driver.back()
-   # comment out
-   # break
-
-   # comment out
-   if count == 1:
-       break
+   # # comment out
+   # break
+   #
+   # # comment out
+   # if count == 1:
+   #     break

    try:
        link = driver.find_element(by=By.LINK_TEXT, value='').get_attribute('href')


+ 32
- 5
MarketPlaces/Nexus/parser.py View File

@ -8,6 +8,9 @@ from bs4 import BeautifulSoup
import re import re
usd_to_brl_r = None
#parses description pages, so takes html pages of description pages using soup object, and parses it for info it needs #parses description pages, so takes html pages of description pages using soup object, and parses it for info it needs
#stores info it needs in different lists, these lists are returned after being organized #stores info it needs in different lists, these lists are returned after being organized
#@param: soup object looking at html page of description page #@param: soup object looking at html page of description page
@ -43,10 +46,19 @@ def nexus_description_parser(soup):
name_of_product = soup.find("h1", {"class": "product_title entry-title"}).text name_of_product = soup.find("h1", {"class": "product_title entry-title"}).text
name = cleanString(name_of_product.strip()) name = cleanString(name_of_product.strip())
# Finding USD Price
real = soup.find('span', {"class": "price"}).find('bdi').text
real = real.split(',')
whole = cleanNumbers(real[0]).replace('.', '')
real = whole + '.' + real[1]
usd = float(real) / usd_to_brl_r
USD = str(round(usd, 2))
# Find the BTC Price # Find the BTC Price
prices = soup.find('p', {"class": "price"}).findAll('span', {"class": "cs"}) prices = soup.find('p', {"class": "price"}).findAll('span', {"class": "cs"})
BTC = prices[0].text
BTC = cleanNumbers(BTC.strip())
if len(prices) > 0:
BTC = prices[0].text
BTC = cleanNumbers(BTC.strip())
# finding the description of the product # finding the description of the product
description_div = soup.find("div", {"class": "woocommerce-product-details__short-description"}) description_div = soup.find("div", {"class": "woocommerce-product-details__short-description"})
@ -88,6 +100,13 @@ def nexus_description_parser(soup):
#return: 'row' that contains a variety of lists that each hold info on the listing page #return: 'row' that contains a variety of lists that each hold info on the listing page
def nexus_listing_parser(soup): def nexus_listing_parser(soup):
global usd_to_brl_r
while usd_to_brl_r is None:
try:
usd_to_brl_r = float(input("1 US Dollar = (Brazilian Real) "))
except ValueError:
pass
# Fields to be parsed # Fields to be parsed
nm = 0 # *Total_Products (Should be Integer) nm = 0 # *Total_Products (Should be Integer)
mktName = "Nexus" # 0 *Marketplace_Name mktName = "Nexus" # 0 *Marketplace_Name
@ -138,14 +157,22 @@ def nexus_listing_parser(soup):
product_image = product_image.split('base64,')[-1] product_image = product_image.split('base64,')[-1]
image.append(product_image) image.append(product_image)
# Finding USD Price
real = product.find('span', {"class": "price"}).find('bdi').text
real = real.split(',')
whole = cleanNumbers(real[0]).replace('.', '')
real = whole + '.' + real[1]
usd = float(real) / usd_to_brl_r
USD.append(str(round(usd, 2)))
# Finding BTC Price # Finding BTC Price
prices = product.find('span', {"class": "price"}).findAll('span', {"class": "cs"}) prices = product.find('span', {"class": "price"}).findAll('span', {"class": "cs"})
price = prices[0].text
BTC.append(cleanNumbers(price.strip()))
if len(prices) > 0:
price = prices[0].text
BTC.append(cleanNumbers(price.strip()))
#everything else appends a -1 #everything else appends a -1
rating_vendor.append("-1") rating_vendor.append("-1")
USD.append("-1")
vendor.append('-1') vendor.append('-1')
success.append("-1") success.append("-1")
CVE.append("-1") CVE.append("-1")
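The new USD fields are derived from the listing's Brazilian Real price: Nexus prints prices in pt-BR format (dot as thousands separator, comma as decimal), the digits are normalized, and the result is divided by the exchange rate typed in at the usd_to_brl_r prompt. A sketch of that conversion under those assumptions; brl_to_usd is a hypothetical helper name, not one from the repo:

    import re

    def brl_to_usd(price_text, usd_to_brl_rate):
        # "R$1.234,56" -> 1234.56 BRL -> USD at the supplied rate.
        digits = re.sub(r'[^\d,]', '', price_text)   # drop "R$" and dots
        whole, _, cents = digits.partition(',')
        brl = float(whole + '.' + (cents or '0'))
        return str(round(brl / usd_to_brl_rate, 2))

    print(brl_to_usd('R$1.234,56', 5.0))   # '246.91'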


+ 1
- 1
MarketPlaces/RobinhoodMarket/parser.py View File

@ -115,7 +115,7 @@ def Robinhood_listing_parser(soup):
# Fields to be parsed # Fields to be parsed
nm = 0 # *Total_Products (Should be Integer) nm = 0 # *Total_Products (Should be Integer)
mktName = "Robinhood Market" # 0 *Marketplace_Name
mktName = "RobinhoodMarket" # 0 *Marketplace_Name
vendor = [] # 1 *Vendor y vendor = [] # 1 *Vendor y
rating_vendor = [] # 2 Vendor_Rating rating_vendor = [] # 2 Vendor_Rating
success = [] # 3 Vendor_Successful_Transactions success = [] # 3 Vendor_Successful_Transactions


+ 15
- 14
MarketPlaces/ThiefWorld/crawler_selenium.py View File

@ -182,12 +182,12 @@ def getInterestedLinks():
# Hacking and DOSS # Hacking and DOSS
links.append(['Hacking and DOSS', 'http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion/catalog/35']) links.append(['Hacking and DOSS', 'http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion/catalog/35'])
# # Carding Manuals
# links.append('http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion/catalog/20')
# # Software
# links.append('http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion/catalog/37')
# # Database
# links.append('http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion/catalog/38')
# Carding Manuals
links.append(['Carding Manuals', 'http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion/catalog/20'])
# Software
links.append(['Software', 'http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion/catalog/37'])
# Database
links.append(['Database', 'http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion/catalog/38'])
return links return links
@ -228,16 +228,17 @@ def crawlForum(driver):
savePage(driver, driver.page_source, item) savePage(driver, driver.page_source, item)
driver.back() driver.back()
# comment out
# break
# comment out
if count == 1:
break
# # comment out
# break
#
# # comment out
# if count == 1:
# break
try: try:
link = driver.find_element(by=By.XPATH, value=
'/html/body/div/div[1]/div/div/div[2]/div[3]/div/ul/li[13]/a').get_attribute('href')
nav = driver.find_element(by=By.XPATH, value='/html/body/div/div[1]/div/div/div[2]/div[3]')
right = nav.find_element(by=By.CLASS_NAME, value='pag_right')
link = right.find_element(by=By.TAG_NAME, value='a').get_attribute('href')
if link == "": if link == "":
raise NoSuchElementException raise NoSuchElementException
count += 1 count += 1
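Pagination in the ThiefWorld crawler now resolves the next-page link from the pagination container's pag_right child rather than a fixed li[13] index, so page-number churn cannot shift the target. The pattern in isolation, with the container XPath copied from the diff:

    from selenium.webdriver.common.by import By
    from selenium.common.exceptions import NoSuchElementException

    def thiefworld_next_page(driver):
        # Anchor on the pager container, then walk to the right-arrow
        # link; class/tag lookups survive layout shifts that break
        # absolute li-index paths.
        nav = driver.find_element(by=By.XPATH,
                                  value='/html/body/div/div[1]/div/div/div[2]/div[3]')
        right = nav.find_element(by=By.CLASS_NAME, value='pag_right')
        link = right.find_element(by=By.TAG_NAME, value='a').get_attribute('href')
        if link == "":
            raise NoSuchElementException
        return link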


+ 15
- 15
MarketPlaces/Tor2door/crawler_selenium.py View File

@ -47,7 +47,7 @@ def startCrawling():
def login(driver): def login(driver):
#wait for login page #wait for login page
WebDriverWait(driver, 100).until(EC.visibility_of_element_located( WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
(By.XPATH, "/html/body/main/div/div/div/div/div/h5")))
(By.XPATH, '//*[@id="username"]')))
#entering username and password into input boxes #entering username and password into input boxes
usernameBox = driver.find_element(by=By.XPATH, value='//*[@id="username"]') usernameBox = driver.find_element(by=By.XPATH, value='//*[@id="username"]')
@ -198,16 +198,16 @@ def getNameFromURL(url):
def getInterestedLinks(): def getInterestedLinks():
links = [] links = []
# # Digital - Guides - Hacking
# links.append('http://4rnsxgoijvnb6i6ujchlmudfobk7scdxpewf4vb2osdxuzo4rmkucpad.onion/en/products?category=55')
# # Digital - Guides - Others
# links.append('http://4rnsxgoijvnb6i6ujchlmudfobk7scdxpewf4vb2osdxuzo4rmkucpad.onion/en/products?category=57')
# # Digital - Software
# links.append('http://4rnsxgoijvnb6i6ujchlmudfobk7scdxpewf4vb2osdxuzo4rmkucpad.onion/en/products?category=60')
# Digital - Guides - Hacking
links.append('http://4rnsxgoijvnb6i6ujchlmudfobk7scdxpewf4vb2osdxuzo4rmkucpad.onion/en/products?category=55')
# Digital - Guides - Others
links.append('http://4rnsxgoijvnb6i6ujchlmudfobk7scdxpewf4vb2osdxuzo4rmkucpad.onion/en/products?category=57')
# Digital - Software
links.append('http://4rnsxgoijvnb6i6ujchlmudfobk7scdxpewf4vb2osdxuzo4rmkucpad.onion/en/products?category=60')
# Software - Malware # Software - Malware
links.append('http://4rnsxgoijvnb6i6ujchlmudfobk7scdxpewf4vb2osdxuzo4rmkucpad.onion/en/products?category=69') links.append('http://4rnsxgoijvnb6i6ujchlmudfobk7scdxpewf4vb2osdxuzo4rmkucpad.onion/en/products?category=69')
# # Software - Others
# links.append('http://4rnsxgoijvnb6i6ujchlmudfobk7scdxpewf4vb2osdxuzo4rmkucpad.onion/en/products?category=78')
# Software - Others
links.append('http://4rnsxgoijvnb6i6ujchlmudfobk7scdxpewf4vb2osdxuzo4rmkucpad.onion/en/products?category=78')
return links return links
@ -243,12 +243,12 @@ def crawlForum(driver):
savePage(driver, driver.page_source, item) savePage(driver, driver.page_source, item)
driver.back() driver.back()
# comment out
# break
# comment out
if count == 1:
break
# # comment out
# break
#
# # comment out
# if count == 1:
# break
try: try:
nav = driver.find_element(by=By.XPATH, value= nav = driver.find_element(by=By.XPATH, value=
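The Tor2door login wait now keys on the #username input itself instead of a heading buried in the page layout, so the wait gates on the exact element the following lines type into. A standalone sketch of that idiom (only the username locator appears in the diff):

    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC

    def wait_for_login_form(driver, timeout=100):
        # visibility_of_element_located returns the element once it
        # renders, so the caller can type into the node it waited on.
        return WebDriverWait(driver, timeout).until(
            EC.visibility_of_element_located((By.XPATH, '//*[@id="username"]')))

    # usernameBox = wait_for_login_form(driver)
    # usernameBox.send_keys(...)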


+ 8
- 8
MarketPlaces/TorBay/crawler_selenium.py View File

@ -213,16 +213,16 @@ def crawlForum(driver):
savePage(driver, driver.page_source, item) savePage(driver, driver.page_source, item)
driver.back() driver.back()
# comment out
# break
# comment out
if count == 1:
break
# # comment out
# break
#
# # comment out
# if count == 1:
# break
try: try:
link = driver.find_element(by=By.XPATH, value=
'/html/body/section/div/div/div[2]/div/div[2]/ul/li[3]/a').get_attribute('href')
nav = driver.find_element(by=By.XPATH, value='/html/body/section/div/div/div[2]/div/div[2]/ul')
link = nav.find_element(by=By.PARTIAL_LINK_TEXT, value='Next').get_attribute('href')
if link == "": if link == "":
raise NoSuchElementException raise NoSuchElementException
count += 1 count += 1
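TorBay's next-page lookup gets the same treatment: the pager ul is located once, then the anchor is matched by its visible "Next" label instead of a fixed li[3] index. Sketched with the empty-href sentinel kept from the surrounding loop:

    from selenium.webdriver.common.by import By
    from selenium.common.exceptions import NoSuchElementException

    def torbay_next_page(driver):
        # Match the "Next" anchor by label inside the pager list; an
        # empty href marks the last page, which the loop treats as done.
        nav = driver.find_element(by=By.XPATH,
                                  value='/html/body/section/div/div/div[2]/div/div[2]/ul')
        link = nav.find_element(by=By.PARTIAL_LINK_TEXT, value='Next').get_attribute('href')
        if link == "":
            raise NoSuchElementException
        return link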


+ 2
- 0
MarketPlaces/TorBay/parser.py View File

@ -163,6 +163,8 @@ def torbay_listing_parser(soup):
# Populate the final variable (this should be a list with all fields scraped) # Populate the final variable (this should be a list with all fields scraped)
return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views, return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor) reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor)
#called by the crawler to get description links on a listing page #called by the crawler to get description links on a listing page
#@param: beautifulsoup object that is using the correct html page (listing page) #@param: beautifulsoup object that is using the correct html page (listing page)
#return: list of description links from a listing page #return: list of description links from a listing page


+ 10
- 10
MarketPlaces/TorMarket/crawler_selenium.py View File

@ -170,12 +170,12 @@ def getNameFromURL(url):
def getInterestedLinks(): def getInterestedLinks():
links = [] links = []
# # Tutorials
# links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/guides-tutorials/')
# Tutorials
links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/guides-tutorials/')
# Malware # Malware
links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/malware/') links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/malware/')
# # Services
# links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/services/')
# Services
links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/services/')
return links return links
@ -214,12 +214,12 @@ def crawlForum(driver):
savePage(driver, driver.page_source, item) savePage(driver, driver.page_source, item)
driver.back() driver.back()
# comment out
# break
# comment out
if count == 1:
break
# # comment out
# break
#
# # comment out
# if count == 1:
# break
try: try:
link = driver.find_element(by=By.LINK_TEXT, value='NEXT').get_attribute('href') link = driver.find_element(by=By.LINK_TEXT, value='NEXT').get_attribute('href')
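Every crawler in this commit flips the same "# comment out" block from live to commented, which is the switch between a one-page smoke test and the full crawl the commit message announces. A sketch of that loop shape with the toggle lifted into a flag; test_mode is an assumption, the repo itself uses hand-commented breaks:

    from selenium.common.exceptions import NoSuchElementException
    from selenium.webdriver.common.by import By

    def crawl_category(driver, start_link, test_mode=False):
        link, count = start_link, 0
        while link:
            driver.get(link)
            # ... savePage and per-item visits elided ...
            if test_mode and count == 1:
                break                      # smoke test: stop after two pages
            try:
                link = driver.find_element(by=By.LINK_TEXT,
                                           value='NEXT').get_attribute('href')
                if link == "":
                    raise NoSuchElementException
                count += 1
            except NoSuchElementException:
                link = None                # no next page: category finished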


+ 24
- 16
MarketPlaces/TorMarket/parser.py View File

@ -41,28 +41,28 @@ def tormarket_description_parser(soup):
#finding the name of the product #finding the name of the product
name_of_product = soup.find("h1", {"class": "product_title entry-title"}).find("a").text name_of_product = soup.find("h1", {"class": "product_title entry-title"}).find("a").text
name = cleanString(name_of_product.strip()) name = cleanString(name_of_product.strip())
#finding the description of the product #finding the description of the product
description_of_product = soup.find("div", {"class": "woocommerce-product-details__short-description"}).text description_of_product = soup.find("div", {"class": "woocommerce-product-details__short-description"}).text
describe = cleanString(description_of_product.strip()) describe = cleanString(description_of_product.strip())
#finding the replies
inquires_about_product = soup.find("div", {"class": "woocommerce-Tabs-panel woocommerce-Tabs-panel--wcfm_enquiry_tab panel entry-content wc-tab"}).find("p").text
if inquires_about_product == "There are no inquiries yet.":
review = 0
else:
review = "-1" #fix later pls
#finding the terms and conditions
terms_and_conditions = soup.find("div", {"class": "woocommerce-Tabs-panel woocommerce-Tabs-panel--wcfm_enquiry_tab panel entry-content wc-tab"}).find("p").text
term = cleanString(terms_and_conditions)
#finding the name of the vendor #finding the name of the vendor
name_of_vendor = soup.find("div", {"class": "wcfmmp_sold_by_store"}).find("a").text
vendor = cleanString(name_of_vendor)
name_of_vendor = soup.find("div", {"class": "wcfmmp_sold_by_store"})
if name_of_vendor is not None:
name_of_vendor = name_of_vendor.find("a").text
vendor = cleanString(name_of_vendor.strip())
else:
vendor = "TorMarket"
#finding the price of the item #finding the price of the item
price = soup.find("p", {"class": "price"}).find("bdi").text price = soup.find("p", {"class": "price"}).find("bdi").text
price_cleaned = price[1:] price_cleaned = price[1:]
USD = price_cleaned.strip() USD = price_cleaned.strip()
category = soup.find('span', {"class": "posted_in"}).text
category = category.split(':')[-1]
category = category.replace(',', '/')
category = cleanString(category.strip())
#everything else gets a -1 because they are not found #everything else gets a -1 because they are not found
# Populating the final variable (this should be a list with all fields scraped) # Populating the final variable (this should be a list with all fields scraped)
@ -128,16 +128,24 @@ def tormarket_listing_parser(soup):
rating_item.append(cleanString(rating_score_of_product.strip())) rating_item.append(cleanString(rating_score_of_product.strip()))
# print("done") # print("done")
#finding the rating of the vendors #finding the rating of the vendors
rating_score_of_vendor = product.find("div", {"class": "wcfmmp-store-rating"}).find("strong").text
rating_vendor.append(cleanString(rating_score_of_vendor.strip()))
rating_score_of_vendor = product.find("div", {"class": "wcfmmp-store-rating"})
if rating_score_of_vendor is not None:
rating_score_of_vendor = rating_score_of_vendor.find("strong").text
rating_vendor.append(cleanString(rating_score_of_vendor.strip()))
else:
rating_vendor.append('-1')
# print("done") # print("done")
#finding the cost in USD #finding the cost in USD
cost = product.find("span", {"class": "woocommerce-Price-amount amount"}).text cost = product.find("span", {"class": "woocommerce-Price-amount amount"}).text
USD.append(cost) USD.append(cost)
# print("done") # print("done")
#finding the name of the vendor #finding the name of the vendor
vendor_name = product.find("div", {"class": "wcfmmp_sold_by_wrapper"}).find("a").text
vendor.append(cleanString(vendor_name.strip()))
vendor_name = product.find("div", {"class": "wcfmmp_sold_by_wrapper"})
if vendor_name is not None:
vendor_name = vendor_name.find("a").text
vendor.append(cleanString(vendor_name.strip()))
else:
vendor.append(mktName)
# print("done") # print("done")
#everything else appends a -1 #everything else appends a -1
success.append("-1") success.append("-1")
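Both TorMarket parsers now check soup.find results before chaining into .find("a").text, substituting the market name or "-1" when a product card lacks a vendor or rating block. The same guard condensed into a helper for BeautifulSoup tags; safe_anchor_text is a hypothetical name:

    def safe_anchor_text(tag, name, cls, default):
        # Read the text of an <a> nested in a wrapper that may be absent
        # (e.g. wcfmmp_sold_by_wrapper on vendorless TorMarket listings).
        block = tag.find(name, {"class": cls})
        anchor = block.find("a") if block is not None else None
        return anchor.text.strip() if anchor is not None else default

    # vendor.append(cleanString(
    #     safe_anchor_text(product, "div", "wcfmmp_sold_by_wrapper", mktName)))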


+ 1
- 3
MarketPlaces/Utilities/utilities.py View File

@ -246,12 +246,10 @@ def organizeProducts(marketplace, nm, vendor, rating_vendor, success_vendor, nom
current_time = datetime.now() current_time = datetime.now()
day = current_time.strftime("%m/%d/%Y") day = current_time.strftime("%m/%d/%Y")
ahora = current_time.strftime("%I:%M:%S")
for n in range(nm): for n in range(nm):
current_time += timedelta(seconds=2)
ahora = current_time.strftime("%I:%M:%S")
lne = marketplace # 0 lne = marketplace # 0
lne += "," lne += ","
lne += vendor[n] # 1 lne += vendor[n] # 1
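organizeProducts previously stamped every row in a run with the same second; moving ahora inside the loop and advancing current_time by two seconds per product gives each row a distinct, ordered timestamp under the shared date. The effect in isolation (example values in the comment are illustrative):

    from datetime import datetime, timedelta

    current_time = datetime.now()
    day = current_time.strftime("%m/%d/%Y")        # shared date for the run
    stamps = []
    for n in range(3):
        current_time += timedelta(seconds=2)       # advance before formatting
        stamps.append((day, current_time.strftime("%I:%M:%S")))
    # -> e.g. [('01/02/2024', '10:15:02'), ('01/02/2024', '10:15:04'),
    #          ('01/02/2024', '10:15:06')]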

