
debugged marketplaces

Branch: main
Author: westernmeadow, 1 year ago
Parent commit: d5c78b3e3f

25 changed files with 397 additions and 406 deletions
 1. .idea/DW_Pipeline_Test.iml  (+17, -3)
 2. .idea/misc.xml  (+1, -1)
 3. Forums/DB_Connection/db_connection.py  (+0, -1)
 4. MarketPlaces/AnonymousMarketplace/crawler_selenium.py  (+4, -8)
 5. MarketPlaces/AnonymousMarketplace/parser.py  (+17, -20)
 6. MarketPlaces/Apocalypse/crawler_selenium.py  (+12, -4)
 7. MarketPlaces/DB_Connection/db_connection.py  (+0, -1)
 8. MarketPlaces/DarkBazar/crawler_selenium.py  (+135, -111)
 9. MarketPlaces/DarkBazar/parser.py  (+55, -52)
10. MarketPlaces/DarkMatter/crawler_selenium.py  (+3, -4)
11. MarketPlaces/DigitalThriftShop/crawler_selenium.py  (+1, -1)
12. MarketPlaces/HiddenMarket/crawler_selenium.py  (+1, -1)
13. MarketPlaces/Initialization/marketsList.txt  (+1, -8)
14. MarketPlaces/Initialization/markets_mining.py  (+3, -0)
15. MarketPlaces/Initialization/prepare_parser.py  (+6, -1)
16. MarketPlaces/LionMarketplace/crawler_selenium.py  (+1, -1)
17. MarketPlaces/LionMarketplace/parser.py  (+57, -110)
18. MarketPlaces/M00nkeyMarket/crawler_selenium.py  (+1, -1)
19. MarketPlaces/Nexus/crawler_selenium.py  (+19, -11)
20. MarketPlaces/RobinhoodMarket/crawler_selenium.py  (+3, -2)
21. MarketPlaces/ThiefWorld/crawler_selenium.py  (+2, -2)
22. MarketPlaces/TorBay/crawler_selenium.py  (+1, -1)
23. MarketPlaces/TorMarket/crawler_selenium.py  (+7, -9)
24. MarketPlaces/TorMarket/parser.py  (+49, -52)
25. MarketPlaces/ViceCity/crawler_selenium.py  (+1, -1)

.idea/DW_Pipeline_Test.iml  (+17, -3)

@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="inheritedJdk" />
+    <orderEntry type="jdk" jdkName="C:\Users\calsyslab\anaconda3" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
@@ -10,10 +10,24 @@
       <list>
         <option value="$MODULE_DIR$/Forums/BestCardingWorld" />
         <option value="$MODULE_DIR$/Forums/CryptBB" />
-        <option value="$MODULE_DIR$/MarketPlaces/DarkFox" />
-        <option value="$MODULE_DIR$/MarketPlaces/Tor2door" />
         <option value="$MODULE_DIR$/Forums/OnniForums" />
         <option value="$MODULE_DIR$/MarketPlaces/ThiefWorld" />
+        <option value="$MODULE_DIR$/MarketPlaces/Apocalypse" />
+        <option value="$MODULE_DIR$/MarketPlaces/DarkMatter" />
+        <option value="$MODULE_DIR$/MarketPlaces/DigitalThriftShop" />
+        <option value="$MODULE_DIR$/MarketPlaces/HiddenMarket" />
+        <option value="$MODULE_DIR$/MarketPlaces/LionMarketplace" />
+        <option value="$MODULE_DIR$/MarketPlaces/Nexus" />
+        <option value="$MODULE_DIR$/MarketPlaces/RobinhoodMarket" />
+        <option value="$MODULE_DIR$/MarketPlaces/TorBay" />
+        <option value="$MODULE_DIR$/MarketPlaces/TorMarket" />
+        <option value="$MODULE_DIR$/MarketPlaces/ViceCity" />
+        <option value="$MODULE_DIR$/Forums/AbyssForum" />
+        <option value="$MODULE_DIR$/Forums/Altenens" />
+        <option value="$MODULE_DIR$/Forums/Cardingleaks" />
+        <option value="$MODULE_DIR$/Forums/HiddenAnswers" />
+        <option value="$MODULE_DIR$/Forums/Libre" />
+        <option value="$MODULE_DIR$/Forums/Procrax" />
       </list>
     </option>
   </component>

.idea/misc.xml  (+1, -1)

@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="C:\Users\John Wick\anaconda3" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="C:\Users\calsyslab\anaconda3" project-jdk-type="Python SDK" />
 </project>

Forums/DB_Connection/db_connection.py  (+0, -1)

@@ -2,7 +2,6 @@ __author__ = 'DarkWeb'
 import psycopg2
 import traceback
-import configparser

 def connectDataBase():


MarketPlaces/AnonymousMarketplace/crawler_selenium.py  (+4, -8)

@@ -187,12 +187,8 @@ def getNameFromURL(url):
 def getInterestedLinks():
     links = []

-    # # carding
-    # links.append('http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion/product-category/carding/')
-    # # hacked paypal
-    # links.append('http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion/product-category/hacked-paypal-accounts/')
-    # hacking services
-    links.append('http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion/product-category/hacking-services/')
+    # home
+    links.append('http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion/')

     return links

@@ -232,7 +228,7 @@ def crawlForum(driver):
                     driver.back()

                     # comment out
-                    break
+                    # break

                 # comment out
                 if count == 1:

@@ -240,7 +236,7 @@ def crawlForum(driver):
                 #left in in case site changes
                 try:
-                    link = ""
+                    link = driver.find_element(by=By.LINK_TEXT, value="").get_attribute('href')
                     if link == "":
                         raise NoSuchElementException
                     count += 1


MarketPlaces/AnonymousMarketplace/parser.py  (+17, -20)

@@ -41,12 +41,12 @@ def anonymousMarketplace_description_parser(soup: Tag):
         describe_output += div.text
     describe = cleanString(describe_output.strip())

-    product_ratings: Tag = soup.find("div", {"class": "star-rating"})
+    product_ratings: Tag = soup.find("div", {"class": "woocommerce-product-rating"})

-    product_reviews = product_ratings.find("div", {"class": "woocommerce-product-rating"}).find("strong", {"class": "rating"}).text
+    product_reviews = product_ratings.find("span", {"class": "rating"}).text
     reviews = cleanString(product_reviews.strip())

-    product_star_rating = product_ratings.find("span", {"class": "rating"}).text
+    product_star_rating = product_ratings.find("strong", {"class": "rating"}).text
     rating_item = cleanString(product_star_rating.strip())

     product_price = soup.find("span", {"class": "woocommerce-Price-amount amount"}).text

@@ -86,15 +86,16 @@ def anonymousMarketplace_listing_parser(soup: Tag):
     shipFrom = []  # 18 Product_ShippedFrom
     shipTo = []  # 19 Product_ShippedTo
     href = []  # 20 Product_Links

-    product_list: ResultSet[Tag] = soup.find("ul", {"class": "product_list_widget"}).find_all("li")
+    woo = soup.find('div', {"class": "woocommerce"})
+    product_list = woo.find('ul', {"class": "products columns-4"}, recursive=False).find_all('li')

     for item in product_list:
-        item_href = item.find("a").get("href")
+        item_href = item.find("a", recursive=False).get("href")
         href.append(item_href)

-        item_name = item.find("span", {"class": "product-title"}).text
+        item_name = item.find("h2").text
         name.append(cleanString(item_name.strip()))

         item_rating = item.find("div", {"class": "star-rating"}).find("strong", {"class": "rating"}).text

@@ -103,14 +104,11 @@ def anonymousMarketplace_listing_parser(soup: Tag):
         try:
             item_price = item.find("span", {"class": "woocommerce-Price-amount amount"}).text
             item_price = item_price.replace("$", "").strip()
-            USD.append(item_price)
+            USD.append(cleanNumbers(item_price))
         except AttributeError:
             USD.append("-1")

-        vendor.append("Anonymous")
+        vendor.append("AnonymousMarketplace")
         rating_vendor.append("-1")
         success.append("-1")
         CVE.append("-1")

@@ -153,10 +151,6 @@ def anonymousMarketplace_listing_parser(soup: Tag):
         shipTo=shipTo,
         href=href
     )

 #called by the crawler to get description links on a listing page

@@ -167,10 +161,13 @@ def anonymous_links_parser(soup):
     # Returning all links that should be visited by the Crawler
     href = []

-    listing = soup.find('ul', {"class": "product_list_widget"}).findAll('li')
+    woo = soup.find('div', {"class": "woocommerce"})
+    listing = woo.find('ul', {"class": "products columns-4"}, recursive=False).find_all('li')

     for a in listing:
-        bae = a.find('a', href=True)
+        bae = a.find('a', href=True, recursive=False)
         link = bae['href']
         href.append(link)
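Note on the fix above: the listing parser now anchors on the page's main woocommerce container and passes recursive=False so only direct children can match, instead of scraping the product_list_widget sidebar. A minimal, self-contained sketch of that BeautifulSoup pattern (the HTML stub is illustrative, not a real AnonymousMarketplace page):

    from bs4 import BeautifulSoup

    # Illustrative WooCommerce-style markup; real listing pages are larger.
    html = """
    <div class="woocommerce">
      <ul class="products columns-4">
        <li><a href="/product/a"><h2>Product A</h2></a></li>
        <li><a href="/product/b"><h2>Product B</h2></a></li>
      </ul>
    </div>
    """

    soup = BeautifulSoup(html, "html.parser")
    woo = soup.find("div", {"class": "woocommerce"})
    # recursive=False restricts the search to direct children, so a nested
    # sidebar widget with the same classes deeper in the tree cannot match.
    product_list = woo.find("ul", {"class": "products columns-4"}, recursive=False).find_all("li")
    for item in product_list:
        print(item.find("h2").text, item.find("a", recursive=False)["href"])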


MarketPlaces/Apocalypse/crawler_selenium.py  (+12, -4)

@@ -203,8 +203,12 @@ def getNameFromURL(url):
 def getInterestedLinks():
     links = []

-    # # Hacking Services
-    # links.append('http://apocam5hnoqskkmhr325nivjuh5phbmmggadxgcjabzzirap5iklkxad.onion/subcategory/19')
+    # # Digital Goods
+    # links.append('http://apocam5hnoqskkmhr325nivjuh5phbmmggadxgcjabzzirap5iklkxad.onion/category/74')
+    # # Fraud
+    # links.append('http://apocam5hnoqskkmhr325nivjuh5phbmmggadxgcjabzzirap5iklkxad.onion/category/75')
+    # # Services
+    # links.append('http://apocam5hnoqskkmhr325nivjuh5phbmmggadxgcjabzzirap5iklkxad.onion/category/76')
     # software and malware
     links.append('http://apocam5hnoqskkmhr325nivjuh5phbmmggadxgcjabzzirap5iklkxad.onion/subcategory/30')

@@ -243,7 +247,11 @@ def crawlForum(driver):
                     except:
                         driver.refresh()
                     savePage(driver, driver.page_source, item)
-                    driver.back()
+                    # driver.back()
+                    try:
+                        driver.get(link)
+                    except:
+                        driver.refresh()

                 # comment out
                 # break

@@ -282,7 +290,7 @@ def isDescriptionLink(url):
 #@param: url of any url crawled
 #return: true if is a Listing page, false if not
 def isListingLink(url):
-    if 'subcategory' in url:
+    if 'category' in url:
         return True
     return False


MarketPlaces/DB_Connection/db_connection.py  (+0, -1)

@@ -2,7 +2,6 @@ __author__ = 'DarkWeb'
 import psycopg2
 import traceback
-import configparser

 def connectDataBase():


MarketPlaces/DarkBazar/crawler_selenium.py  (+135, -111)

@@ -1,4 +1,4 @@
-__author__ = 'Helium'
+__author__ = 'DarkWeb'

 '''
 DarkBazar Marketplace Crawler (Selenium)

@@ -10,6 +10,7 @@ from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
 from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
 from selenium.webdriver.firefox.service import Service
 from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support.ui import Select
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.common.by import By

@@ -25,99 +26,11 @@ from MarketPlaces.DarkBazar.parser import darkbazar_links_parser
 from MarketPlaces.Utilities.utilities import cleanHTML

 counter = 1
-baseURL = 'http://jw5e5sdywqupaqgtt43uq5ysfqpd2vzefl65s2fcjlj4qfxivynv6bqd.onion/log.php'
+baseURL = 'http://jw5e5sdywqupaqgtt43uq5ysfqpd2vzefl65s2fcjlj4qfxivynv6bqd.onion/'

-def login(driver):
-    input("Press ENTER when CAPTCHA is complete and login page has loaded\n")
-
-    # entering username and password into input boxes
-    usernameBox = driver.find_element(by=By.XPATH, value='//input[@name="username"]')
-    # Username here
-    usernameBox.send_keys('aliciamykeys')
-    passwordBox = driver.find_element(by=By.XPATH, value='//input[@name="password"]')
-    # Password here
-    passwordBox.send_keys('aliciawherearemykey$')
-
-    input("Press ENTER when CAPTCHA is completed and you exit the newsletter\n")
-
-    # wait for listing page show up (This Xpath may need to change based on different seed url)
-    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
-        (By.XPATH, '//*[@id="submit"]')))
-
-def crawlForum(driver):
-    print("Crawling the DarkBazar market")
-
-    linksToCrawl = getInterestedLinks()
-
-    i = 0
-    while i < len(linksToCrawl):
-        link = linksToCrawl[i]
-        print('Crawling :', link)
-        try:
-            has_next_page = True
-            count = 0
-
-            while has_next_page:
-                try:
-                    driver.get(link)
-                except:
-                    driver.refresh()
-                html = driver.page_source
-                savePage(html, link)
-
-                list = productPages(html)
-
-                for item in list:
-                    itemURL = urlparse.urljoin(baseURL, str(item))
-                    try:
-                        driver.get(itemURL)
-                    except:
-                        driver.refresh()
-                    savePage(driver.page_source, item)
-                    driver.back()
-
-                    # comment out
-                    break
-
-                # comment out
-                if count == 1:
-                    break
-
-                try:
-                    link = driver.find_element(by=By.XPATH, value='//a[contains(text(), "Next")]').get_attribute('href')
-                    if link == "":
-                        raise NoSuchElementException
-                    count += 1
-
-                except NoSuchElementException:
-                    has_next_page = False
-
-        except Exception as e:
-            print(link, e)
-        i += 1
-
-    input("Crawling DarkBazar forum done sucessfully. Press ENTER to continue\n")
-
-def savePage(page, url):
-    cleanPage = cleanHTML(page)
-    filePath = getFullPathName(url)
-    os.makedirs(os.path.dirname(filePath), exist_ok=True)
-    open(filePath, 'wb').write(cleanPage.encode('utf-8'))
-    return
-
-def getInterestedLinks():
-    links = []
-
-    links.append('http://jw5e5sdywqupaqgtt43uq5ysfqpd2vzefl65s2fcjlj4qfxivynv6bqd.onion/cat.php?category=5')
-
-    return links
-
-def crawler():
-    startCrawling()

 def startCrawling():
-    opentor()
+    # opentor()
     mktName = getMKTName()
     driver = getAccess()

@@ -129,7 +42,8 @@ def startCrawling():
         print(driver.current_url, e)
     closetor(driver)

+    # new_parse(mktName, baseURL, False)
+    new_parse(mktName, baseURL, True)

@@ -143,14 +57,16 @@ def opentor():
     input('Tor Connected. Press ENTER to continue\n')
     return

 # Returns the name of the website
 def getMKTName():
     name = 'DarkBazar'
     return name

 # Return the base link of the website
 def getFixedURL():
-    url = 'http://jw5e5sdywqupaqgtt43uq5ysfqpd2vzefl65s2fcjlj4qfxivynv6bqd.onion/log.php'
+    url = 'http://jw5e5sdywqupaqgtt43uq5ysfqpd2vzefl65s2fcjlj4qfxivynv6bqd.onion/'
     return url

@@ -164,6 +80,7 @@ def closetor(driver):
     time.sleep(3)
     return

 # Creates FireFox 'driver' and configure its 'Profile'
 # to use Tor proxy and socket
 def createFFDriver():

@@ -179,9 +96,9 @@ def createFFDriver():
     ff_prof.set_preference("privacy.sanitize.sanitizeOnShutdown", True)
     ff_prof.set_preference("signon.rememberSignons", False)
     ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
-    ff_prof.set_preference("network.dns.disablePrefetch", True)
-    ff_prof.set_preference("network.http.sendRefererHeader", 0)
-    ff_prof.set_preference("permissions.default.image", 2)
+    # ff_prof.set_preference("network.dns.disablePrefetch", True)
+    # ff_prof.set_preference("network.http.sendRefererHeader", 0)
+    ff_prof.set_preference("permissions.default.image", 1)
     ff_prof.set_preference("browser.download.folderList", 2)
     ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
     ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")

@@ -197,11 +114,10 @@ def createFFDriver():
     driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
+    driver.maximize_window()

     return driver

-def productPages(html):
-    soup = BeautifulSoup(html, "html.parser")
-    return darkbazar_links_parser(soup)

 #the driver 'gets' the url, attempting to get on the site, if it can't access return 'down'
 def getAccess():

@@ -214,10 +130,40 @@ def getAccess():
         driver.close()
         return 'down'

+def login(driver):
+    input("Press ENTER when CAPTCHA is complete and login page has loaded\n")
+
+    # entering username and password into input boxes
+    usernameBox = driver.find_element(by=By.XPATH, value='//input[@name="username"]')
+    # Username here
+    usernameBox.send_keys('aliciamykeys')
+    passwordBox = driver.find_element(by=By.XPATH, value='//input[@name="password"]')
+    # Password here
+    passwordBox.send_keys('aliciawherearemykey$')
+    # session time
+    session_select = Select(driver.find_element(by=By.XPATH, value='/html/body/main/div/div/div/div/div/form/div[4]/div/div[2]/select'))
+    session_select.select_by_visible_text('Session 60min')
+
+    input("Press ENTER when CAPTCHA is completed and you exit the newsletter\n")
+
+    # wait for listing page show up (This Xpath may need to change based on different seed url)
+    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
+        (By.XPATH, '//*[@id="submit"]')))
+
+def savePage(driver, page, url):
+    cleanPage = cleanHTML(driver, page)
+    filePath = getFullPathName(url)
+    os.makedirs(os.path.dirname(filePath), exist_ok=True)
+    open(filePath, 'wb').write(cleanPage.encode('utf-8'))
+    return

 def getFullPathName(url):
-    from Forums.Initialization.forums_mining import config, CURRENT_DATE
-    mainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + getForumName() + "/HTML_Pages")
+    from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
+    mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getForumName() + "/HTML_Pages")
     fileName = getNameFromURL(url)
     if isDescriptionLink(url):
         fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')

@@ -225,6 +171,88 @@ def getFullPathName(url):
         fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
     return fullPath

+def getForumName() -> str:
+    name = 'DarkBazar'
+    return name
+
+def getNameFromURL(url):
+    global counter
+    name = ''.join(e for e in url if e.isalnum())
+    if name == '':
+        name = str(counter)
+        counter = counter + 1
+    return name
+
+def getInterestedLinks():
+    links = []
+
+    # # Digital Goods
+    # links.append('http://jw5e5sdywqupaqgtt43uq5ysfqpd2vzefl65s2fcjlj4qfxivynv6bqd.onion/cat.php?category=4')
+    # Services
+    links.append('http://jw5e5sdywqupaqgtt43uq5ysfqpd2vzefl65s2fcjlj4qfxivynv6bqd.onion/cat.php?category=5')
+
+    return links
+
+def crawlForum(driver):
+    print("Crawling the DarkBazar market")
+
+    linksToCrawl = getInterestedLinks()
+
+    i = 0
+    while i < len(linksToCrawl):
+        link = linksToCrawl[i]
+        print('Crawling :', link)
+        try:
+            has_next_page = True
+            count = 0
+
+            while has_next_page:
+                try:
+                    driver.get(link)
+                except:
+                    driver.refresh()
+                html = driver.page_source
+                savePage(driver, html, link)
+
+                list = productPages(html)
+
+                for item in list:
+                    itemURL = urlparse.urljoin(baseURL, str(item))
+                    try:
+                        driver.get(itemURL)
+                    except:
+                        driver.refresh()
+                    savePage(driver, driver.page_source, item)
+                    driver.back()
+
+                    # comment out
+                    # break
+
+                # comment out
+                if count == 1:
+                    break
+
+                try:
+                    link = driver.find_element(by=By.XPATH, value='//a[contains(text(), "Next")]').get_attribute('href')
+                    if link == "":
+                        raise NoSuchElementException
+                    count += 1
+
+                except NoSuchElementException:
+                    has_next_page = False
+
+        except Exception as e:
+            print(link, e)
+        i += 1
+
+    print("Crawling the DarkBazar market done.")

 # Returns 'True' if the link is Topic link, may need to change for every website
 def isDescriptionLink(url):
     if 'item' in url:

@@ -238,15 +266,11 @@
         return True
     return False

-def getForumName() -> str:
-    name = 'DarkBazar'
-    return name
-
-def getNameFromURL(url):
-    global counter
-    name = ''.join(e for e in url if e.isalnum())
-    if name == '':
-        name = str(counter)
-        counter = counter + 1
-    return name
+def productPages(html):
+    soup = BeautifulSoup(html, "html.parser")
+    return darkbazar_links_parser(soup)

+def crawler():
+    startCrawling()
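Note on the new login(): the session-length dropdown is driven through Selenium's Select wrapper, imported above. A short sketch of that API in isolation (the URL and XPath below are placeholders, not DarkBazar's):

    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import Select

    driver = webdriver.Firefox()
    driver.get("http://example.onion/login")  # placeholder URL

    # Select() wraps a <select> WebElement and raises if the element
    # is not actually a <select> tag.
    dropdown = Select(driver.find_element(by=By.XPATH, value="//select[@name='session']"))

    # Options can be chosen by visible text, value attribute, or index.
    dropdown.select_by_visible_text("Session 60min")
    # dropdown.select_by_value("3600")
    # dropdown.select_by_index(2)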

MarketPlaces/DarkBazar/parser.py  (+55, -52)

@@ -1,3 +1,5 @@
+__author__ = 'DarkWeb'
+
 # Here, we are importing the auxiliary functions to clean or convert data
 from MarketPlaces.Utilities.utilities import *

@@ -44,21 +46,23 @@ def darkbazar_description_parser(soup):
     vendor = divmb[1].find('a').text.strip()

     # Finding Vendor Rating
-    rating = soup.find('div', {'class': ""}).text
-    rating = rating.replace("Vendor's Review : ", "")
-    rating = rating.strip()
+    temp = soup.find('div', {'class': ""}).text
+    temp = temp.split('(')
+    rating = temp[0].replace("Vendor's Review : ", "")
+    rating = rating.replace("%", "")
+    rating_vendor = rating.strip()
+
+    # Finding the Product Rating and Number of Product Reviews
+    reviews = temp[2].replace(" review)", "")
+    reviews = reviews.strip()

-    # Finding Successful Transactions
-    success = divmb[3].text
-    success = success.replace("Level:", "")
-    success = success.strip()
+    temp = temp[1].split(")")
+    rating = temp[1].replace("Product Review : ", "")
+    rating = rating.replace("%", "")
+    rating_item = rating.strip()

     # Finding Prices
-    USD = soup.find('div', {'class': "h3 text - primary"}).text.strip()
-
-    # Finding Escrow
-    escrow = divmb[5].find('span', {'class': "badge badge-danger"}).text.strip()
+    USD = soup.find('div', {'class': "h3 text-primary"}).text.strip()

     # Finding the Product Category
     pmb = soup.findAll('p', {'class': "mb-1"})

@@ -78,7 +82,6 @@ def darkbazar_description_parser(soup):
     sold = sold.replace("sold", "")
     sold = sold.strip()

     # Finding Shipment Information (Origin)
-    pmb[0].text
     shipFrom = shipFrom.replace("Ships from: ", "").strip()

@@ -91,13 +94,6 @@ def darkbazar_description_parser(soup):
     cardbody = soup.findAll('div', {'class': "card-body"})
     describe = cardbody[1].text.strip()

-    # Finding the Number of Product Reviews
-    reviews = reviews.find('div', {'class': "product-rating"}).text
-    reviews = reviews.replace("(", "")
-    reviews = reviews.replace(" review)", "")
-    reviews = reviews.strip()

     # Searching for CVE and MS categories
     cve = soup.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
     if cve:

@@ -117,8 +113,8 @@ def darkbazar_description_parser(soup):
         MS = MS.replace('\n', '')

     # Populating the final variable (this should be a list with all fields scraped)
-    row = (name, describe, CVE, MS, review, category, shipFrom, shipTo, left, escrow, vendor,
-           sold, addDate, BTC, USD, rating, success, EURO)
+    row = (vendor, rating_vendor, success, name, describe, CVE, MS, category, views, reviews, rating_item, addDate,
+           BTC, USD, EURO, sold, left, shipFrom, shipTo)

     # Sending the results
     return row

@@ -129,30 +125,30 @@ def darkbazar_description_parser(soup):
 # @param: soup object looking at html page of listing page
 # return: 'row' that contains a variety of lists that each hold info on the listing page
 def darkbazar_listing_parser(soup):

     # Fields to be parsed
-    nm = 0  # Total_Products (Should be Integer)
-    mktName = "DarkBazar"  # 0 Marketplace_Name
-    name = []  # 1 Product_Name
-    CVE = []  # 2 Product_CVE_Classification (Common Vulnerabilities and Exposures)
-    MS = []  # 3 Product_MS_Classification (Microsoft Security)
-    category = []  # 4 Product_Category
-    describe = []  # 5 Product_Description
-    escrow = []  # 6 Vendor_Warranty
-    views = []  # 7 Product_Number_Of_Views
-    reviews = []  # 8 Product_Number_Of_Reviews
-    addDate = []  # 9 Product_AddDate
-    lastSeen = []  # 10 Product_LastViewDate
-    BTC = []  # 11 Product_BTC_SellingPrice
-    USD = []  # 12 Product_USD_SellingPrice
-    EURO = []  # 13 Product_EURO_SellingPrice
-    sold = []  # 14 Product_QuantitySold
-    qLeft = []  # 15 Product_QuantityLeft
-    shipFrom = []  # 16 Product_ShippedFrom
-    shipTo = []  # 17 Product_ShippedTo
-    vendor = []  # 18 Vendor
-    rating = []  # 19 Vendor_Rating
-    success = []  # 20 Vendor_Successful_Transactions
-    href = []  # 23 Product_Links (Urls)
+    nm = 0  # *Total_Products (Should be Integer)
+    mktName = "DarkBazar"  # 0 *Marketplace_Name
+    vendor = []  # 1 *Vendor y
+    rating_vendor = []  # 2 Vendor_Rating
+    success = []  # 3 Vendor_Successful_Transactions
+    name = []  # 4 *Product_Name y
+    CVE = []  # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) dont worry about this
+    MS = []  # 6 Product_MS_Classification (Microsoft Security) dont worry about this
+    category = []  # 7 Product_Category y
+    describe = []  # 8 Product_Description
+    views = []  # 9 Product_Number_Of_Views
+    reviews = []  # 10 Product_Number_Of_Reviews
+    rating_item = []  # 11 Product_Rating
+    addDate = []  # 12 Product_AddDate
+    BTC = []  # 13 Product_BTC_SellingPrice
+    USD = []  # 14 Product_USD_SellingPrice y
+    EURO = []  # 15 Product_EURO_SellingPrice
+    sold = []  # 16 Product_QuantitySold
+    qLeft = []  # 17 Product_QuantityLeft
+    shipFrom = []  # 18 Product_ShippedFrom
+    shipTo = []  # 19 Product_ShippedTo
+    href = []  # 20 Product_Links

     listing = soup.findAll('div', {"id": "itembox"})

@@ -194,13 +190,21 @@ def darkbazar_listing_parser(soup):
         cat = cat.strip()
         category.append(cat)

-        # Finding Number Sold and Quantity Left
-        span = lb[1].findAll("span")
-        num = span[-1].text
+        span = lb[0].findAll("span")
+
+        # Finding Number of Views
+        num = span[0].text
+        num = num.replace("views:", "")
+        num = num.strip()
+        sold.append(num)
+
+        # Finding Number Sold
+        num = span[2].text
         num = num.replace("Sold:", "")
         num = num.strip()
-        category.append(num)
+        sold.append(num)

+        # Finding Quantity Left
         quant = span[1].text
         quant = quant.replace("stock:", "")
         quant = quant.strip()

@@ -235,8 +239,8 @@ def darkbazar_listing_parser(soup):
         MS.append(MSValue)

     # Populate the final variable (this should be a list with all fields scraped)
-    return organizeProducts(mktName, nm, name, CVE, MS, category, describe, escrow, views, reviews, addDate, lastSeen,
-                            BTC, USD, EURO, qLeft, shipFrom, shipTo, vendor, rating, success, sold, href)
+    return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,
+                            reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href)

@@ -258,7 +262,6 @@ def darkbazar_links_parser(soup):
         # Adding the url to the list of urls
         link = bae[0].get('href')
-        link = cleanLink(link)
         href.append(link)

     return href
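Note on the rewritten rating block: the chained split('(') / split(')') calls only make sense against a particular shape of the vendor-review line. Working backwards from the code, that line must look roughly like the string below; this trace is an inference, not a capture from a live DarkBazar page:

    # Assumed shape of the rating line (reverse-engineered from the split calls):
    text = "Vendor's Review : 97% (Level 3) Product Review : 95% (12 review)"

    temp = text.split('(')
    # ["Vendor's Review : 97% ", "Level 3) Product Review : 95% ", "12 review)"]

    rating_vendor = temp[0].replace("Vendor's Review : ", "").replace("%", "").strip()
    reviews = temp[2].replace(" review)", "").strip()

    temp = temp[1].split(")")
    rating_item = temp[1].replace("Product Review : ", "").replace("%", "").strip()

    print(rating_vendor, rating_item, reviews)  # 97 95 12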

MarketPlaces/DarkMatter/crawler_selenium.py  (+3, -4)

@@ -229,17 +229,16 @@ def crawlForum(driver):
                 for item in list:
                     itemURL = urlparse.urljoin(baseURL, str(item))
                     try:
-                        time.sleep(1.5)  # to keep from detecting click speed
+                        time.sleep(3)  # to keep from detecting click speed
                         driver.get(itemURL)
                     except:
                         driver.refresh()
                     savePage(driver, driver.page_source, item)
-                    time.sleep(1.5)
+                    time.sleep(3)  # to keep from detecting click speed
                     driver.back()
-                    # to keep from detecting click speed

                     # comment out
-                    break
+                    # break

                 # comment out
                 if count == 1:

MarketPlaces/DigitalThriftShop/crawler_selenium.py  (+1, -1)

@@ -235,7 +235,7 @@ def crawlForum(driver):
                     driver.back()

                     # comment out
-                    break
+                    # break

                 # comment out
                 if count == 1:


MarketPlaces/HiddenMarket/crawler_selenium.py  (+1, -1)

@@ -277,7 +277,7 @@ def crawlForum(driver):
                     driver.back()

                     # comment out
-                    break
+                    # break

                 # comment out
                 if count == 1:


MarketPlaces/Initialization/marketsList.txt  (+1, -8)

@@ -1,8 +1 @@
-Apocalypse
-DarkMatter
-DigitalThriftShop
-HiddenMarket
-Nexus
-Robinhood
-TorBay
-ViceCity
+DarkBazar

MarketPlaces/Initialization/markets_mining.py  (+3, -0)

@@ -24,6 +24,7 @@ from MarketPlaces.HiddenMarket.crawler_selenium import crawler as crawlerHiddenM
 from MarketPlaces.RobinhoodMarket.crawler_selenium import crawler as crawlerRobinhoodMarket
 from MarketPlaces.Nexus.crawler_selenium import crawler as crawlerNexus
 from MarketPlaces.CypherMarketplace.crawler_selenium import crawler as crawlerCypher
+from MarketPlaces.DarkBazar.crawler_selenium import crawler as crawlerDarkBazar

 import configparser
 import os

@@ -137,5 +138,7 @@ if __name__ == '__main__':
             crawlerNexus()
         elif mkt == "CypherMarketplace":
             crawlerCypher()
+        elif mkt == "DarkBazar":
+            crawlerDarkBazar()

     print("\nScraping process completed!")

MarketPlaces/Initialization/prepare_parser.py  (+6, -1)

@@ -1,4 +1,4 @@
-__author__ = 'Helium'
+__author__ = 'DarkWeb'

 import glob
 import os

@@ -21,6 +21,7 @@ from MarketPlaces.HiddenMarket.parser import *
 from MarketPlaces.RobinhoodMarket.parser import *
 from MarketPlaces.Nexus.parser import *
 from MarketPlaces.MikesGrandStore.parser import *
+from MarketPlaces.DarkBazar.parser import *

 from MarketPlaces.Classifier.classify_product import predict

@@ -148,6 +149,8 @@ def parse_listing(marketPlace, listingFile, soup, createLog, logFile):
         rw = nexus_listing_parser(soup)
     elif marketPlace == "MikesGrandStore":
         rw = mikesGrandStore_listing_parser(soup)
+    elif marketPlace == "DarkBazar":
+        rw = darkbazar_listing_parser(soup)
     else:
         print("MISSING CALL TO LISTING PARSER IN PREPARE_PARSER.PY!")
         raise Exception

@@ -199,6 +202,8 @@ def parse_description(marketPlace, descriptionFile, soup, createLog, logFile):
         rmm = nexus_description_parser(soup)
     elif marketPlace == "MikesGrandStore":
         rmm = mikesGrandStore_description_parser(soup)
+    elif marketPlace == "DarkBazar":
+        rmm = darkbazar_description_parser(soup)
     else:
         print("MISSING CALL TO DESCRIPTION PARSER IN PREPARE_PARSER.PY!")
         raise Exception
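Note on the dispatch: wiring in DarkBazar means touching the same elif ladder twice (parse_listing and parse_description), the star import at the top, and the crawler ladder in markets_mining.py. One way to keep those in sync, sketched here as a hypothetical refactor rather than what this commit does (the parser names are the ones star-imported above):

    # Hypothetical: one table mapping each marketplace to its parser pair,
    # so adding a market is a single entry instead of two elif branches.
    PARSERS = {
        "DarkBazar": (darkbazar_listing_parser, darkbazar_description_parser),
        "Nexus": (nexus_listing_parser, nexus_description_parser),
    }

    def parse_listing(marketPlace, soup):
        if marketPlace not in PARSERS:
            print("MISSING CALL TO LISTING PARSER IN PREPARE_PARSER.PY!")
            raise Exception
        return PARSERS[marketPlace][0](soup)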


MarketPlaces/LionMarketplace/crawler_selenium.py  (+1, -1)

@@ -234,7 +234,7 @@ def crawlForum(driver):
                     driver.back()

                     # comment out
-                    break
+                    # break

                 # comment out
                 if count == 1:


MarketPlaces/LionMarketplace/parser.py  (+57, -110)

@@ -34,15 +34,11 @@ def lionmarketplace_description_parser(soup):
     shipTo = "-1"  # 18 Product_ShippedTo

     # vendor name
-    try:
-        temp = soup.find('div', {'class': 'btn-group'}).find('a').text
-        vendor = (cleanString(temp.strip()))
-    except:
-        print('vendor')
-        vendor = "-1"
+    temp = soup.find('div', {'class': 'btn-group'}).find('a').text
+    vendor = (cleanString(temp.strip()))

     # table with info
-    table = soup.find('table', {'class', 'table border-0 text-left table-borderless'})
+    table = soup.find('table')
     rows = table.findAll('tr')

     # successful transaction

@@ -51,37 +47,20 @@ def lionmarketplace_description_parser(soup):
     # vendor rating 5
     rating_vendor = '-1'

     # product name
-    try:
-        temp = soup.find('div', {'class', 'row'}).find('h2').text
-        name = (cleanString(temp.strip()))
-    except:
-        name = '-1'
-        print('product name')
+    temp = soup.find('div', {'class', 'row'}).find('h2').text
+    name = (cleanString(temp.strip()))

     # product description
-    try:
-        temp = soup.find('div', {'class': "mt-4"}).findAll('p')
-        temp = temp[1].text
-        if "\n" in temp:
-            temp = temp.replace("\n", " ")
-            temp = temp.replace("\r", " ")
-        describe = cleanString(temp.strip())
-    except:
-        describe="-1"
-        print('describe')
+    temp = soup.find('div', {'class': "mt-4"}).find(text=True, recursive=False)
+    describe = cleanString(temp.strip())

     CVE = "-1"  # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) dont worry about that much
     MS = "-1"  # 6 Product_MS_Classification (Microsoft Security) dont worry about that much

     # product category
-    try:
-        temp = rows[1].find('strong').text
-        category = cleanString(temp.strip())
-    except:
-        category = "-1"
-        print('category')
+    temp = rows[1].find('strong').text
+    category = cleanString(temp.strip())

     # product number of views
     views = "-1"

@@ -92,54 +71,38 @@ def lionmarketplace_description_parser(soup):
     BTC = "-1"

     # USD selling price
-    try:
-        temp = rows[2].find('strong').text
-        if " $" in temp:
-            temp = temp.replace(" $", "")
-        elif "$" in temp:
-            temp = temp.replace("$", "")
-        USD = cleanString((temp.strip()))
-    except:
-        try:
-            temp = soup.find('li').find('strong').text
-            if " $" in temp:
-                temp = temp.replace(" $", "")
-            elif "$" in temp:
-                temp = temp.replace("$", "")
-            USD = cleanString((temp.strip()))
-        except:
-            print("USD")
+    temp = rows[2].find('strong').text
+    if " $" in temp:
+        temp = temp.replace(" $", "")
+    elif "$" in temp:
+        temp = temp.replace("$", "")
+    USD = cleanString((temp.strip()))

     EURO = "-1"  # 14 Product_EURO_SellingPrice

     # product sold
-    try:
-        if (len(rows) <= 5):
-            temp = rows[4].find('td').text
-            string = cleanString(temp)
-            if (string == 'Left/Sold'):
-                temp = rows[4].findAll('td')
-                temp = temp[1].findAll('span')
-
-                # left
-                temp2 = temp[1].text
-                temp3 = temp[1].text
-
-                if(" items" in temp2):
-                    temp2 = temp2.replace(" items", "")
-                if(" items" in temp3):
-                    temp3 = temp3.replace(" items", "")
-
-                sold = (cleanString(temp2.strip()))
-                left = cleanString(temp3.strip())
-            else:
-                sold = '-1'
-                left = "-1"
-        else:
-            sold = '-1'
-            left = "-1"
-    except:
-        print("success")
-        sold = '-1'
-        left = "-1"
+    if (len(rows) <= 5):
+        temp = rows[4].find('td').text
+        string = cleanString(temp)
+        if (string == 'Left/Sold'):
+            temp = rows[4].findAll('td')
+            temp = temp[1].findAll('span')
+
+            # left
+            temp2 = temp[1].text
+            temp3 = temp[1].text
+
+            if(" items" in temp2):
+                temp2 = temp2.replace(" items", "")
+            if(" items" in temp3):
+                temp3 = temp3.replace(" items", "")
+
+            sold = (cleanString(temp2.strip()))
+            left = cleanString(temp3.strip())
+        else:
+            sold = '-1'
+            left = "-1"
+    else:
+        sold = '-1'
+        left = "-1"

@@ -161,7 +124,7 @@
 def lionmarketplace_listing_parser(soup):
     # Fields to be parsed
     nm = 0  # *Total_Products (Should be Integer)
-    mktName = "M00nkeyMarket"  # 0 *Marketplace_Name
+    mktName = "LionMarketplace"  # 0 *Marketplace_Name
     vendor = []  # 1 *Vendor y
     rating_vendor = []  # 2 Vendor_Rating
     success = []  # 3 Vendor_Successful_Transactions

@@ -183,21 +146,20 @@ def lionmarketplace_listing_parser(soup):
     shipTo = []  # 19 Product_ShippedTo
     href = []  # 20 Product_Links

-    listing = soup.findAll('div', {"class": "card-body"})
+    listings = soup.findAll('div', {"class": "col-md-4 my-md-0 my-2 col-12"})

     # Populating the Number of Products
-    nm = len(listing)
+    nm = len(listings)

-    for a in listing:
+    for listing in listings:
+        a = listing.find('div', {"class": "card-body"})
         row = a.findAll('p')

         # vendor
-        try:
-            temp = row[3].text
-            vendor.append(cleanString(temp.strip()))
-        except:
-            vendor.append("-1")
-            print('vendor')
+        temp = row[3].text
+        temp = temp.replace("Vendor:", "")
+        vendor.append(cleanString(temp.strip()))

         # vendor rating
         rating_vendor.append("-1")

@@ -206,25 +168,16 @@ def lionmarketplace_listing_parser(soup):
         success.append("-1")

         # product name
-        try:
-            temp = a.find('a').text
-            name.append(cleanString(temp.strip()))
-        except:
-            name.append("-1")
-            print('product name')
+        temp = a.find('a').text
+        name.append(cleanString(temp.strip()))

         CVE.append('-1')
         MS.append('-1')

         # product category
-        try:
-            temp = row[2].text
-            if "Category: " in temp:
-                temp = temp.replace("Category: ", "")
-            category.append(cleanString(temp.strip()))
-        except:
-            print("Error in product category")
+        temp = row[2].text
+        temp = temp.replace("Category: ", "")
+        category.append(cleanString(temp.strip()))

         describe.append('-1')

@@ -238,14 +191,10 @@ def lionmarketplace_listing_parser(soup):
         BTC.append('-1')

         # USD
-        try:
-            temp = row[0].find('strong').text
-            if ' $' in temp:
-                temp = temp.replace(" $", "")
-            USD.append(cleanString(temp.strip()))  # 14 Product_USD_SellingPrice
-        except:
-            print("USD")
-            USD.append("-1")
+        temp = row[0].find('strong').text
+        if ' $' in temp:
+            temp = temp.replace(" $", "")
+        USD.append(cleanString(temp.strip()))  # 14 Product_USD_SellingPrice

         EURO.append("-1")  # 15 Product_EURO_SellingPrice

@@ -257,11 +206,8 @@ def lionmarketplace_listing_parser(soup):
         shipTo.append('-1')  # 19 Product_ShippedTo

         # href
-        try:
-            temp = a.find('a').get('href')
-            href.append(temp)
-        except:
-            print('product name')
+        temp = a.find('a').get('href')
+        href.append(temp)

     # Populate the final variable (this should be a list with all fields scraped)
     return organizeProducts(mktName, nm, vendor, rating_vendor, success, name, CVE, MS, category, describe, views,

@@ -276,9 +222,10 @@ def lionmarketplace_links_parser(soup):
     # Returning all links that should be visited by the Crawler
     href = []

-    listing = soup.findAll('div', {"class": "container d-flex justify-content-center"})
+    listings = soup.findAll('div', {"class": "col-md-4 my-md-0 my-2 col-12"})

-    for a in listing:
+    for listing in listings:
+        a = listing.find('div', {"class": "card-body"})
         bae = a.find('a', href=True)
         link = bae['href']
         href.append(link)
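Note on the pattern repeated through this file: each field's try/except, which printed a label and appended "-1", is replaced by straight-line parsing, so a layout change now raises instead of silently filling rows with placeholders. A self-contained before/after sketch (plain strip() stands in for the repo's cleanString):

    from bs4 import BeautifulSoup

    html = '<div class="card-body"><p>$9</p><p>desc</p><p>Category: RAT</p><p>Vendor: alice</p></div>'
    a = BeautifulSoup(html, "html.parser").find("div", {"class": "card-body"})
    row = a.findAll("p")
    vendor = []

    # Old style: any failure was swallowed and recorded as "-1", so a markup
    # change could fill the dataset with placeholder rows.
    try:
        vendor.append(row[3].text.strip())
    except:
        print('vendor')
        vendor.append("-1")

    # New style: no try/except; if the markup changes, the IndexError or
    # AttributeError propagates and the page is flagged as a parse failure
    # upstream instead of producing a bad row.
    temp = row[3].text
    temp = temp.replace("Vendor:", "")
    vendor.append(temp.strip())
    print(vendor)  # ['Vendor: alice', 'alice']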


MarketPlaces/M00nkeyMarket/crawler_selenium.py  (+1, -1)

@@ -159,7 +159,7 @@ def login(driver):
     # wait for listing page show up (This Xpath may need to change based on different seed url)
     WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
-        (By.XPATH, "/html/body/div/div[1]/div/div/div[2]/div[3]/div")))
+        (By.XPATH, "/html/body/div/div[2]/div/div/div/div/div/div[1]/a/img")))

 # Saves the crawled html page, makes the directory path for html pages if not made
 def savePage(driver, page, url):


MarketPlaces/Nexus/crawler_selenium.py  (+19, -11)

@@ -173,16 +173,24 @@ def getNameFromURL(url):
 def getInterestedLinks():
     links = []

-    # Bot nets
-    links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/malware/botnets/')
-    # # Rats
-    # links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/malware/rats/')
-    # # Ransomware
-    # links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/malware/ransomware/')
-    # # Other Malware
-    # links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/malware/outros-malware/')
-    # # Hacking Tools & Scripting
-    # links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/hacking-spam/ferramentas-de-hacking-scripts/')
+    # malware
+    links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/malware/')
+    # # hacking-spam
+    # links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/hacking-spam/')
+    # # hacking services
+    # links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/servicos/hacking/')
+    # # programming services
+    # links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/servicos/programacao/')
+    # # remote admin services
+    # links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/servicos/administracao-remota/')
+    # # hacking guides
+    # links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/guias-tutoriais/guia-de-hacking/')
+    # # malware guides
+    # links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/guias-tutoriais/guia-de-malware/')
+    # # fraud guides
+    # links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/guias-tutoriais/guia-de-fraudes/')
+    # # fraud software
+    # links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/fraudes/software-de-fraude/')

     return links

@@ -222,7 +230,7 @@ def crawlForum(driver):
                     driver.back()

                     # comment out
-                    break
+                    # break

                 # comment out
                 if count == 1:


MarketPlaces/RobinhoodMarket/crawler_selenium.py  (+3, -2)

@@ -218,7 +218,7 @@ def crawlForum(driver):
                     driver.back()

                     # comment out
-                    break
+                    # break

                 # comment out
                 if count == 1:

@@ -266,5 +266,6 @@ def crawler():
     startCrawling()
     # print("Crawling and Parsing BestCardingWorld .... DONE!")

 if __name__ == '__main__':
-    startCrawling()
+    startCrawling()

MarketPlaces/ThiefWorld/crawler_selenium.py  (+2, -2)

@@ -144,7 +144,7 @@ def getAccess():
 def login(driver):
     # wait for page to show up (This Xpath may need to change based on different seed url)
     WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
-        (By.XPATH, "/html/body/div/div[1]/div/div[1]/div[1]/ul")))
+        (By.XPATH, "/html/body/div/header/div[2]/div/nav/div[2]/a[1]")))

     temp = driver.find_element(By.XPATH, '/html/body/div/header/div[2]/div/nav/div[2]/a[1]').get_attribute(
         'href')  # /html/body/div/div[2]/div/div[2]/div

@@ -242,7 +242,7 @@ def crawlForum(driver):
                     driver.back()

                     # comment out
-                    break
+                    # break

                 # comment out
                 if count == 1:


MarketPlaces/TorBay/crawler_selenium.py  (+1, -1)

@@ -228,7 +228,7 @@ def crawlForum(driver):
                     driver.back()

                     # comment out
-                    break
+                    # break

                 # comment out
                 if count == 1:


MarketPlaces/TorMarket/crawler_selenium.py  (+7, -9)

@@ -37,7 +37,6 @@ def startCrawling():
     if driver != 'down':
         try:
-            # login(driver)
             crawlForum(driver)
         except Exception as e:
             print(driver.current_url, e)

@@ -101,8 +100,8 @@ def createFFDriver():
     ff_prof.set_preference("privacy.sanitize.sanitizeOnShutdown", True)
     ff_prof.set_preference("signon.rememberSignons", False)
     ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
-    ff_prof.set_preference("network.dns.disablePrefetch", True)
-    ff_prof.set_preference("network.http.sendRefererHeader", 0)
+    # ff_prof.set_preference("network.dns.disablePrefetch", True)
+    # ff_prof.set_preference("network.http.sendRefererHeader", 0)
     ff_prof.set_preference("permissions.default.image", 1)
     ff_prof.set_preference("browser.download.folderList", 2)
     ff_prof.set_preference("browser.download.manager.showWhenStarting", False)

@@ -186,12 +185,12 @@ def getNameFromURL(url):
 def getInterestedLinks():
     links = []

-    # # Hacking Tutorials
-    # links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/guides-tutorials/hacking/')
+    # # Tutorials
+    # links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/guides-tutorials/')
     # Malware
     links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/malware/')
-    # # Hacking Services
-    # links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/services/hacking-services/')
+    # # Services
+    # links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/services/')

     return links

@@ -238,8 +237,7 @@ def crawlForum(driver):
                     break

                 try:
-                    link = driver.find_element(by=By.XPATH, value=
-                        '/html/body/div[2]/div/div/div[1]/main/nav/ul/li[5]/a').get_attribute('href')
+                    link = driver.find_element(by=By.LINK_TEXT, value='Next').get_attribute('href')
                     if link == "":
                         raise NoSuchElementException
                     count += 1
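Note on the pagination fix: the absolute XPath to the fifth nav item is swapped for By.LINK_TEXT, which keys on the anchor's visible text and survives layout shuffles. A minimal sketch (the URL is a placeholder; "Next" is the link text the new code assumes):

    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.common.exceptions import NoSuchElementException

    driver = webdriver.Firefox()
    driver.get("http://example.onion/product-category/malware/")  # placeholder URL

    try:
        # Matches an <a> whose rendered text is exactly "Next"; far less
        # brittle than /html/body/div[2]/.../li[5]/a.
        next_url = driver.find_element(by=By.LINK_TEXT, value="Next").get_attribute("href")
    except NoSuchElementException:
        next_url = None  # no "Next" link means the last page was reached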


MarketPlaces/TorMarket/parser.py  (+49, -52)

@@ -104,61 +104,58 @@ def tormarket_listing_parser(soup):
     shipFrom = []  # 18 Product_ShippedFrom
     shipTo = []  # 19 Product_ShippedTo
     href = []  # 20 Product_Links

-    products_list = soup.find_all('li')
-    nm = 0
+    products_list = soup.find('ul', {"class": "products columns-3 tablet-columns-2 mobile-columns-1"}).find_all('li')
+    nm = len(products_list)

     for product in products_list:
+        # Finding the name of the product
+        name_of_product = product.find("h2", {"class": "woocommerce-loop-product__title"}).find("a").text
+        name_of_product_cleaned = cleanString(name_of_product.strip())
+        # print(name_of_product_cleaned)
+        name.append(name_of_product_cleaned)
+        #finding the URL
         try:
-            # Finding the name of the product
-            name_of_product = product.find("h2", {"class": "woocommerce-loop-product__title"}).find("a").text
-            name_of_product_cleaned = cleanString(name_of_product.strip())
-            print(name_of_product_cleaned)
-            name.append(name_of_product_cleaned)
-            #finding the URL
-            try:
-                url = product.find("div", {"class": "product-loop-content text-center"}).find("a").get("href")
-                print(url)
-                href.append(url)
-            except AttributeError as e:
-                print("I can't find the link")
-                raise e
-            #finding the rating of the product
-            rating_score_of_product = product.find("div", {"class": "product-loop-content text-center"}).find("div").find("span").text
-            rating_item.append(cleanString(rating_score_of_product.strip()))
-            print("done")
-            #finding the rating of the vendors
-            rating_score_of_vendor = product.find("div", {"class": "wcfmmp-store-rating"}).find("strong").text
-            rating_vendor.append(cleanString(rating_score_of_vendor.strip()))
-            print("done")
-            #finding the cost in USD
-            cost = product.find("span", {"class": "woocommerce-Price-amount amount"}).text
-            USD.append(cost)
-            print("done")
-            #finding the name of the vendor
-            vendor_name = product.find("div", {"class": "wcfmmp_sold_by_wrapper"}).find("a").text
-            vendor.append(cleanString(vendor_name.strip()))
-            print("done")
-            #everything else appends a -1
-            success.append("-1")
-            CVE.append("-1")
-            MS.append("-1")
-            category.append("-1")
-            describe.append("-1")
-            views.append("-1")
-            reviews.append("-1")
-            addDate.append("-1")
-            BTC.append("-1")
-            EURO.append("-1")
-            sold.append("-1")
-            qLeft.append("-1")
-            shipFrom.append("-1")
-            shipTo.append("-1")
-            print("Done! moving onto the next product!")
-            print(len(shipTo))
-            nm += 1
+            url = product.find("div", {"class": "product-loop-content text-center"}).find("a").get("href")
+            # print(url)
+            href.append(url)
         except AttributeError as e:
-            print("I'm somewhere I don't belong. I'm going to leave")
-            continue
+            print("I can't find the link")
+            raise e
+        #finding the rating of the product
+        rating_score_of_product = product.find("div", {"class": "product-loop-content text-center"}).find("div").find("span").text
+        rating_item.append(cleanString(rating_score_of_product.strip()))
+        # print("done")
+        #finding the rating of the vendors
+        rating_score_of_vendor = product.find("div", {"class": "wcfmmp-store-rating"}).find("strong").text
+        rating_vendor.append(cleanString(rating_score_of_vendor.strip()))
+        # print("done")
+        #finding the cost in USD
+        cost = product.find("span", {"class": "woocommerce-Price-amount amount"}).text
+        USD.append(cost)
+        # print("done")
+        #finding the name of the vendor
+        vendor_name = product.find("div", {"class": "wcfmmp_sold_by_wrapper"}).find("a").text
+        vendor.append(cleanString(vendor_name.strip()))
+        # print("done")
+        #everything else appends a -1
+        success.append("-1")
+        CVE.append("-1")
+        MS.append("-1")
+        category.append("-1")
+        describe.append("-1")
+        views.append("-1")
+        reviews.append("-1")
+        addDate.append("-1")
+        BTC.append("-1")
+        EURO.append("-1")
+        sold.append("-1")
+        qLeft.append("-1")
+        shipFrom.append("-1")
+        shipTo.append("-1")
+        # print("Done! moving onto the next product!")
+        # print(len(shipTo))

     # Populate the final variable (this should be a list with all fields scraped)


MarketPlaces/ViceCity/crawler_selenium.py  (+1, -1)

@@ -271,7 +271,7 @@ def crawlForum(driver):
                     driver.back()

                     # comment out
-                    break
+                    # break

                 # comment out
                 if count == 1:

