From 19e8d4aa7967caeba5dbfa785269a72f642e68b0 Mon Sep 17 00:00:00 2001
From: westernmeadow <43891839+westernmeadow@users.noreply.github.com>
Date: Wed, 7 Jun 2023 12:33:54 -0700
Subject: [PATCH] added to gitignore

---
 MarketPlaces/DarkFox/crawler_seleniumtest.py | 192 -------------------
 1 file changed, 192 deletions(-)
 delete mode 100644 MarketPlaces/DarkFox/crawler_seleniumtest.py

diff --git a/MarketPlaces/DarkFox/crawler_seleniumtest.py b/MarketPlaces/DarkFox/crawler_seleniumtest.py
deleted file mode 100644
index e183b53..0000000
--- a/MarketPlaces/DarkFox/crawler_seleniumtest.py
+++ /dev/null
@@ -1,192 +0,0 @@
-from selenium import webdriver
-from selenium.common.exceptions import NoSuchElementException
-from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
-from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
-from selenium.webdriver.firefox.service import Service
-import os
-from selenium.webdriver.support.ui import WebDriverWait
-from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.common.by import By
-from selenium.webdriver.firefox.options import Options
-from PIL import Image
-
-import codecs
-import time
-from datetime import date
-import urllib.parse as urlparse
-import os
-from bs4 import BeautifulSoup
-from MarketPlaces.DarkFox.parser import darkfox_links_parser
-
-file = open('../../path.txt', 'r')
-lines = file.readlines()
-
-# torexe = os.popen(lines[0].strip()) # path for tor.exe
-binary = FirefoxBinary(lines[0].strip()) # full path for firefox.exe
-# options = Options()
-profile = FirefoxProfile(lines[1].strip()) # full path for profile.default
-profile.set_preference('network.proxy.type', 1)
-profile.set_preference('network.proxy.socks', '127.0.0.1')
-profile.set_preference('network.proxy.socks_port', 9150)
-profile.set_preference("network.proxy.socks_remote_dns", True)
-profile.update_preferences()
-service = Service(lines[2].strip()) # full path for geckodriver.exe
-driver = webdriver.Firefox(firefox_binary=binary, firefox_profile=profile,
-                           service=service)
-
-
-# Manual captcha solver
-def captcha(driver):
-    # wait for captcha page show up
-    WebDriverWait(driver, 100).until(EC.visibility_of_element_located((By.XPATH, "/html/body/div/div/form/button[1]")))
-
-    # save captcha to local
-    driver.find_element(by=By.XPATH, value="/html/body/div/div/form/div[1]/div[1]").screenshot("captcha.png")
-
-    # open method used to open different extension image file
-    im = Image.open(r'C:\Users\CALSysLab\Documents\threatIntelligence-main\DarkWebMining_Working\MarketPlaces\DarkFox\captcha.png')
-
-    # This method will show image in any image viewer
-    im.show()
-
-    # wait until input space show up
-    inputBox = driver.find_element(by=By.XPATH, value="/html/body/div/div/form/div[1]/div[2]/input")
-
-    # ask user input captha solution in terminal
-    userIn = input("Enter solution: ")
-
-    # send user solution into the input space
-    inputBox.send_keys(userIn)
-
-    # click the verify(submit) button
-    driver.find_element(by=By.XPATH, value="/html/body/div/div/form/button[1]").click()
-
-    # wait for listing page show up (This Xpath may need to change based on different seed url)
-    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
-        (By.XPATH, "/html/body/main/div/div/div[2]/div[1]/div[1]/form/div[1]/h1")))
-
-
-# Saves the crawled html page
-def savePage(page, url):
-    filePath = getFullPathName(url)
-    os.makedirs(os.path.dirname(filePath), exist_ok=True)
-    open(filePath, 'wb').write(page)
-    return
-
-
-# Gets the full path of the page to be saved along with its appropriate file name
-def getFullPathName(url):
-    fileName = getNameFromURL(url)
-    if isDescriptionLink(url):
-        fullPath = r'C:\Users\CALSysLab\Documents\threatIntelligence-main\DarkWebMining_Working\MarketPlaces\DarkFox\HTML_Pages\\' + str(
-            "%02d" % date.today().month) + str("%02d" % date.today().day) + str(
-            "%04d" % date.today().year) + r'\\' + r'Description\\' + fileName + '.html'
-    else:
-        fullPath = r'C:\Users\CALSysLab\Documents\threatIntelligence-main\DarkWebMining_Working\MarketPlaces\DarkFox\HTML_Pages\\' + str(
-            "%02d" % date.today().month) + str("%02d" % date.today().day) + str(
-            "%04d" % date.today().year) + r'\\' + r'Listing\\' + fileName + '.html'
-    return fullPath
-
-
-# Creates the name of the file based on URL
-def getNameFromURL(url):
-    global counter
-    name = ''.join(e for e in url if e.isalnum())
-    if (name == ''):
-        name = str(counter)
-        counter = counter + 1
-    return name
-
-
-def getInterestedLinks():
-    links = []
-
-    # Guides and Tutorials
-    links.append('http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion/category/30739153-1fcd-45cd-b919-072b439c6e06')
-    # Digital Products
-    links.append('http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion/category/0e384d5f-26ef-4561-b5a3-ff76a88ab781')
-    # Software and Malware
-    links.append('http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion/category/6b71210f-f1f9-4aa3-8f89-bd9ee28f7afc')
-    # Services
-    links.append('http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion/category/b9dc5846-5024-421e-92e6-09ba96a03280')
-    # Miscellaneous
-    links.append('http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion/category/fd1c989b-1a74-4dc0-92b0-67d8c1c487cb')
-    # Hosting and Security
-    links.append('http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion/category/5233fd6a-72e6-466d-b108-5cc61091cd14')
-
-    return links
-
-
-def isDescriptionLink(url):
-    if 'product' in url:
-        return True
-    return False
-
-
-def isListingLink(url):
-    if 'category' in url:
-        return True
-    return False
-
-
-def productPages(html):
-    soup = BeautifulSoup(html, "html.parser")
-    return darkfox_links_parser(soup)
-
-
-def isSignOut(url):
-    #absURL = urlparse.urljoin(url.base_url, url.url)
-    if 'signout' in url.lower() or 'logout' in url.lower():
-        return True
-
-    return False
-
-
-# dark fox seed url
-baseurl = 'http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion'
-driver.get(baseurl)
-captcha(driver)
-
-# visited = set()
-# visited.add(br.geturl())
-linksToCrawl = getInterestedLinks()
-initialTime = time.time()
-
-i = 0
-while i < len(linksToCrawl):
-    link = linksToCrawl[i]
-    print('Crawling :', link)
-    try:
-        driver.get(link)
-        html = driver.page_source.encode('utf-8', link)
-        savePage(html, link)
-        '''
-        has_next_page = True
-        while has_next_page:
-            j = 0
-            list = productPages(html)
-            for item in list:
-                if j == 1:
-                    break
-                itemURL = str(item)
-                driver.get(itemURL)
-                savePage(driver.page_source.encode('utf-8'), item)
-                driver.back()
-                j += 1
-
-            try:
-                link = driver.find_element(by=By.XPATH, value=
-                    '/html/body/main/div/div[2]/div/div[2]/div/div/div/nav/a[2]').get_attribute('href')
-                driver.get(link)
-                html = driver.page_source.encode('utf-8', link)
-                savePage(html, link)
-            except NoSuchElementException:
-                has_next_page = False
-        '''
-    except Exception as e:
-        print(link, e.message)
-    i += 1
-
-# finalTime = time.time()
-# print finalTime - initialTime
-
-input("Crawling DarkFox marketplace done sucessfully. Press ENTER to continue\n")
\ No newline at end of file
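
The deleted script drives Firefox through Tor by passing `firefox_binary=` and `firefox_profile=` keyword arguments built from paths read out of path.txt. Those keyword arguments are deprecated in Selenium 4, which expects the binary, profile, and proxy preferences to be carried on an Options object instead. A minimal sketch of the equivalent setup, assuming Selenium 4 and the same three-line path.txt layout (firefox.exe, profile.default, geckodriver.exe) as the deleted file:

from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service

# Read the three local paths the deleted script kept in path.txt.
with open('../../path.txt', 'r') as f:
    lines = f.readlines()

options = Options()
options.binary_location = lines[0].strip()  # full path for firefox.exe
options.profile = lines[1].strip()          # full path for profile.default; Selenium wraps it in a FirefoxProfile
# Route all traffic through the local Tor SOCKS proxy, as the deleted script did.
options.set_preference('network.proxy.type', 1)
options.set_preference('network.proxy.socks', '127.0.0.1')
options.set_preference('network.proxy.socks_port', 9150)
options.set_preference('network.proxy.socks_remote_dns', True)

service = Service(lines[2].strip())         # full path for geckodriver.exe
driver = webdriver.Firefox(service=service, options=options)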
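
Two lines in the deleted crawl loop are worth flagging if the file is ever restored: `driver.page_source.encode('utf-8', link)` passes the URL as `str.encode`'s second positional parameter, which is the errors-handler name rather than extra data, and `e.message` does not exist on Python 3 exceptions, so the `except` block would itself raise `AttributeError`. A corrected sketch of the loop body, reusing the deleted file's own `savePage` and `getInterestedLinks`:

linksToCrawl = getInterestedLinks()

i = 0
while i < len(linksToCrawl):
    link = linksToCrawl[i]
    print('Crawling :', link)
    try:
        driver.get(link)
        # str.encode takes (encoding, errors); the URL does not belong here.
        html = driver.page_source.encode('utf-8')
        savePage(html, link)
    except Exception as e:
        print(link, e)  # Python 3: print the exception itself, not e.message
    i += 1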
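
`savePage` also opens its output file without ever closing it, leaking one file handle per saved page; a context manager is the idiomatic fix. A sketch with the same signature, relying on the deleted file's `getFullPathName`:

import os

# Saves the crawled html page, closing the handle promptly.
def savePage(page, url):
    filePath = getFullPathName(url)
    os.makedirs(os.path.dirname(filePath), exist_ok=True)
    with open(filePath, 'wb') as f:
        f.write(page)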
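
Finally, `getNameFromURL` declares `global counter`, but the deleted module never defines a module-level `counter`, so a URL with no alphanumeric characters would raise `NameError`. A guarded sketch (the initial value of 1 is an assumption, not from the deleted file):

counter = 1  # module-level fallback; missing from the deleted script

def getNameFromURL(url):
    global counter
    name = ''.join(e for e in url if e.isalnum())
    if name == '':
        name = str(counter)
        counter += 1
    return name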