|
|
@ -14,6 +14,8 @@ from selenium.webdriver.support import expected_conditions as EC |
|
|
|
from selenium.webdriver.support.ui import WebDriverWait |
|
|
|
from PIL import Image |
|
|
|
|
|
|
|
import codecs |
|
|
|
import socks, socket, time |
|
|
|
import urllib.parse as urlparse |
|
|
|
import os, re, time |
|
|
|
from datetime import date |
|
|
@ -22,6 +24,7 @@ from bs4 import BeautifulSoup |
|
|
|
from MarketPlaces.Initialization.prepare_parser import new_parse |
|
|
|
from MarketPlaces.WeTheNorth.parser import wethenorth_links_parser |
|
|
|
from MarketPlaces.Utilities.utilities import cleanHTML |
|
|
|
import selenium |
|
|
|
|
|
|
|
# Module-level counter, starting at 1 (where it is advanced is not visible in
# this excerpt).
counter = 1
# Root onion address of the marketplace; the same host is used by
# getFixedURL() and the category links.
baseURL = 'http://hn2paw7zaahbikbejiv6h22zwtijlam65y2c77xj2ypbilm2xs4bnbid.onion'
|
|
@ -40,25 +43,24 @@ def startCrawling(): |
|
|
|
print(driver.current_url, e) |
|
|
|
closeDriver(driver) |
|
|
|
|
|
|
|
new_parse(marketName, False) |
|
|
|
new_parse(marketName, baseURL, True) |
|
|
|
|
|
|
|
|
|
|
|
# Login using premade account credentials and do login captcha manually |
|
|
|
def login(driver): |
|
|
|
time.sleep(3) |
|
|
|
#wait for login page |
|
|
|
WebDriverWait(driver, 100).until(EC.visibility_of_element_located( |
|
|
|
(By.XPATH, "/html/body/div/div[2]/div[2]/div/div[3]/form/div[1]/input"))) |
|
|
|
input("Press ENTER when CAPTCHA is completed\n") |
|
|
|
|
|
|
|
#entering username and password into input boxes |
|
|
|
usernameBox = driver.find_element(by=By.XPATH, value='/html/body/div/div[2]/div[2]/div/div[3]/form/div[1]/input') |
|
|
|
usernameBox = driver.find_element(by=By.XPATH, value='//input[@name="login"]') |
|
|
|
#Username here |
|
|
|
usernameBox.send_keys('blabri') |
|
|
|
passwordBox = driver.find_element(by=By.XPATH, value='/html/body/div/div[2]/div[2]/div/div[3]/form/div[2]/input') |
|
|
|
passwordBox = driver.find_element(by=By.XPATH, value='//input[@name="pass"]') |
|
|
|
#Password here |
|
|
|
passwordBox.send_keys('fishowal') |
|
|
|
|
|
|
|
''' |
|
|
|
|
|
|
|
# wait for captcha page show up |
|
|
|
WebDriverWait(driver, 100).until(EC.visibility_of_element_located( |
|
|
|
(By.XPATH, "/html/body/div/div[2]/div[2]/div/div[3]/form/div[3]/div/img"))) |
|
|
@ -83,27 +85,27 @@ def login(driver): |
|
|
|
|
|
|
|
# click the verify(submit) button |
|
|
|
driver.find_element(by=By.XPATH, value="/html/body/div/div[2]/div[2]/div/div[3]/form/div[5]/input").click() |
|
|
|
''' |
|
|
|
|
|
|
|
input("Press ENTER when CAPTCHA is completed\n") |
|
|
|
|
|
|
|
# wait for listing page show up (This Xpath may need to change based on different seed url) |
|
|
|
WebDriverWait(driver, 50).until(EC.visibility_of_element_located( |
|
|
|
(By.XPATH, '//*[@id="information"]'))) |
|
|
|
|
|
|
|
|
|
|
|
# Returns the name of the website |
|
|
|
def getMarketName():
    """Return the name of the marketplace handled by this crawler.

    Returns:
        str: The fixed string 'WeTheNorth'.
    """
    return 'WeTheNorth'
|
|
|
|
|
|
|
def getMKTName() -> str:
    """Return the name of the marketplace handled by this crawler.

    Returns:
        The fixed string 'WeTheNorth'.
    """
    return 'WeTheNorth'
|
|
|
|
|
|
|
# Return the link of the website |
|
|
|
def getFixedURL():
    """Return the fixed root URL of the marketplace.

    Returns:
        str: The hard-coded .onion address of the site's landing page.
    """
    # Kept as a local literal so this function does not depend on any
    # module-level state being defined.
    url = 'http://hn2paw7zaahbikbejiv6h22zwtijlam65y2c77xj2ypbilm2xs4bnbid.onion'
    return url
|
|
|
|
|
|
|
|
|
|
|
# Closes Tor Browser |
|
|
|
def closeDriver(driver): |
|
|
|
# global pid |
|
|
@ -123,13 +125,13 @@ def createFFDriver(): |
|
|
|
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) |
|
|
|
|
|
|
|
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) |
|
|
|
# ff_prof.set_preference("places.history.enabled", False) |
|
|
|
# ff_prof.set_preference("privacy.clearOnShutdown.offlineApps", True) |
|
|
|
# ff_prof.set_preference("privacy.clearOnShutdown.passwords", True) |
|
|
|
# ff_prof.set_preference("privacy.clearOnShutdown.siteSettings", True) |
|
|
|
# ff_prof.set_preference("privacy.sanitize.sanitizeOnShutdown", True) |
|
|
|
# ff_prof.set_preference("signon.rememberSignons", False) |
|
|
|
# ff_prof.set_preference("network.cookie.lifetimePolicy", 2) |
|
|
|
ff_prof.set_preference("places.history.enabled", False) |
|
|
|
ff_prof.set_preference("privacy.clearOnShutdown.offlineApps", True) |
|
|
|
ff_prof.set_preference("privacy.clearOnShutdown.passwords", True) |
|
|
|
ff_prof.set_preference("privacy.clearOnShutdown.siteSettings", True) |
|
|
|
ff_prof.set_preference("privacy.sanitize.sanitizeOnShutdown", True) |
|
|
|
ff_prof.set_preference("signon.rememberSignons", False) |
|
|
|
ff_prof.set_preference("network.cookie.lifetimePolicy", 2) |
|
|
|
# ff_prof.set_preference("network.dns.disablePrefetch", True) |
|
|
|
# ff_prof.set_preference("network.http.sendRefererHeader", 0) |
|
|
|
ff_prof.set_preference("permissions.default.image", 3) |
|
|
@ -206,7 +208,6 @@ def getInterestedLinks(): |
|
|
|
# Software and Malware |
|
|
|
links.append('http://hn2paw7zaahbikbejiv6h22zwtijlam65y2c77xj2ypbilm2xs4bnbid.onion/items.php?category=10') |
|
|
|
|
|
|
|
|
|
|
|
return links |
|
|
|
|
|
|
|
|
|
|
@ -243,12 +244,6 @@ def crawlForum(driver): |
|
|
|
savePage(driver, driver.page_source, item) |
|
|
|
driver.back() |
|
|
|
|
|
|
|
# comment out |
|
|
|
break |
|
|
|
|
|
|
|
# comment out |
|
|
|
if count == 1: |
|
|
|
break |
|
|
|
|
|
|
|
try: |
|
|
|
nav = driver.find_element(by=By.XPATH, value= |
|
|
|