|
|
@@ -25,8 +25,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse |
|
|
|
from MarketPlaces.TorBay.parser import torbay_links_parser |
|
|
|
from MarketPlaces.Utilities.utilities import cleanHTML |
|
|
|
|
|
|
|
config = configparser.ConfigParser() |
|
|
|
config.read('../../setup.ini') |
|
|
|
counter = 1 |
|
|
|
baseURL = 'http://torbay3253zck4ym5cbowwvrbfjjzruzthrx3np5y6owvifrnhy5ybid.onion/' |
|
|
|
|
|
|
@@ -52,6 +50,8 @@ def startCrawling(): |
|
|
|
# Opens Tor Browser |
|
|
|
# Prompts for ENTER input to continue |
|
|
|
def opentor(): |
|
|
|
from MarketPlaces.Initialization.markets_mining import config |
|
|
|
|
|
|
|
global pid |
|
|
|
print("Connecting Tor...") |
|
|
|
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) |
|
|
@@ -90,6 +90,8 @@ def closetor(driver): |
|
|
|
# Creates the Firefox 'driver' and configures its 'Profile' |
|
|
|
# to use Tor proxy and socket |
|
|
|
def createFFDriver(): |
|
|
|
from MarketPlaces.Initialization.markets_mining import config |
|
|
|
|
|
|
|
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) |
|
|
|
|
|
|
|
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) |
|
|
@@ -154,15 +156,14 @@ def savePage(page, url): |
|
|
|
# Gets the full path of the page to be saved along with its appropriate file name |
|
|
|
# @param url: raw URL of the page the crawler is currently visiting |
|
|
|
def getFullPathName(url):
    """Return the full path of the HTML file in which the page at ``url`` is saved.

    The path has the form::

        <shared_folder>/MarketPlaces/<getMKTName()>/HTML_Pages/<CURRENT_DATE>/<sub-folder>/<name>.html

    where ``<sub-folder>`` is ``Description`` when ``isDescriptionLink(url)``
    is true and ``Listing`` otherwise, and ``<name>`` is
    ``getNameFromURL(url)``.

    @param url: raw URL of the page being crawled
    @return: the path as a string, built with the platform's path separator
    """
    # Function-scope import, as in the other functions of this module
    # (presumably to avoid a circular import with markets_mining -- confirm).
    from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE

    mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
    fileName = getNameFromURL(url)

    # The former relative r'..\TorBay\HTML_Pages\\...' path was overwritten
    # immediately after being built, so it is no longer computed.
    subFolder = 'Description' if isDescriptionLink(url) else 'Listing'

    # Pass every path part as its own component so os.path.join inserts the
    # correct separator, instead of embedding literal Windows backslashes.
    return os.path.join(mainDir, CURRENT_DATE, subFolder, fileName + '.html')
|
|
|
|
|
|
|
|
|
|
|