From 30b1ab8bda9fa42021e588f26ac52d3e8e28fa47 Mon Sep 17 00:00:00 2001 From: westernmeadow Date: Thu, 29 Jun 2023 12:57:45 -0700 Subject: [PATCH] save html to shared folder rest of markets: tor2door, torbay, and tormarket --- MarketPlaces/Tor2door/crawler_selenium.py | 14 +++++++++----- MarketPlaces/TorBay/crawler_selenium.py | 17 +++++++++-------- MarketPlaces/TorMarket/crawler_selenium.py | 14 +++++++++----- 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/MarketPlaces/Tor2door/crawler_selenium.py b/MarketPlaces/Tor2door/crawler_selenium.py index baef719..b0c5a6b 100644 --- a/MarketPlaces/Tor2door/crawler_selenium.py +++ b/MarketPlaces/Tor2door/crawler_selenium.py @@ -23,8 +23,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse from MarketPlaces.Tor2door.parser import tor2door_links_parser from MarketPlaces.Utilities.utilities import cleanHTML -config = configparser.ConfigParser() -config.read('../../setup.ini') counter = 1 baseURL = 'http://yzrrne3pveltulbavydr2kiashvlnysdwclwmklo6cyjuqpxi7ku4xqd.onion' @@ -48,6 +46,8 @@ def startCrawling(): # Opens Tor Browser def opentor(): + from MarketPlaces.Initialization.markets_mining import config + global pid print("Connecting Tor...") pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) @@ -131,6 +131,8 @@ def closetor(driver): # Creates FireFox 'driver' and configure its 'Profile' # to use Tor proxy and socket def createFFDriver(): + from MarketPlaces.Initialization.markets_mining import config + ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) @@ -184,12 +186,14 @@ def savePage(page, url): # Gets the full path of the page to be saved along with its appropriate file name def getFullPathName(url): - from MarketPlaces.Initialization.markets_mining import CURRENT_DATE + from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE + + mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages") fileName = getNameFromURL(url) if isDescriptionLink(url): - fullPath = r'..\Tor2door\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html' + fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html') else: - fullPath = r'..\Tor2door\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html' + fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html') return fullPath diff --git a/MarketPlaces/TorBay/crawler_selenium.py b/MarketPlaces/TorBay/crawler_selenium.py index 3c0619e..cadc72c 100644 --- a/MarketPlaces/TorBay/crawler_selenium.py +++ b/MarketPlaces/TorBay/crawler_selenium.py @@ -25,8 +25,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse from MarketPlaces.TorBay.parser import torbay_links_parser from MarketPlaces.Utilities.utilities import cleanHTML -config = configparser.ConfigParser() -config.read('../../setup.ini') counter = 1 baseURL = 'http://torbay3253zck4ym5cbowwvrbfjjzruzthrx3np5y6owvifrnhy5ybid.onion/' @@ -52,6 +50,8 @@ def startCrawling(): # Opens Tor Browser #prompts for ENTER input to continue def opentor(): + from MarketPlaces.Initialization.markets_mining import config + global pid print("Connecting Tor...") pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) @@ -90,6 +90,8 @@ def closetor(driver): # Creates FireFox 'driver' and configure its 'Profile' # to use Tor proxy and socket def createFFDriver(): + from MarketPlaces.Initialization.markets_mining import config + ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) @@ -154,15 +156,14 @@ def savePage(page, url): # Gets the full path of the page to be saved along with its appropriate file name #@param: raw url as crawler crawls through every site def getFullPathName(url): + from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE + + mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages") fileName = getNameFromURL(url) if isDescriptionLink(url): - fullPath = r'..\TorBay\HTML_Pages\\' + str( - "%02d" % date.today().month) + str("%02d" % date.today().day) + str( - "%04d" % date.today().year) + r'\\' + r'Description\\' + fileName + '.html' + fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html') else: - fullPath = r'..\TorBay\HTML_Pages\\' + str( - "%02d" % date.today().month) + str("%02d" % date.today().day) + str( - "%04d" % date.today().year) + r'\\' + r'Listing\\' + fileName + '.html' + fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html') return fullPath diff --git a/MarketPlaces/TorMarket/crawler_selenium.py b/MarketPlaces/TorMarket/crawler_selenium.py index 91c2f84..35be864 100644 --- a/MarketPlaces/TorMarket/crawler_selenium.py +++ b/MarketPlaces/TorMarket/crawler_selenium.py @@ -24,8 +24,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse from MarketPlaces.TorMarket.parser import tormarket_links_parser from MarketPlaces.Utilities.utilities import cleanHTML -config = configparser.ConfigParser() -config.read('../../setup.ini') counter = 1 baseURL = 'http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/' @@ -51,6 +49,8 @@ def startCrawling(): # Opens Tor Browser #prompts for ENTER input to continue def opentor(): + from MarketPlaces.Initialization.markets_mining import config + global pid print("Connecting Tor...") pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) @@ -89,6 +89,8 @@ def closetor(driver): # Creates FireFox 'driver' and configure its 'Profile' # to use Tor proxy and socket def createFFDriver(): + from MarketPlaces.Initialization.markets_mining import config + ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) @@ -153,12 +155,14 @@ def savePage(page, url): # Gets the full path of the page to be saved along with its appropriate file name #@param: raw url as crawler crawls through every site def getFullPathName(url): - from MarketPlaces.Initialization.markets_mining import CURRENT_DATE + from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE + + mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages") fileName = getNameFromURL(url) if isDescriptionLink(url): - fullPath = r'..\TorMarket\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html' + fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html') else: - fullPath = r'..\TorMarket\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html' + fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html') return fullPath