__author__ = 'DarkWeb'

'''
Starting point of the Darkweb Forums Mining
'''

from datetime import *
from Forums.BestCardingWorld.crawler_selenium import crawler as crawlerBestCardingWorld

import configparser
import os
import subprocess

# Project settings; the path is resolved relative to the current working directory.
config = configparser.ConfigParser()
config.read('../../setup.ini')

# Take one snapshot of today's date so month, day and year cannot disagree
# when the module is loaded right around midnight.
_TODAY = date.today()
# Date stamp in MMDDYYYY form, used as the name of the per-run folder.
CURRENT_DATE = "%02d%02d%04d" % (_TODAY.month, _TODAY.day, _TODAY.year)


def getForums():
    """Return the raw lines of ``forumsList.txt``.

    The file is opened relative to the current working directory. Lines are
    returned exactly as read (trailing newline included); callers are expected
    to strip them.
    """
    with open('forumsList.txt') as f:
        return f.readlines()


def createDirectory(forum):
    """Create the page-storage directory tree for ``forum`` if it is missing.

    For ``'Reddits'`` the root is ``'../Reddits'`` and one tree is created per
    entry of the reddits list; for every other forum the root is
    ``<shared_folder>/Forums/<forum>/HTML_Pages``, where ``shared_folder`` is
    read from the ``Project`` section of the configuration.
    """
    if forum == 'Reddits':
        # Package should already be there, holding crawler and parser
        pagesMainDir = '../' + forum
    else:
        pagesMainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + forum + "/HTML_Pages")

    # exist_ok avoids the check-then-create race of isdir() followed by makedirs().
    os.makedirs(pagesMainDir, exist_ok=True)

    if forum == 'Reddits':
        createRedditsSubdirectories(pagesMainDir)
    else:
        createSubdirectories(pagesMainDir)


def createRedditsSubdirectories(pagesMainDir):
    """Create ``<pagesMainDir>/<reddit>/HTML_Pages`` plus its dated folders.

    One tree is created for every non-blank line of
    ``../Reddits/redditsList.txt``.
    """
    with open('../Reddits/redditsList.txt', 'r') as f:
        reddits = f.readlines()

    for reddit in reddits:
        reddit = reddit.strip()
        if not reddit:
            # A blank line would otherwise yield '<pagesMainDir>//HTML_Pages'.
            continue
        redditMainDir = pagesMainDir + '/' + reddit + '/HTML_Pages'
        # makedirs (not mkdir): the intermediate '<reddit>' folder may not exist yet.
        os.makedirs(redditMainDir, exist_ok=True)
        # Create inner time folders
        createSubdirectories(redditMainDir)


def createSubdirectories(pagesDir):
    """Create today's folder layout under ``pagesDir``.

    The resulting layout is::

        <pagesDir>/<CURRENT_DATE>/Listing/Read
        <pagesDir>/<CURRENT_DATE>/Description/Read
    """
    currentDateDir = pagesDir + '/' + CURRENT_DATE
    for section in ('Listing', 'Description'):
        # Creating the deepest folder also creates every missing parent.
        os.makedirs(currentDateDir + '/' + section + '/Read', exist_ok=True)


# Opens Tor Browser
def opentor():
    """Launch the configured browser binary and wait for the operator.

    Starts the executable named by ``firefox_binary_path`` in the ``TOR``
    config section, stores the child's process id in the module-level ``pid``,
    and blocks until the user presses ENTER.
    """
    global pid
    print("Connecting Tor...")
    pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
    pid = pro.pid
    input('Press ENTER when Tor is connected to continue')


# main method
if __name__ == '__main__':
    opentor()

    # assignment from forumsList.txt
    forumsList = getForums()

    # get forum from forumsList
    for forum in forumsList:
        # Strip the newline and stray whitespace so the name matches exactly below.
        forum = forum.strip()
        if not forum:
            # Ignore blank lines instead of creating a nameless forum folder.
            continue

        print("\nCreating listing and description directories ... for " + forum)
        createDirectory(forum)
        print("Directories created.")

        if forum == "BestCardingWorld":
            crawlerBestCardingWorld()

    print("\nScraping process completed!")