# This is based on the calsyslab project.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 

130 lines
3.3 KiB

__author__ = 'DarkWeb'
'''
Starting point of the Darkweb Mining Platform
'''
import os
from datetime import *
from Forums.BestCardingWorld.crawler_selenium import crawler as crawlerBestCardingWorld
from Forums.CryptBB.crawler_selenium import crawler as crawlerCryptBB
from Forums.OnniForums.crawler_selenium import crawler as crawlerOnniForums
from Forums.AbyssForum.crawler_selenium import crawler as crawlerAbyssForum
from Forums.Altenens.crawler_selenium import crawler as crawlerAltenensForum
from Forums.HiddenAnswers.crawler_selenium import crawler as crawlerHiddenAnswers
import time
# Today's date formatted as MMDDYYYY; used as the name of the per-day folder.
# Built from a single date.today() call so that month, day and year always
# come from the same calendar day (three separate calls could straddle midnight).
CURRENT_DATE = date.today().strftime("%m%d%Y")
# Reads the manually entered list of forums from forumsList.txt
def getForums():
    """Return the forum names listed in ``forumsList.txt``.

    The file is opened relative to the current working directory and is
    expected to hold one forum name per line. Surrounding whitespace
    (including the trailing newline) is removed and blank lines are skipped,
    so a stray empty line cannot turn into an empty forum name.

    Returns:
        list[str]: the forum names, in file order.

    Raises:
        OSError: if ``forumsList.txt`` cannot be opened.
    """
    with open('forumsList.txt') as f:
        stripped = (line.strip() for line in f)
        return [name for name in stripped if name]
# Creates the needed directories for a forum if they don't exist
def createDirectory(forum):
    """Make sure the page-storage directories for ``forum`` exist.

    For the special name ``'Reddits'`` the root is ``../Reddits`` and the
    per-reddit folders are created by ``createRedditsSubdirectories``. For
    any other name the root is ``../<forum>/HTML_Pages`` and the dated
    folders are created by ``createSubdirectories``.

    Args:
        forum: forum name, used verbatim as a path component.
    """
    isReddits = forum == 'Reddits'

    # The forum package (holding crawler and parser) should already be there.
    pagesMainDir = '../' + forum if isReddits else '../' + forum + "/HTML_Pages"

    if not os.path.isdir(pagesMainDir):
        os.makedirs(pagesMainDir)

    # Choose the helper that builds the folders below the root.
    buildSubdirs = createRedditsSubdirectories if isReddits else createSubdirectories
    buildSubdirs(pagesMainDir)
def createRedditsSubdirectories(pagesMainDir):
    """Create ``<pagesMainDir>/<reddit>/HTML_Pages`` plus dated folders per reddit.

    Reddit names are read from ``../Reddits/redditsList.txt``, one per line.
    Surrounding whitespace is stripped and blank lines are ignored.

    Args:
        pagesMainDir: root directory under which each reddit gets its folder.

    Raises:
        OSError: if the list file cannot be read or a directory cannot be
            created.
    """
    with open('../Reddits/redditsList.txt', 'r') as f:
        reddits = [line.strip() for line in f]

    for reddit in reddits:
        if not reddit:
            # A blank line would otherwise map to '<pagesMainDir>//HTML_Pages'.
            continue

        redditMainDir = pagesMainDir + '/' + reddit + '/HTML_Pages'
        # makedirs rather than mkdir: the intermediate '<reddit>' folder may
        # not exist yet, and mkdir only creates a single level.
        os.makedirs(redditMainDir, exist_ok=True)

        # Create inner time folders
        createSubdirectories(redditMainDir)
def createSubdirectories(pagesDir):
    """Create today's folder tree below ``pagesDir``.

    The resulting layout is::

        <pagesDir>/<CURRENT_DATE>/Listing/Read
        <pagesDir>/<CURRENT_DATE>/Description/Read

    Directories that already exist are left untouched.

    Args:
        pagesDir: existing directory that receives the dated folder.
    """
    dateRoot = pagesDir + '/' + CURRENT_DATE

    # Parents are listed before their children because os.mkdir only creates
    # a single directory level per call.
    wanted = [dateRoot]
    for section in ('Listing', 'Description'):
        sectionDir = dateRoot + '/' + section
        wanted.append(sectionDir)
        wanted.append(sectionDir + '/Read')

    for path in wanted:
        if not os.path.isdir(path):
            os.mkdir(path)
# main method
if __name__ == '__main__':
    # Forum name -> crawler entry point. The CryptBB and CrackingPro crawlers
    # are currently disabled, so they are deliberately not listed here.
    activeCrawlers = {
        "BestCardingWorld": crawlerBestCardingWorld,
        "OnniForums": crawlerOnniForums,
        "AbyssForum": crawlerAbyssForum,
        "HiddenAnswers": crawlerHiddenAnswers,
    }

    # Walk through every forum named in forumsList.txt
    for rawName in getForums():
        forum = rawName.replace('\n', '')

        print("Creating listing and description directories ... for " + forum)
        createDirectory(forum)
        time.sleep(5)  # wait for directories to be created
        input("Directories created successfully. Press ENTER to continue\n")

        # Forums without a registered crawler are skipped.
        runCrawler = activeCrawlers.get(forum)
        if runCrawler is not None:
            runCrawler()

    print("Scraping process completed successfully!")