this is based on calsyslab project
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

130 lines
3.5 KiB

1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
  1. __author__ = 'DarkWeb'
  2. '''
  3. Starting point of the Darkweb Mining Platform
  4. '''
  5. import os
  6. from datetime import *
  7. from Forums.BestCardingWorld.crawler_selenium import crawler as crawlerBestCardingWorld
  8. from Forums.CryptBB.crawler_selenium import crawler as crawlerCryptBB
  9. from Forums.OnniForums.crawler_selenium import crawler as crawlerOnniForums
  10. from Forums.AbyssForum.crawler_selenium import crawler as crawlerAbyssForum
  11. from Forums.Altenens.crawler_selenium import crawler as crawlerAltenensForum
  12. from Forums.HiddenAnswers.crawler_selenium import crawler as crawlerHiddenAnswers
  13. import configparser
  14. import time
  15. config = configparser.ConfigParser()
  16. config.read('../../setup.ini')
  17. CURRENT_DATE = str("%02d" % date.today().month) + str("%02d" % date.today().day) + str("%04d" % date.today().year)
  18. # reads list of marketplaces manually inputted
  19. def getForums():
  20. forums = []
  21. with open('forumsList.txt') as f:
  22. forums = f.readlines()
  23. return forums
  24. # Creates needed directories for marketplace if doesn't exist
  25. def createDirectory(forum):
  26. # Package should already be there, holding crawler and parser
  27. if forum == 'Reddits':
  28. pagesMainDir = '../' + forum
  29. else:
  30. # pagesMainDir = '../' + forum + "/HTML_Pages"
  31. pagesMainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + forum + "/HTML_Pages")
  32. if not os.path.isdir(pagesMainDir):
  33. os.makedirs(pagesMainDir)
  34. if forum == 'Reddits':
  35. createRedditsSubdirectories(pagesMainDir)
  36. else:
  37. createSubdirectories(pagesMainDir)
  38. def createRedditsSubdirectories(pagesMainDir):
  39. with open('../Reddits/redditsList.txt', 'r') as f:
  40. reddits = f.readlines()
  41. for reddit in reddits:
  42. reddit = reddit.strip('\n')
  43. redditMainDir = pagesMainDir + '/' + reddit + '/HTML_Pages'
  44. if not os.path.isdir(redditMainDir):
  45. os.mkdir(redditMainDir)
  46. # Create inner time folders
  47. createSubdirectories(redditMainDir)
  48. def createSubdirectories(pagesDir):
  49. currentDateDir = pagesDir + '/' + CURRENT_DATE
  50. if not os.path.isdir(currentDateDir):
  51. os.mkdir(currentDateDir)
  52. listingDir = currentDateDir + '/Listing'
  53. if not os.path.isdir(listingDir):
  54. os.mkdir(listingDir)
  55. listReadDir = listingDir + '/Read'
  56. if not os.path.isdir(listReadDir):
  57. os.mkdir(listReadDir)
  58. descriptionDir = currentDateDir + '/Description'
  59. if not os.path.isdir(descriptionDir):
  60. os.mkdir(descriptionDir)
  61. descReadDir = descriptionDir + '/Read'
  62. if not os.path.isdir(descReadDir):
  63. os.mkdir(descReadDir)
  64. # main method
  65. if __name__ == '__main__':
  66. # assignment from forumsList.txt
  67. forumsList = getForums()
  68. # get forum from forumsList
  69. for forum in forumsList:
  70. forum = forum.replace('\n','')
  71. print("Creating listing and description directories ... for " + forum)
  72. createDirectory(forum)
  73. time.sleep(5) # wait for directories to be created
  74. input("Directories created successfully. Press ENTER to continue\n")
  75. if forum == "BestCardingWorld":
  76. crawlerBestCardingWorld()
  77. elif forum == "CryptBB":
  78. crawlerCryptBB()
  79. elif forum == "OnniForums":
  80. crawlerOnniForums()
  81. elif forum == "AbyssForum":
  82. crawlerAbyssForum()
  83. elif forum == "HiddenAnswers":
  84. crawlerHiddenAnswers()
  85. elif forum == "Altenens":
  86. crawlerAltenensForum()
  87. print("Scraping process completed successfully!")