This is based on the calsyslab project.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

146 lines
4.1 KiB

1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
  1. __author__ = 'DarkWeb'
  2. '''
  3. Starting point of the Darkweb Forums Mining
  4. '''
  5. from datetime import *
  6. from Forums.BestCardingWorld.crawler_selenium import crawler as crawlerBestCardingWorld
  7. from Forums.CryptBB.crawler_selenium import crawler as crawlerCryptBB
  8. from Forums.OnniForums.crawler_selenium import crawler as crawlerOnniForums
  9. from Forums.AbyssForum.crawler_selenium import crawler as crawlerAbyssForum
  10. from Forums.Procrax.crawler_selenium import crawler as crawlerProcraxForum
  11. from Forums.HiddenAnswers.crawler_selenium import crawler as crawlerHiddenAnswers
  12. from Forums.Cardingleaks.crawler_selenium import crawler as crawlerCardingleaks
  13. from Forums.Altenens.crawler_selenium import crawler as crawlerAltenens
  14. from Forums.Libre.crawler_selenium import crawler as crawlerLibre
  15. import configparser
  16. import os
  17. import subprocess
  18. config = configparser.ConfigParser()
  19. config.read('../../setup.ini')
  20. CURRENT_DATE = str("%02d" % date.today().month) + str("%02d" % date.today().day) + str("%04d" % date.today().year)
  21. # reads list of marketplaces manually inputted
  22. def getForums():
  23. forums = []
  24. with open('forumsList.txt') as f:
  25. forums = f.readlines()
  26. return forums
  27. # Creates needed directories for marketplace if doesn't exist
  28. def createDirectory(forum):
  29. # Package should already be there, holding crawler and parser
  30. if forum == 'Reddits':
  31. pagesMainDir = '../' + forum
  32. else:
  33. # pagesMainDir = '../' + forum + "/HTML_Pages"
  34. pagesMainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + forum + "/HTML_Pages")
  35. if not os.path.isdir(pagesMainDir):
  36. os.makedirs(pagesMainDir)
  37. if forum == 'Reddits':
  38. createRedditsSubdirectories(pagesMainDir)
  39. else:
  40. createSubdirectories(pagesMainDir)
  41. def createRedditsSubdirectories(pagesMainDir):
  42. with open('../Reddits/redditsList.txt', 'r') as f:
  43. reddits = f.readlines()
  44. for reddit in reddits:
  45. reddit = reddit.strip('\n')
  46. redditMainDir = pagesMainDir + '/' + reddit + '/HTML_Pages'
  47. if not os.path.isdir(redditMainDir):
  48. os.mkdir(redditMainDir)
  49. # Create inner time folders
  50. createSubdirectories(redditMainDir)
  51. def createSubdirectories(pagesDir):
  52. currentDateDir = pagesDir + '/' + CURRENT_DATE
  53. if not os.path.isdir(currentDateDir):
  54. os.mkdir(currentDateDir)
  55. listingDir = currentDateDir + '/Listing'
  56. if not os.path.isdir(listingDir):
  57. os.mkdir(listingDir)
  58. listReadDir = listingDir + '/Read'
  59. if not os.path.isdir(listReadDir):
  60. os.mkdir(listReadDir)
  61. descriptionDir = currentDateDir + '/Description'
  62. if not os.path.isdir(descriptionDir):
  63. os.mkdir(descriptionDir)
  64. descReadDir = descriptionDir + '/Read'
  65. if not os.path.isdir(descReadDir):
  66. os.mkdir(descReadDir)
  67. # Opens Tor Browser
  68. def opentor():
  69. global pid
  70. print("Connecting Tor...")
  71. pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
  72. pid = pro.pid
  73. # time.sleep(7.5)
  74. input('Press ENTER when Tor is connected to continue')
  75. return
  76. # main method
  77. if __name__ == '__main__':
  78. # opentor()
  79. # assignment from forumsList.txt
  80. forumsList = getForums()
  81. # get forum from forumsList
  82. for forum in forumsList:
  83. forum = forum.replace('\n','')
  84. print("\nCreating listing and description directories ... for " + forum)
  85. createDirectory(forum)
  86. # time.sleep(5) # wait for directories to be created
  87. print("Directories created.")
  88. if forum == "BestCardingWorld":
  89. crawlerBestCardingWorld()
  90. elif forum == "CryptBB":
  91. crawlerCryptBB()
  92. elif forum == "OnniForums":
  93. crawlerOnniForums()
  94. elif forum == "AbyssForum":
  95. crawlerAbyssForum()
  96. elif forum == "HiddenAnswers":
  97. crawlerHiddenAnswers()
  98. elif forum == 'Procrax':
  99. crawlerProcraxForum()
  100. elif forum == 'Cardingleaks':
  101. crawlerCardingleaks()
  102. elif forum == 'Altenens':
  103. crawlerAltenens()
  104. elif forum == 'Libre':
  105. crawlerLibre()
  106. print("\nScraping process completed!")