This is based on the calsyslab project.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

133 lines
3.9 KiB

1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
  1. __author__ = 'DarkWeb'
  2. '''
  3. Starting point of the Darkweb Forums Mining
  4. '''
  5. import os
  6. from datetime import *
  7. from Forums.BestCardingWorld.crawler_selenium import crawler as crawlerBestCardingWorld
  8. from Forums.CryptBB.crawler_selenium import crawler as crawlerCryptBB
  9. from Forums.OnniForums.crawler_selenium import crawler as crawlerOnniForums
  10. from Forums.AbyssForum.crawler_selenium import crawler as crawlerAbyssForum
  11. from Forums.Procrax.crawler_selenium import crawler as crawlerProcraxForum
  12. from Forums.HiddenAnswers.crawler_selenium import crawler as crawlerHiddenAnswers
  13. from Forums.Cardingleaks.crawler_selenium import crawler as crawlerCardingleaks
  14. from Forums.Altenens.crawler_selenium import crawler as crawlerAltenens
  15. from Forums.Libre.crawler_selenium import crawler as crawlerLibre
  16. import configparser
  17. import time
  18. config = configparser.ConfigParser()
  19. config.read('../../setup.ini')
  20. CURRENT_DATE = str("%02d" % date.today().month) + str("%02d" % date.today().day) + str("%04d" % date.today().year)
  21. # reads list of marketplaces manually inputted
  22. def getForums():
  23. forums = []
  24. with open('forumsList.txt') as f:
  25. forums = f.readlines()
  26. return forums
  27. # Creates needed directories for marketplace if doesn't exist
  28. def createDirectory(forum):
  29. # Package should already be there, holding crawler and parser
  30. if forum == 'Reddits':
  31. pagesMainDir = '../' + forum
  32. else:
  33. # pagesMainDir = '../' + forum + "/HTML_Pages"
  34. pagesMainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + forum + "/HTML_Pages")
  35. if not os.path.isdir(pagesMainDir):
  36. os.makedirs(pagesMainDir)
  37. if forum == 'Reddits':
  38. createRedditsSubdirectories(pagesMainDir)
  39. else:
  40. createSubdirectories(pagesMainDir)
  41. def createRedditsSubdirectories(pagesMainDir):
  42. with open('../Reddits/redditsList.txt', 'r') as f:
  43. reddits = f.readlines()
  44. for reddit in reddits:
  45. reddit = reddit.strip('\n')
  46. redditMainDir = pagesMainDir + '/' + reddit + '/HTML_Pages'
  47. if not os.path.isdir(redditMainDir):
  48. os.mkdir(redditMainDir)
  49. # Create inner time folders
  50. createSubdirectories(redditMainDir)
  51. def createSubdirectories(pagesDir):
  52. currentDateDir = pagesDir + '/' + CURRENT_DATE
  53. if not os.path.isdir(currentDateDir):
  54. os.mkdir(currentDateDir)
  55. listingDir = currentDateDir + '/Listing'
  56. if not os.path.isdir(listingDir):
  57. os.mkdir(listingDir)
  58. listReadDir = listingDir + '/Read'
  59. if not os.path.isdir(listReadDir):
  60. os.mkdir(listReadDir)
  61. descriptionDir = currentDateDir + '/Description'
  62. if not os.path.isdir(descriptionDir):
  63. os.mkdir(descriptionDir)
  64. descReadDir = descriptionDir + '/Read'
  65. if not os.path.isdir(descReadDir):
  66. os.mkdir(descReadDir)
  67. # main method
  68. if __name__ == '__main__':
  69. # assignment from forumsList.txt
  70. forumsList = getForums()
  71. # get forum from forumsList
  72. for forum in forumsList:
  73. forum = forum.replace('\n','')
  74. print("Creating listing and description directories ... for " + forum)
  75. createDirectory(forum)
  76. time.sleep(5) # wait for directories to be created
  77. print("Directories created successfully.")
  78. if forum == "BestCardingWorld":
  79. crawlerBestCardingWorld()
  80. elif forum == "CryptBB":
  81. crawlerCryptBB()
  82. elif forum == "OnniForums":
  83. crawlerOnniForums()
  84. elif forum == "AbyssForum":
  85. crawlerAbyssForum()
  86. elif forum == "HiddenAnswers":
  87. crawlerHiddenAnswers()
  88. elif forum == 'Procrax':
  89. crawlerProcraxForum()
  90. elif forum == 'Cardingleaks':
  91. crawlerCardingleaks()
  92. elif forum == 'Altenens':
  93. crawlerAltenens()
  94. elif forum == 'Libre':
  95. crawlerLibre()
  96. print("Scraping process completed successfully!")