This is based on the calsyslab project.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

122 lines
3.2 KiB

1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
  1. __author__ = 'DarkWeb'
  2. '''
  3. Starting point of the Darkweb Mining Platform
  4. '''
  5. import os
  6. from datetime import *
  7. from Forums.BestCardingWorld.crawler_selenium import crawler as crawlerBestCardingWorld
  8. from Forums.CryptBB.crawler_selenium import crawler as crawlerCryptBB
  9. from Forums.OnniForums.crawler_selenium import crawler as crawlerOnniForums
  10. from Forums.AbyssForum.crawler_selenium import crawler as crawlerAbyssForum
  11. from Forums.Altenens.crawler_selenium import crawler as crawlerAltenensForum
  12. from Forums.HiddenAnswers.crawler_selenium import crawler as crawlerHiddenAnswers
  13. import time
  14. CURRENT_DATE = str("%02d" % date.today().month) + str("%02d" % date.today().day) + str("%04d" % date.today().year)
  15. # reads list of marketplaces manually inputted
  16. def getForums():
  17. forums = []
  18. with open('forumsList.txt') as f:
  19. forums = f.readlines()
  20. return forums
  21. # Creates needed directories for marketplace if doesn't exist
  22. def createDirectory(forum):
  23. # Package should already be there, holding crawler and parser
  24. if forum == 'Reddits':
  25. pagesMainDir = '../' + forum
  26. else:
  27. pagesMainDir = '../' + forum + "/HTML_Pages"
  28. if not os.path.isdir(pagesMainDir):
  29. os.makedirs(pagesMainDir)
  30. if forum == 'Reddits':
  31. createRedditsSubdirectories(pagesMainDir)
  32. else:
  33. createSubdirectories(pagesMainDir)
  34. def createRedditsSubdirectories(pagesMainDir):
  35. with open('../Reddits/redditsList.txt', 'r') as f:
  36. reddits = f.readlines()
  37. for reddit in reddits:
  38. reddit = reddit.strip('\n')
  39. redditMainDir = pagesMainDir + '/' + reddit + '/HTML_Pages'
  40. if not os.path.isdir(redditMainDir):
  41. os.mkdir(redditMainDir)
  42. # Create inner time folders
  43. createSubdirectories(redditMainDir)
  44. def createSubdirectories(pagesDir):
  45. currentDateDir = pagesDir + '/' + CURRENT_DATE
  46. if not os.path.isdir(currentDateDir):
  47. os.mkdir(currentDateDir)
  48. listingDir = currentDateDir + '/Listing'
  49. if not os.path.isdir(listingDir):
  50. os.mkdir(listingDir)
  51. listReadDir = listingDir + '/Read'
  52. if not os.path.isdir(listReadDir):
  53. os.mkdir(listReadDir)
  54. descriptionDir = currentDateDir + '/Description'
  55. if not os.path.isdir(descriptionDir):
  56. os.mkdir(descriptionDir)
  57. descReadDir = descriptionDir + '/Read'
  58. if not os.path.isdir(descReadDir):
  59. os.mkdir(descReadDir)
  60. # main method
  61. if __name__ == '__main__':
  62. # assignment from forumsList.txt
  63. forumsList = getForums()
  64. # get forum from forumsList
  65. for forum in forumsList:
  66. forum = forum.replace('\n','')
  67. print("Creating listing and description directories ...")
  68. createDirectory(forum)
  69. time.sleep(5) # wait for directories to be created
  70. input("Directories created successfully. Press ENTER to continue\n")
  71. if forum == "BestCardingWorld":
  72. crawlerBestCardingWorld()
  73. elif forum == "CryptBB":
  74. crawlerCryptBB()
  75. elif forum == "OnniForums":
  76. crawlerOnniForums()
  77. elif forum == "AbyssForum":
  78. crawlerAbyssForum()
  79. elif forum == "HiddenAnswers":
  80. crawlerHiddenAnswers()
  81. print("Scraping process completed successfully!")