This is based on the CALSysLab project.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

136 lines
3.9 KiB

1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
  1. __author__ = 'DarkWeb'
  2. '''
  3. '''
  4. import os
  5. from datetime import *
  6. from Forums.BestCardingWorld.crawler_selenium import crawler as crawlerBestCardingWorld
  7. from Forums.CryptBB.crawler_selenium import crawler as crawlerCryptBB
  8. from Forums.OnniForums.crawler_selenium import crawler as crawlerOnniForums
  9. from Forums.AbyssForum.crawler_selenium import crawler as crawlerAbyssForum
  10. from Forums.Procrax.crawler_selenium import crawler as crawlerProcraxForum
  11. from Forums.HiddenAnswers.crawler_selenium import crawler as crawlerHiddenAnswers
  12. from Forums.Cardingleaks.crawler_selenium import crawler as crawlerCardingleaks
  13. from Forums.Altenens.crawler_selenium import crawler as crawlerAltenens
  14. from Forums.Libre.crawler_selenium import crawler as crawlerLibre
  15. import configparser
  16. import time
  # Project-wide settings. The path is resolved against the current working
  # directory; ConfigParser.read() silently skips files it cannot find, so a
  # wrong working directory only surfaces later, when an option is looked up.
  config = configparser.ConfigParser()
  config.read('../../setup.ini')

  # Today's date as MMDDYYYY, used as the name of the per-run folder.
  # The date is sampled once so that month, day and year always belong to the
  # same calendar day, even when the script starts right around midnight.
  CURRENT_DATE = date.today().strftime("%m%d%Y")
  # Reads the manually maintained list of forums to mine
  def getForums():
      """Return the raw lines of ``forumsList.txt``.

      The file is opened relative to the current working directory. Lines are
      returned exactly as read, so every entry that was followed by a line
      break still ends with its newline character.
      """
      with open('forumsList.txt') as listFile:
          return listFile.readlines()
  # Creates the page-storage directories for a forum if they don't exist yet
  def createDirectory(forum):
      """Ensure the HTML storage tree for ``forum`` exists.

      'Reddits' is stored next to the code under ``../Reddits`` and gets one
      sub-tree per entry via ``createRedditsSubdirectories``. Every other
      forum is stored under ``<shared_folder>/Forums/<forum>/HTML_Pages``,
      where ``shared_folder`` comes from the ``Project`` section of the
      configuration, and gets the dated folders directly.
      """
      # The package holding the crawler and parser should already be there.
      isReddits = forum == 'Reddits'

      if isReddits:
          rootDir = '../' + forum
      else:
          sharedFolder = config.get('Project', 'shared_folder')
          rootDir = os.path.join(sharedFolder, "Forums/" + forum + "/HTML_Pages")

      if not os.path.isdir(rootDir):
          os.makedirs(rootDir)

      buildSubtree = createRedditsSubdirectories if isReddits else createSubdirectories
      buildSubtree(rootDir)
  def createRedditsSubdirectories(pagesMainDir):
      """Create ``<pagesMainDir>/<reddit>/HTML_Pages`` plus its dated folders.

      One tree is created for every name listed in
      ``../Reddits/redditsList.txt`` (one name per line, path relative to the
      current working directory).

      :param pagesMainDir: directory under which the per-reddit trees live.
      """
      with open('../Reddits/redditsList.txt', 'r') as f:
          reddits = f.readlines()

      for reddit in reddits:
          reddit = reddit.strip('\n')
          # A blank line would otherwise produce a nameless '<main>//HTML_Pages' tree.
          if not reddit.strip():
              continue

          redditMainDir = pagesMainDir + '/' + reddit + '/HTML_Pages'
          # makedirs (not mkdir): the intermediate '<reddit>' folder does not
          # exist on a first run, and mkdir cannot create more than one level.
          os.makedirs(redditMainDir, exist_ok=True)

          # Create inner time folders
          createSubdirectories(redditMainDir)
  def createSubdirectories(pagesDir):
      """Create today's folder layout inside ``pagesDir``.

      The layout is ``<CURRENT_DATE>/Listing/Read`` and
      ``<CURRENT_DATE>/Description/Read``. Folders that already exist are left
      untouched. ``pagesDir`` itself must already exist, because each level is
      created with ``os.mkdir``.

      :param pagesDir: existing directory that receives the dated folder.
      """
      dateDir = pagesDir + '/' + CURRENT_DATE

      # Parents come before their children: os.mkdir creates one level only.
      neededDirs = (
          dateDir,
          dateDir + '/Listing',
          dateDir + '/Listing' + '/Read',
          dateDir + '/Description',
          dateDir + '/Description' + '/Read',
      )

      for folder in neededDirs:
          if os.path.isdir(folder):
              continue
          os.mkdir(folder)
  # main method
  if __name__ == '__main__':
      # Forum name, exactly as written in forumsList.txt -> crawler entry point
      crawlers = {
          "BestCardingWorld": crawlerBestCardingWorld,
          "CryptBB": crawlerCryptBB,
          "OnniForums": crawlerOnniForums,
          "AbyssForum": crawlerAbyssForum,
          "HiddenAnswers": crawlerHiddenAnswers,
          "Procrax": crawlerProcraxForum,
          "Cardingleaks": crawlerCardingleaks,
          "Altenens": crawlerAltenens,
          "Libre": crawlerLibre,
      }

      # assignment from forumsList.txt
      forumsList = getForums()

      # get forum from forumsList
      for forum in forumsList:
          # strip() removes the newline and any stray surrounding whitespace
          # that would otherwise keep the name from matching a crawler.
          forum = forum.strip()
          if not forum:
              # Blank line in the list: nothing to create or crawl.
              continue

          print("Creating listing and description directories ... for " + forum)
          createDirectory(forum)
          time.sleep(5)  # wait for directories to be created
          input("Directories created successfully. Press ENTER to continue\n")

          crawl = crawlers.get(forum)
          if crawl is None:
              # Directories were prepared, but no crawler is wired up for this name.
              print("No crawler is registered for forum '" + forum + "'; skipping.")
              continue
          crawl()

      print("Scraping process completed successfully!")