This is based on the CalSysLab project.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

133 lines
3.7 KiB

1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
  1. __author__ = 'DarkWeb'
  2. '''
  3. '''
  4. import os
  5. from datetime import *
  6. from Forums.BestCardingWorld.crawler_selenium import crawler as crawlerBestCardingWorld
  7. from Forums.CryptBB.crawler_selenium import crawler as crawlerCryptBB
  8. from Forums.OnniForums.crawler_selenium import crawler as crawlerOnniForums
  9. from Forums.AbyssForum.crawler_selenium import crawler as crawlerAbyssForum
  10. from Forums.Procrax.crawler_selenium import crawler as crawlerProcraxForum
  11. from Forums.HiddenAnswers.crawler_selenium import crawler as crawlerHiddenAnswers
  12. from Forums.Cardingleaks.crawler_selenium import crawler as crawlerCardingleaks
  13. from Forums.Altenens.crawler_selenium import crawler as crawlerAltenens
  14. import configparser
  15. import time
  16. config = configparser.ConfigParser()
  17. config.read('../../setup.ini')
  18. CURRENT_DATE = str("%02d" % date.today().month) + str("%02d" % date.today().day) + str("%04d" % date.today().year)
  19. # reads list of marketplaces manually inputted
  20. def getForums():
  21. forums = []
  22. with open('forumsList.txt') as f:
  23. forums = f.readlines()
  24. return forums
  25. # Creates needed directories for marketplace if doesn't exist
  26. def createDirectory(forum):
  27. # Package should already be there, holding crawler and parser
  28. if forum == 'Reddits':
  29. pagesMainDir = '../' + forum
  30. else:
  31. # pagesMainDir = '../' + forum + "/HTML_Pages"
  32. pagesMainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + forum + "/HTML_Pages")
  33. if not os.path.isdir(pagesMainDir):
  34. os.makedirs(pagesMainDir)
  35. if forum == 'Reddits':
  36. createRedditsSubdirectories(pagesMainDir)
  37. else:
  38. createSubdirectories(pagesMainDir)
  39. def createRedditsSubdirectories(pagesMainDir):
  40. with open('../Reddits/redditsList.txt', 'r') as f:
  41. reddits = f.readlines()
  42. for reddit in reddits:
  43. reddit = reddit.strip('\n')
  44. redditMainDir = pagesMainDir + '/' + reddit + '/HTML_Pages'
  45. if not os.path.isdir(redditMainDir):
  46. os.mkdir(redditMainDir)
  47. # Create inner time folders
  48. createSubdirectories(redditMainDir)
  49. def createSubdirectories(pagesDir):
  50. currentDateDir = pagesDir + '/' + CURRENT_DATE
  51. if not os.path.isdir(currentDateDir):
  52. os.mkdir(currentDateDir)
  53. listingDir = currentDateDir + '/Listing'
  54. if not os.path.isdir(listingDir):
  55. os.mkdir(listingDir)
  56. listReadDir = listingDir + '/Read'
  57. if not os.path.isdir(listReadDir):
  58. os.mkdir(listReadDir)
  59. descriptionDir = currentDateDir + '/Description'
  60. if not os.path.isdir(descriptionDir):
  61. os.mkdir(descriptionDir)
  62. descReadDir = descriptionDir + '/Read'
  63. if not os.path.isdir(descReadDir):
  64. os.mkdir(descReadDir)
  65. # main method
  66. if __name__ == '__main__':
  67. # assignment from forumsList.txt
  68. forumsList = getForums()
  69. # get forum from forumsList
  70. for forum in forumsList:
  71. forum = forum.replace('\n','')
  72. print("Creating listing and description directories ... for " + forum)
  73. createDirectory(forum)
  74. time.sleep(5) # wait for directories to be created
  75. input("Directories created successfully. Press ENTER to continue\n")
  76. if forum == "BestCardingWorld":
  77. crawlerBestCardingWorld()
  78. elif forum == "CryptBB":
  79. crawlerCryptBB()
  80. elif forum == "OnniForums":
  81. crawlerOnniForums()
  82. elif forum == "AbyssForum":
  83. crawlerAbyssForum()
  84. elif forum == "HiddenAnswers":
  85. crawlerHiddenAnswers()
  86. elif forum == "Altenens":
  87. crawlerAltenens()
  88. elif forum == 'Procrax':
  89. crawlerProcraxForum()
  90. elif forum == 'Cardingleaks':
  91. crawlerCardingleaks()
  92. print("Scraping process completed successfully!")