This is based on the calsyslab project.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

122 lines
3.0 KiB

1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
  1. __author__ = 'DarkWeb'
  2. '''
  3. Starting point of the Darkweb Forums Mining
  4. '''
  5. from datetime import *
  6. from Forums.BestCardingWorld.crawler_selenium import crawler as crawlerBestCardingWorld
  7. import configparser
  8. import os
  9. import subprocess
  10. config = configparser.ConfigParser()
  11. config.read('../../setup.ini')
  12. CURRENT_DATE = str("%02d" % date.today().month) + str("%02d" % date.today().day) + str("%04d" % date.today().year)
  13. # reads list of marketplaces manually inputted
  14. def getForums():
  15. forums = []
  16. with open('forumsList.txt') as f:
  17. forums = f.readlines()
  18. return forums
  19. # Creates needed directories for marketplace if doesn't exist
  20. def createDirectory(forum):
  21. # Package should already be there, holding crawler and parser
  22. if forum == 'Reddits':
  23. pagesMainDir = '../' + forum
  24. else:
  25. # pagesMainDir = '../' + forum + "/HTML_Pages"
  26. pagesMainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + forum + "/HTML_Pages")
  27. if not os.path.isdir(pagesMainDir):
  28. os.makedirs(pagesMainDir)
  29. if forum == 'Reddits':
  30. createRedditsSubdirectories(pagesMainDir)
  31. else:
  32. createSubdirectories(pagesMainDir)
  33. def createRedditsSubdirectories(pagesMainDir):
  34. with open('../Reddits/redditsList.txt', 'r') as f:
  35. reddits = f.readlines()
  36. for reddit in reddits:
  37. reddit = reddit.strip('\n')
  38. redditMainDir = pagesMainDir + '/' + reddit + '/HTML_Pages'
  39. if not os.path.isdir(redditMainDir):
  40. os.mkdir(redditMainDir)
  41. # Create inner time folders
  42. createSubdirectories(redditMainDir)
  43. def createSubdirectories(pagesDir):
  44. currentDateDir = pagesDir + '/' + CURRENT_DATE
  45. if not os.path.isdir(currentDateDir):
  46. os.mkdir(currentDateDir)
  47. listingDir = currentDateDir + '/Listing'
  48. if not os.path.isdir(listingDir):
  49. os.mkdir(listingDir)
  50. listReadDir = listingDir + '/Read'
  51. if not os.path.isdir(listReadDir):
  52. os.mkdir(listReadDir)
  53. descriptionDir = currentDateDir + '/Description'
  54. if not os.path.isdir(descriptionDir):
  55. os.mkdir(descriptionDir)
  56. descReadDir = descriptionDir + '/Read'
  57. if not os.path.isdir(descReadDir):
  58. os.mkdir(descReadDir)
  59. # Opens Tor Browser
  60. def opentor():
  61. global pid
  62. print("Connecting Tor...")
  63. pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
  64. pid = pro.pid
  65. # time.sleep(7.5)
  66. input('Press ENTER when Tor is connected to continue')
  67. return
  68. # main method
  69. if __name__ == '__main__':
  70. opentor()
  71. # assignment from forumsList.txt
  72. forumsList = getForums()
  73. # get forum from forumsList
  74. for forum in forumsList:
  75. forum = forum.replace('\n','')
  76. print("\nCreating listing and description directories ... for " + forum)
  77. createDirectory(forum)
  78. # time.sleep(5) # wait for directories to be created
  79. print("Directories created.")
  80. if forum == "BestCardingWorld":
  81. crawlerBestCardingWorld()
  82. print("\nScraping process completed!")