
added image aes encryption, base64 encoding, and html embedding

branch: main
westernmeadow committed 1 year ago
commit d1943e5586
10 changed files with 251 additions and 113 deletions
1. Forums/AbyssForum/crawler_selenium.py          +6   -6
2. Forums/Altenens/crawler_selenium.py            +14  -14
3. Forums/BestCardingWorld/crawler_selenium.py    +85  -68
4. Forums/Cardingleaks/crawler_selenium.py        +2   -2
5. Forums/CryptBB/crawler_selenium.py             +4   -4
6. Forums/HiddenAnswers/crawler_selenium.py       +4   -4
7. Forums/Libre/crawler_selenium.py               +4   -4
8. Forums/OnniForums/crawler_selenium.py          +4   -4
9. Forums/Procrax/crawler_selenium.py             +4   -4
10. Forums/Utilities/utilities.py                 +124 -3

+6 -6   Forums/AbyssForum/crawler_selenium.py

@@ -135,8 +135,8 @@ def getAccess():

 # Saves the crawled html page
-def savePage(page, url):
-    cleanPage = cleanHTML(page)
+def savePage(driver, page, url):
+    cleanPage = cleanHTML(driver, page)
     filePath = getFullPathName(url)
     os.makedirs(os.path.dirname(filePath), exist_ok=True)
     open(filePath, 'wb').write(cleanPage.encode('utf-8'))

@@ -206,7 +206,7 @@ def crawlForum(driver):
             except:
                 driver.refresh()
             html = driver.page_source
-            savePage(html, link)
+            savePage(driver, html, link)

             topics = topicPages(html)
             for topic in topics:

@@ -220,7 +220,7 @@ def crawlForum(driver):
                     driver.get(itemURL)
                 except:
                     driver.refresh()
-                savePage(driver.page_source, topic + f"page{counter}")
+                savePage(driver, driver.page_source, topic + f"page{counter}")

                 # comment out
                 if counter == 2:

@@ -228,8 +228,8 @@ def crawlForum(driver):
                 try:
                     temp = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div[3]')
-                    item = temp.find_element(by=By.CLASS_NAME, value='button button-icon-only').get_attribute('href')
-                    if item == "":
+                    page = temp.find_element(by=By.CLASS_NAME, value='button button-icon-only').get_attribute('href')
+                    if page == "":
                         raise NoSuchElementException
                     counter += 1
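
Note: the savePage/cleanHTML signature change above is the same edit repeated in every crawler below. The live Selenium driver is now threaded through so the new image pipeline in Forums/Utilities/utilities.py can screenshot each <img> element before the tag is stripped. A minimal sketch of the new contract, with the repo's helpers stubbed out for illustration (the stubs are not the real implementations):

import os

def cleanHTML(driver, page):   # stub; the real helper lives in Forums/Utilities/utilities.py
    return page                # (it embeds AES-encrypted screenshots, see the last file below)

def getFullPathName(url):      # stub; the real helper builds a per-forum, per-date path
    return os.path.join("HTML_Pages", url.replace('/', '_') + '.html')

def savePage(driver, page, url):
    cleanPage = cleanHTML(driver, page)   # driver is now a required first argument
    filePath = getFullPathName(url)
    os.makedirs(os.path.dirname(filePath), exist_ok=True)
    open(filePath, 'wb').write(cleanPage.encode('utf-8'))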


+14 -14   Forums/Altenens/crawler_selenium.py

@@ -32,17 +32,17 @@ baseURL = 'https://altenens.is/'
 def startCrawling():
     # opentor()
     forumName = getForumName()
-    # driver = getAccess()
-    #
-    # if driver != 'down':
-    #     try:
-    #         login(driver)
-    #         crawlForum(driver)
-    #     except Exception as e:
-    #         print(driver.current_url, e)
-    #     closetor(driver)
+    driver = getAccess()
+
+    if driver != 'down':
+        try:
+            login(driver)
+            crawlForum(driver)
+        except Exception as e:
+            print(driver.current_url, e)
+        closetor(driver)

-    new_parse(forumName, baseURL, True)
+    # new_parse(forumName, baseURL, True)


 # Opens Tor Browser

@@ -151,8 +151,8 @@ def getAccess():

 # Saves the crawled html page
-def savePage(page, url):
-    cleanPage = cleanHTML(page)
+def savePage(driver, html, url):
+    cleanPage = cleanHTML(driver, html)
     filePath = getFullPathName(url)
     os.makedirs(os.path.dirname(filePath), exist_ok=True)
     open(filePath, 'wb').write(cleanPage.encode('utf-8'))

@@ -220,7 +220,7 @@ def crawlForum(driver):
             except:
                 driver.refresh()
             html = driver.page_source
-            savePage(html, link)
+            savePage(driver, html, link)

             topics = topicPages(html)
             for topic in topics:

@@ -234,7 +234,7 @@ def crawlForum(driver):
                     driver.get(itemURL)
                 except:
                     driver.refresh()
-                savePage(driver.page_source, topic + f"page{counter}")  # very important
+                savePage(driver, driver.page_source, topic + f"page{counter}")  # very important

                 # comment out
                 if counter == 2:


+85 -68   Forums/BestCardingWorld/crawler_selenium.py

@@ -29,14 +29,14 @@ baseURL = 'http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion
 def startCrawling():
     # opentor()
     forumName = getForumName()
-    # driver = getAccess()
+    driver = getAccess()

-    # if driver != 'down':
-    #     try:
-    #         crawlForum(driver)
-    #     except Exception as e:
-    #         print(driver.current_url, e)
-    #     closetor(driver)
+    if driver != 'down':
+        try:
+            crawlForum(driver)
+        except Exception as e:
+            print(driver.current_url, e)
+        closetor(driver)

     new_parse(forumName, baseURL, True)

@@ -44,10 +44,11 @@ def startCrawling():
 # Opens Tor Browser
 #prompts for ENTER input to continue
 def opentor():
+    from Forums.Initialization.forums_mining import config
     global pid
     print("Connecting Tor...")
-    path = open('../../path.txt').readline().strip()
-    pro = subprocess.Popen(path)
+    pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
     pid = pro.pid
     time.sleep(7.5)
     input('Tor Connected. Press ENTER to continue\n')

@@ -71,9 +72,9 @@ def getFixedURL():
 # Closes Tor Browser
 #@param: current selenium driver
 def closetor(driver):
-    global pid
+    # global pid
     # os.system("taskkill /pid " + str(pro.pid))
-    os.system("taskkill /t /f /im tor.exe")
+    # os.system("taskkill /t /f /im tor.exe")
     print('Closing Tor...')
     driver.close()
     time.sleep(3)

@@ -83,12 +84,11 @@ def closetor(driver):
 # Creates FireFox 'driver' and configure its 'Profile'
 # to use Tor proxy and socket
 def createFFDriver():
-    file = open('../../path.txt', 'r')
-    lines = file.readlines()
+    from Forums.Initialization.forums_mining import config

-    ff_binary = FirefoxBinary(lines[0].strip())
+    ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))

-    ff_prof = FirefoxProfile(lines[1].strip())
+    ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
     ff_prof.set_preference("places.history.enabled", False)
     ff_prof.set_preference("privacy.clearOnShutdown.offlineApps", True)
     ff_prof.set_preference("privacy.clearOnShutdown.passwords", True)

@@ -110,7 +110,7 @@ def createFFDriver():
     ff_prof.set_preference("javascript.enabled", True)
     ff_prof.update_preferences()

-    service = Service(lines[2].strip())
+    service = Service(config.get('TOR', 'geckodriver_path'))

     driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)

@@ -131,8 +131,8 @@ def getAccess():

 # Saves the crawled html page, makes the directory path for html pages if not made
-def savePage(page, url):
-    cleanPage = cleanHTML(page)
+def savePage(driver, page, url):
+    cleanPage = cleanHTML(driver, page)
     filePath = getFullPathName(url)
     os.makedirs(os.path.dirname(filePath), exist_ok=True)
     open(filePath, 'wb').write(cleanPage.encode('utf-8'))

@@ -142,15 +142,14 @@ def savePage(page, url):
 # Gets the full path of the page to be saved along with its appropriate file name
 #@param: raw url as crawler crawls through every site
 def getFullPathName(url):
+    from Forums.Initialization.forums_mining import config, CURRENT_DATE
+
+    mainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + getForumName() + "/HTML_Pages")
     fileName = getNameFromURL(url)
     if isDescriptionLink(url):
-        fullPath = r'C:\Users\fakeguy\Documents\threatIntelligence-main\DarkWebMining_Working\Forums\BestCardingWorld\HTML_Pages\\' + str(
-            "%02d" % date.today().month) + str("%02d" % date.today().day) + str(
-            "%04d" % date.today().year) + r'\\' + r'Description\\' + fileName + '.html'
+        fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
     else:
-        fullPath = r'C:\Users\fakeguy\Documents\threatIntelligence-main\DarkWebMining_Working\Forums\BestCardingWorld\HTML_Pages\\' + str(
-            "%02d" % date.today().month) + str("%02d" % date.today().day) + str(
-            "%04d" % date.today().year) + r'\\' + r'Listing\\' + fileName + '.html'
+        fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
     return fullPath

@@ -171,30 +170,26 @@ def getNameFromURL(url):
 def getInterestedLinks():
     links = []

-    # Penetration Tests
-    links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=43')
+    # # Penetration Tests
+    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=43')
     # # Social Engineering Tests
-    links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=44')
+    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=44')
     # # Exploits
-    links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=45')
+    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=45')
     # # Tools
-    links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=46')
-    # # Malware
+    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=46')
+    # Malware
     links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=47')
     # # Cryptography
-    links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=48')
+    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=48')
     # # Others
-    links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=49')
+    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=49')
     # # Hacking Tutorials
-    links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=50')
+    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=50')
     # # Hacked Accounts and Database Dumps
     # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=30')
     # # Android Moded pak
-    links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=53')
-    #General Discussion
-    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=16&sid=6a4959d49be41e72944e5aa5684c187a')
+    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=53')

     return links

@@ -206,45 +201,70 @@ def crawlForum(driver):
     print("Crawling the BestCardingWorld forum")

     linksToCrawl = getInterestedLinks()
-    visited = set(linksToCrawl)
-    initialTime = time.time()

     i = 0
     while i < len(linksToCrawl):
         link = linksToCrawl[i]
         print('Crawling :', link)
         try:
-            try:
-                driver.get(link)
-            except:
-                driver.refresh()
-            html = driver.page_source
-            savePage(html, link)
-
             has_next_page = True
             count = 0

             while has_next_page:
-                list = topicPages(html)
-                for item in list:
-                    itemURL = urlparse.urljoin(baseURL, str(item))
-                    try:
-                        driver.get(itemURL)
-                    except:
-                        driver.refresh()
-                    savePage(driver.page_source, item)
-                    driver.back()
+                try:
+                    driver.get(link)
+                except:
+                    driver.refresh()
+                html = driver.page_source
+                savePage(driver, html, link)
+
+                topics = topicPages(html)
+                for topic in topics:
+                    has_next_topic_page = True
+                    counter = 1
+                    page = topic
+
+                    while has_next_topic_page:
+                        itemURL = urlparse.urljoin(baseURL, str(page))
+                        try:
+                            driver.get(itemURL)
+                        except:
+                            driver.refresh()
+                        savePage(driver, driver.page_source, topic + f"page{counter}")
+
+                        # comment out
+                        if counter == 2:
+                            break
+
+                        try:
+                            nav = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div[2]/div[2]/div[4]/ul')
+                            li = nav.find_element_by_class_name('next')
+                            page = li.find_element_by_tag_name('a').get_attribute('href')
+                            if page == "":
+                                raise NoSuchElementException
+                            counter += 1
+                        except NoSuchElementException:
+                            has_next_topic_page = False
+
+                    # end of loop
+                    for i in range(counter):
+                        driver.back()

                 # comment out
                 break

                 # comment out
                 if count == 1:
                     break

                 try:
-                    bar = driver.find_element(by=By.XPATH, value=
-                                              '/html/body/div[1]/div[2]/div[2]/div[3]/ul')
+                    bar = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div[2]/div[2]/div[3]/ul')
                     next = bar.find_element_by_class_name('next')
                     link = next.find_element_by_tag_name('a').get_attribute('href')
-                    try:
-                        driver.get(link)
-                    except:
-                        driver.refresh()
-                    html = driver.page_source
-                    savePage(html, link)
+                    if link == "":
+                        raise NoSuchElementException
                     count += 1
                 except NoSuchElementException:
                     has_next_page = False

@@ -253,9 +273,6 @@ def crawlForum(driver):
             print(link, e)
         i += 1

-    # finalTime = time.time()
-    # print finalTime - initialTime
-
     input("Crawling BestCardingWorld forum done sucessfully. Press ENTER to continue\n")


+2 -2   Forums/Cardingleaks/crawler_selenium.py

@@ -160,7 +160,7 @@ def getAccess():

 # Saves the crawled html page
 def savePage(page, url):
-    cleanPage = cleanHTML(page)
+    cleanPage = cleanHTML(driver, page)
     filePath = getFullPathName(url)
     os.makedirs(os.path.dirname(filePath), exist_ok=True)
     open(filePath, 'wb').write(cleanPage.encode('utf-8'))

@@ -228,7 +228,7 @@ def crawlForum(driver):
             except:
                 driver.refresh()
             html = driver.page_source
-            savePage(html, link)
+            savePage(driver, html, link)

             topics = topicPages(html)
             for topic in topics:

+4 -4   Forums/CryptBB/crawler_selenium.py

@@ -177,8 +177,8 @@ def getAccess():

 # Saves the crawled html page
-def savePage(page, url):
-    cleanPage = cleanHTML(page)
+def savePage(driver, page, url):
+    cleanPage = cleanHTML(driver, page)
     filePath = getFullPathName(url)
     os.makedirs(os.path.dirname(filePath), exist_ok=True)
     open(filePath, 'wb').write(cleanPage.encode('utf-8'))

@@ -254,7 +254,7 @@ def crawlForum(driver):
             except:
                 driver.refresh()
             html = driver.page_source
-            savePage(html, link)
+            savePage(driver, html, link)

             topics = topicPages(html)
             for topic in topics:

@@ -268,7 +268,7 @@ def crawlForum(driver):
                     driver.get(itemURL)
                 except:
                     driver.refresh()
-                savePage(driver.page_source, topic + f"page{counter}")  # very important
+                savePage(driver, driver.page_source, topic + f"page{counter}")  # very important

                 # comment out
                 if counter == 2:


+4 -4   Forums/HiddenAnswers/crawler_selenium.py

@@ -135,8 +135,8 @@ def getAccess():

 # Saves the crawled html page
-def savePage(page, url):
-    cleanPage = cleanHTML(page)
+def savePage(driver, page, url):
+    cleanPage = cleanHTML(driver, page)
     filePath = getFullPathName(url)
     os.makedirs(os.path.dirname(filePath), exist_ok=True)
     open(filePath, 'wb').write(cleanPage.encode('utf-8'))

@@ -202,7 +202,7 @@ def crawlForum(driver: webdriver.Firefox):
             except:
                 driver.refresh()
             html = driver.page_source
-            savePage(html, link)
+            savePage(driver, html, link)

             topics = topicPages(html)
             for topic in topics:

@@ -216,7 +216,7 @@ def crawlForum(driver: webdriver.Firefox):
                     driver.get(itemURL)
                 except:
                     driver.refresh()
-                savePage(driver.page_source, topic + f"page{counter}")  # very important
+                savePage(driver, driver.page_source, topic + f"page{counter}")  # very important

                 # comment out
                 if counter == 2:


+4 -4   Forums/Libre/crawler_selenium.py

@@ -159,8 +159,8 @@ def getAccess():

 # Saves the crawled html page
-def savePage(page, url):
-    cleanPage = cleanHTML(page)
+def savePage(driver, page, url):
+    cleanPage = cleanHTML(driver, page)
     filePath = getFullPathName(url)
     os.makedirs(os.path.dirname(filePath), exist_ok=True)
     open(filePath, 'wb').write(cleanPage.encode('utf-8'))

@@ -222,7 +222,7 @@ def crawlForum(driver):
             except:
                 driver.refresh()
             html = driver.page_source
-            savePage(html, link)
+            savePage(driver, html, link)

             topics = topicPages(html)
             for topic in topics:

@@ -236,7 +236,7 @@ def crawlForum(driver):
                     driver.get(itemURL)
                 except:
                     driver.refresh()
-                savePage(driver.page_source, topic + f"page{counter}")  # very important
+                savePage(driver, driver.page_source, topic + f"page{counter}")  # very important

                 # comment out
                 if counter == 2:


+4 -4   Forums/OnniForums/crawler_selenium.py

@@ -155,8 +155,8 @@ def getAccess():

 # Saves the crawled html page
-def savePage(page, url):
-    cleanPage = cleanHTML(page)
+def savePage(driver, page, url):
+    cleanPage = cleanHTML(driver, page)
     filePath = getFullPathName(url)
     os.makedirs(os.path.dirname(filePath), exist_ok=True)
     open(filePath, 'wb').write(cleanPage.encode('utf-8'))

@@ -232,7 +232,7 @@ def crawlForum(driver):
             except:
                 driver.refresh()
             html = driver.page_source
-            savePage(html, link)
+            savePage(driver, html, link)

             topics = topicPages(html)
             for topic in topics:

@@ -246,7 +246,7 @@ def crawlForum(driver):
                     driver.get(itemURL)
                 except:
                     driver.refresh()
-                savePage(driver.page_source, topic + f"page{counter}")  # very important
+                savePage(driver, driver.page_source, topic + f"page{counter}")  # very important

                 # comment out
                 if counter == 2:


+4 -4   Forums/Procrax/crawler_selenium.py

@@ -153,8 +153,8 @@ def getAccess():

 # Saves the crawled html page
-def savePage(page, url):
-    cleanPage = cleanHTML(page)
+def savePage(driver, page, url):
+    cleanPage = cleanHTML(driver, page)
     filePath = getFullPathName(url)
     os.makedirs(os.path.dirname(filePath), exist_ok=True)
     open(filePath, 'wb').write(cleanPage.encode('utf-8'))

@@ -223,7 +223,7 @@ def crawlForum(driver):
             except:
                 driver.refresh()
             html = driver.page_source
-            savePage(html, link)
+            savePage(driver, html, link)

             topics = topicPages(html)
             for topic in topics:

@@ -237,7 +237,7 @@ def crawlForum(driver):
                     driver.get(itemURL)
                 except:
                     driver.refresh()
-                savePage(driver.page_source, topic + f"page{counter}")  # very important
+                savePage(driver, driver.page_source, topic + f"page{counter}")  # very important

                 # comment out
                 if counter == 2:


+124 -3   Forums/Utilities/utilities.py

@@ -3,8 +3,42 @@ __author__ = 'DarkWeb'

 import string
 import time
 import re
+import hashlib
+import imghdr
+import base64
+import requests
+import io
 import urllib.parse as urlparse
 from datetime import datetime, timedelta
 import datetime as fulldatetime
 from bs4 import BeautifulSoup
+from lxml import html as lxml
+from selenium.webdriver.common.by import By
+from Crypto.Cipher import AES
+from Crypto.Util.Padding import pad, unpad
+from PIL import Image
+
+
+def generate_aes_key():
+    from Forums.Initialization.forums_mining import config
+
+    password = "password"
+    password_bytes = bytes(password, encoding="utf-8")
+
+    # Derive a key from the seed using PBKDF2
+    key = hashlib.pbkdf2_hmac(hash_name='sha256', password=password_bytes, salt=bytes(), iterations=1)
+
+    # Use the first 16 bytes of the derived key as the AES key
+    aes_key = key[:16]
+    # print("key: ", aes_key)
+
+    return aes_key
+
+
+BLOCK_SIZE = 32
+aes_key = generate_aes_key()
+encryptCipher = AES.new(aes_key, AES.MODE_ECB)
+decryptCipher = AES.new(aes_key, AES.MODE_ECB)


 def cleanText(originalText):
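
Note: generate_aes_key is deterministic by construction. It runs PBKDF2-HMAC-SHA256 over the literal string "password" with an empty salt and a single iteration, then keeps the first 16 bytes as an AES-128 key (the config import above is not yet used). A self-contained reproduction of exactly that computation:

import hashlib

# Same inputs as generate_aes_key in this diff: fixed password, empty salt,
# one iteration. The derived key is therefore identical on every machine,
# which is what lets a separate process decrypt the embedded images later,
# but it makes the encryption an obfuscation layer rather than real secrecy.
key = hashlib.pbkdf2_hmac(hash_name='sha256', password=b"password",
                          salt=bytes(), iterations=1)
aes_key = key[:16]        # AES-128 key
print(aes_key.hex())      # prints the same 32 hex characters on every run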
@@ -269,7 +303,96 @@ def convertFromLongDate(longDate, crawlerdate):
     return correct_date


-def cleanHTML(html):
+def aes_encryption(item):
+    to_bytes = bytes(item)
+    encrypted_bytes = encryptCipher.encrypt(pad(to_bytes, BLOCK_SIZE))
+    return encrypted_bytes
+
+
+def aes_decryption(item):
+    to_bytes = bytes(item)
+    decrypted_bytes = decryptCipher.decrypt(to_bytes)
+    return unpad(decrypted_bytes, BLOCK_SIZE)
+
+
+def encrypt_encode_image_to_base64(driver, xpath):
+    try:
+        img_element = driver.find_element(by=By.XPATH, value=xpath)
+        image_data = img_element.screenshot_as_png
+        encrypted_image = aes_encryption(image_data)
+        base64_image = base64.b64encode(encrypted_image)
+        string_image = base64_image.decode('utf-8')
+        return string_image
+    except:
+        pass
+    return None
+
+
+def decode_decrypt_image_in_base64(html_content):
+    soup = BeautifulSoup(html_content, 'html.parser')
+    for img_tag in soup.find_all('img'):
+        src_attr = img_tag.get('src')
+        if src_attr and src_attr.startswith('data:image'):
+            try:
+                string_image = src_attr.split('base64,')[-1]
+                base64_image = bytes(string_image, encoding='utf-8')
+                encrypted_image = base64.b64decode(base64_image)
+                decrypted_image = aes_decryption(encrypted_image)
+                im = Image.open(io.BytesIO(decrypted_image))
+                im.show()
+            except Exception as e:
+                print(e)
+                pass
+
+
+def replace_image_sources(driver, html_content):
+    tree = lxml.fromstring(html_content)
+
+    for picture_tag in tree.findall('.//picture'):
+        for source_tag in picture_tag.findall('.//source'):
+            picture_tag.remove(source_tag)
+
+    for img_tag in tree.findall('.//img'):
+        img_xpath = tree.getroottree().getpath(img_tag)
+        string_image = encrypt_encode_image_to_base64(driver, img_xpath)
+        if string_image:
+            img_tag.set('src', f'data:image/png;base64,{string_image}')
+        else:
+            img_tag.getparent().remove(img_tag)
+
+    modified_html = lxml.tostring(tree, encoding='utf-8').decode('utf-8')
+    return modified_html
+
+
+def cleanHTML(driver, html):
+    clean_html = replace_image_sources(driver, html)
+    # decode_decrypt_image_in_base64(clean_html)

     formats = [
         "jpg", "jpeg", "jfif", "pjpeg", "pjp",

@@ -278,8 +401,6 @@ def cleanHTML(html):
     ]

     # remove images
-    clean_html = re.sub(r"<img.*?>", "", html)
-    clean_html = re.sub(r"<picture.*?>", "", clean_html)
     clean_html = re.sub(r"<svg.*?>", "", clean_html)
     for fmat in formats:
         clean_html = re.sub(r"<object.*" + fmat + ".*?>", "", clean_html)

