From d1943e55866c7439790e52454590e041f060f44c Mon Sep 17 00:00:00 2001
From: westernmeadow
Date: Tue, 22 Aug 2023 16:35:10 -0700
Subject: [PATCH] Add image AES encryption, Base64 encoding, and HTML
 embedding

---
 Forums/AbyssForum/crawler_selenium.py       |  12 +-
 Forums/Altenens/crawler_selenium.py         |  28 ++--
 Forums/BestCardingWorld/crawler_selenium.py | 153 +++++++++++---------
 Forums/Cardingleaks/crawler_selenium.py     |   6 +-
 Forums/CryptBB/crawler_selenium.py          |   8 +-
 Forums/HiddenAnswers/crawler_selenium.py    |   8 +-
 Forums/Libre/crawler_selenium.py            |   8 +-
 Forums/OnniForums/crawler_selenium.py       |   8 +-
 Forums/Procrax/crawler_selenium.py          |   8 +-
 Forums/Utilities/utilities.py               | 127 +++++++++++++++-
 10 files changed, 252 insertions(+), 114 deletions(-)

diff --git a/Forums/AbyssForum/crawler_selenium.py b/Forums/AbyssForum/crawler_selenium.py
index 4d2ad99..129e6dc 100644
--- a/Forums/AbyssForum/crawler_selenium.py
+++ b/Forums/AbyssForum/crawler_selenium.py
@@ -135,8 +135,8 @@ def getAccess():
 
 
 # Saves the crawled html page
-def savePage(page, url):
-    cleanPage = cleanHTML(page)
+def savePage(driver, page, url):
+    cleanPage = cleanHTML(driver, page)
     filePath = getFullPathName(url)
     os.makedirs(os.path.dirname(filePath), exist_ok=True)
     open(filePath, 'wb').write(cleanPage.encode('utf-8'))
@@ -206,7 +206,7 @@ def crawlForum(driver):
             except:
                 driver.refresh()
             html = driver.page_source
-            savePage(html, link)
+            savePage(driver, html, link)
 
             topics = topicPages(html)
             for topic in topics:
@@ -220,7 +220,7 @@ def crawlForum(driver):
                         driver.get(itemURL)
                     except:
                         driver.refresh()
-                    savePage(driver.page_source, topic + f"page{counter}")
+                    savePage(driver, driver.page_source, topic + f"page{counter}")
 
                     # comment out
                     if counter == 2:
@@ -228,8 +228,8 @@ def crawlForum(driver):
 
                     try:
                         temp = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div[3]')
-                        item = temp.find_element(by=By.CLASS_NAME, value='button button-icon-only').get_attribute('href')
-                        if item == "":
+                        page = temp.find_element(by=By.CLASS_NAME, value='button button-icon-only').get_attribute('href')
+                        if page == "":
                             raise NoSuchElementException
                         counter += 1
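
The two-line savePage()/cleanHTML() change above is the mechanical half of this patch, and the same edit repeats in every crawler below: savePage() now receives the live Selenium driver and forwards it to cleanHTML(), because the new replace_image_sources() utility in Forums/Utilities/utilities.py (at the end of this patch) must screenshot each <img> element through the WebDriver before the tag is rewritten into an embedded data: URI or stripped.
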
+ f"page{counter}") # very important + savePage(driver, driver.page_source, topic + f"page{counter}") # very important # comment out if counter == 2: diff --git a/Forums/BestCardingWorld/crawler_selenium.py b/Forums/BestCardingWorld/crawler_selenium.py index 7e35381..96821cd 100644 --- a/Forums/BestCardingWorld/crawler_selenium.py +++ b/Forums/BestCardingWorld/crawler_selenium.py @@ -29,14 +29,14 @@ baseURL = 'http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion def startCrawling(): # opentor() forumName = getForumName() - # driver = getAccess() + driver = getAccess() - # if driver != 'down': - # try: - # crawlForum(driver) - # except Exception as e: - # print(driver.current_url, e) - # closetor(driver) + if driver != 'down': + try: + crawlForum(driver) + except Exception as e: + print(driver.current_url, e) + closetor(driver) new_parse(forumName, baseURL, True) @@ -44,10 +44,11 @@ def startCrawling(): # Opens Tor Browser #prompts for ENTER input to continue def opentor(): + from Forums.Initialization.forums_mining import config + global pid print("Connecting Tor...") - path = open('../../path.txt').readline().strip() - pro = subprocess.Popen(path) + pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pid = pro.pid time.sleep(7.5) input('Tor Connected. Press ENTER to continue\n') @@ -71,9 +72,9 @@ def getFixedURL(): # Closes Tor Browser #@param: current selenium driver def closetor(driver): - global pid + # global pid # os.system("taskkill /pid " + str(pro.pid)) - os.system("taskkill /t /f /im tor.exe") + # os.system("taskkill /t /f /im tor.exe") print('Closing Tor...') driver.close() time.sleep(3) @@ -83,12 +84,11 @@ def closetor(driver): # Creates FireFox 'driver' and configure its 'Profile' # to use Tor proxy and socket def createFFDriver(): - file = open('../../path.txt', 'r') - lines = file.readlines() + from Forums.Initialization.forums_mining import config - ff_binary = FirefoxBinary(lines[0].strip()) + ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) - ff_prof = FirefoxProfile(lines[1].strip()) + ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof.set_preference("places.history.enabled", False) ff_prof.set_preference("privacy.clearOnShutdown.offlineApps", True) ff_prof.set_preference("privacy.clearOnShutdown.passwords", True) @@ -110,7 +110,7 @@ def createFFDriver(): ff_prof.set_preference("javascript.enabled", True) ff_prof.update_preferences() - service = Service(lines[2].strip()) + service = Service(config.get('TOR', 'geckodriver_path')) driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service) @@ -131,8 +131,8 @@ def getAccess(): # Saves the crawled html page, makes the directory path for html pages if not made -def savePage(page, url): - cleanPage = cleanHTML(page) +def savePage(driver, page, url): + cleanPage = cleanHTML(driver, page) filePath = getFullPathName(url) os.makedirs(os.path.dirname(filePath), exist_ok=True) open(filePath, 'wb').write(cleanPage.encode('utf-8')) @@ -142,15 +142,14 @@ def savePage(page, url): # Gets the full path of the page to be saved along with its appropriate file name #@param: raw url as crawler crawls through every site def getFullPathName(url): + from Forums.Initialization.forums_mining import config, CURRENT_DATE + + mainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + getForumName() + "/HTML_Pages") fileName = getNameFromURL(url) if isDescriptionLink(url): - fullPath = 
-        fullPath = r'C:\Users\fakeguy\Documents\threatIntelligence-main\DarkWebMining_Working\Forums\BestCardingWorld\HTML_Pages\\' + str(
-            "%02d" % date.today().month) + str("%02d" % date.today().day) + str(
-            "%04d" % date.today().year) + r'\\' + r'Description\\' + fileName + '.html'
+        fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
     else:
-        fullPath = r'C:\Users\fakeguy\Documents\threatIntelligence-main\DarkWebMining_Working\Forums\BestCardingWorld\HTML_Pages\\' + str(
-            "%02d" % date.today().month) + str("%02d" % date.today().day) + str(
-            "%04d" % date.today().year) + r'\\' + r'Listing\\' + fileName + '.html'
+        fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
     return fullPath
@@ -171,30 +170,26 @@ def getNameFromURL(url):
 def getInterestedLinks():
     links = []
 
-    # Penetration Tests
-    links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=43')
+    # # Penetration Tests
+    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=43')
     # # Social Engineering Tests
-    links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=44')
+    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=44')
     # # Exploits
-    links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=45')
+    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=45')
     # # Tools
-    links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=46')
-    # # Malware
+    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=46')
+    # Malware
     links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=47')
     # # Cryptography
-    links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=48')
+    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=48')
     # # Others
-    links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=49')
+    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=49')
     # # Hacking Tutorials
-    links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=50')
+    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=50')
     # # Hacked Accounts and Database Dumps
     # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=30')
     # # Android Moded pak
-    links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=53')
-
-
-    #General Discussion
-    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=16&sid=6a4959d49be41e72944e5aa5684c187a')
+    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=53')
 
     return links
@@ -206,45 +201,70 @@ def crawlForum(driver):
     print("Crawling the BestCardingWorld forum")
 
     linksToCrawl = getInterestedLinks()
-    visited = set(linksToCrawl)
-    initialTime = time.time()
 
     i = 0
     while i < len(linksToCrawl):
         link = linksToCrawl[i]
         print('Crawling :', link)
         try:
-            try:
-                driver.get(link)
-            except:
-                driver.refresh()
-            html = driver.page_source
-            savePage(html, link)
-
             has_next_page = True
+            count = 0
+
             while has_next_page:
-                list = topicPages(html)
-                for item in list:
-                    itemURL = urlparse.urljoin(baseURL, str(item))
-                    try:
-                        driver.get(itemURL)
-                    except:
-                        driver.refresh()
-                    savePage(driver.page_source, item)
-                    driver.back()
+                try:
+                    driver.get(link)
+                except:
+                    driver.refresh()
+                html = driver.page_source
+                savePage(driver, html, link)
+
+                topics = topicPages(html)
+                for topic in topics:
+                    has_next_topic_page = True
+                    counter = 1
+                    page = topic
+
+                    while has_next_topic_page:
+                        itemURL = urlparse.urljoin(baseURL, str(page))
+                        try:
+                            driver.get(itemURL)
+                        except:
+                            driver.refresh()
+                        savePage(driver, driver.page_source, topic + f"page{counter}")
+
+                        # comment out
+                        if counter == 2:
+                            break
+
+                        try:
+                            nav = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div[2]/div[2]/div[4]/ul')
+                            li = nav.find_element(by=By.CLASS_NAME, value='next')
+                            page = li.find_element(by=By.TAG_NAME, value='a').get_attribute('href')
+                            if page == "":
+                                raise NoSuchElementException
+                            counter += 1
+
+                        except NoSuchElementException:
+                            has_next_topic_page = False
+
+                    # end of loop
+                    for j in range(counter):
+                        driver.back()
+
+                    # comment out
+                    break
+
+                # comment out
+                if count == 1:
+                    break
 
                 try:
-                    bar = driver.find_element(by=By.XPATH, value=
-                                              '/html/body/div[1]/div[2]/div[2]/div[3]/ul')
+                    bar = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div[2]/div[2]/div[3]/ul')
                     next = bar.find_element_by_class_name('next')
                     link = next.find_element_by_tag_name('a').get_attribute('href')
-
-                    try:
-                        driver.get(link)
-                    except:
-                        driver.refresh()
-                    html = driver.page_source
-                    savePage(html, link)
+                    if link == "":
+                        raise NoSuchElementException
+                    count += 1
 
                 except NoSuchElementException:
                     has_next_page = False
@@ -253,9 +273,6 @@ def crawlForum(driver):
             print(link, e)
         i += 1
 
-    # finalTime = time.time()
-    # print finalTime - initialTime
-
     input("Crawling BestCardingWorld forum done successfully. Press ENTER to continue\n")
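
The config.get() calls above replace both the old line-by-line ../../path.txt reads and the hard-coded C:\Users\... output paths with settings shared by all crawlers. The section and key names below are taken from this patch; the file itself is whatever Forums/Initialization/forums_mining.py loads, assumed here to be an INI-style file readable with configparser, and setup.ini is a placeholder name:

    import configparser

    config = configparser.ConfigParser()
    config.read('setup.ini')  # placeholder path; forums_mining defines the real one

    # [TOR] section: the three values formerly read line-by-line from path.txt
    firefox_binary = config.get('TOR', 'firefox_binary_path')
    firefox_profile = config.get('TOR', 'firefox_profile_path')
    geckodriver = config.get('TOR', 'geckodriver_path')

    # [Project] section: root that getFullPathName() now joins with
    # "Forums/<forum>/HTML_Pages/<CURRENT_DATE>/Description|Listing"
    shared_folder = config.get('Project', 'shared_folder')
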
diff --git a/Forums/Cardingleaks/crawler_selenium.py b/Forums/Cardingleaks/crawler_selenium.py
index de8dd0b..85538fd 100644
--- a/Forums/Cardingleaks/crawler_selenium.py
+++ b/Forums/Cardingleaks/crawler_selenium.py
@@ -160,7 +160,7 @@ def getAccess():
 
 # Saves the crawled html page
-def savePage(page, url):
-    cleanPage = cleanHTML(page)
+def savePage(driver, page, url):
+    cleanPage = cleanHTML(driver, page)
     filePath = getFullPathName(url)
     os.makedirs(os.path.dirname(filePath), exist_ok=True)
     open(filePath, 'wb').write(cleanPage.encode('utf-8'))
@@ -228,7 +228,7 @@ def crawlForum(driver):
             except:
                 driver.refresh()
             html = driver.page_source
-            savePage(html, link)
+            savePage(driver, html, link)
 
             topics = topicPages(html)
             for topic in topics:
diff --git a/Forums/CryptBB/crawler_selenium.py b/Forums/CryptBB/crawler_selenium.py
index bdc964c..5e98a7d 100644
--- a/Forums/CryptBB/crawler_selenium.py
+++ b/Forums/CryptBB/crawler_selenium.py
@@ -177,8 +177,8 @@ def getAccess():
 
 
 # Saves the crawled html page
-def savePage(page, url):
-    cleanPage = cleanHTML(page)
+def savePage(driver, page, url):
+    cleanPage = cleanHTML(driver, page)
     filePath = getFullPathName(url)
     os.makedirs(os.path.dirname(filePath), exist_ok=True)
     open(filePath, 'wb').write(cleanPage.encode('utf-8'))
@@ -254,7 +254,7 @@ def crawlForum(driver):
             except:
                 driver.refresh()
             html = driver.page_source
-            savePage(html, link)
+            savePage(driver, html, link)
 
             topics = topicPages(html)
             for topic in topics:
@@ -268,7 +268,7 @@ def crawlForum(driver):
                         driver.get(itemURL)
                     except:
                         driver.refresh()
-                    savePage(driver.page_source, topic + f"page{counter}") # very important
+                    savePage(driver, driver.page_source, topic + f"page{counter}") # very important
 
                     # comment out
                     if counter == 2:
diff --git a/Forums/HiddenAnswers/crawler_selenium.py b/Forums/HiddenAnswers/crawler_selenium.py
index 46e445e..6641b81 100644
--- a/Forums/HiddenAnswers/crawler_selenium.py
+++ b/Forums/HiddenAnswers/crawler_selenium.py
@@ -135,8 +135,8 @@ def getAccess():
 
 
 # Saves the crawled html page
-def savePage(page, url):
-    cleanPage = cleanHTML(page)
+def savePage(driver, page, url):
+    cleanPage = cleanHTML(driver, page)
     filePath = getFullPathName(url)
     os.makedirs(os.path.dirname(filePath), exist_ok=True)
     open(filePath, 'wb').write(cleanPage.encode('utf-8'))
@@ -202,7 +202,7 @@ def crawlForum(driver: webdriver.Firefox):
             except:
                 driver.refresh()
             html = driver.page_source
-            savePage(html, link)
+            savePage(driver, html, link)
 
             topics = topicPages(html)
             for topic in topics:
@@ -216,7 +216,7 @@ def crawlForum(driver: webdriver.Firefox):
                         driver.get(itemURL)
                     except:
                         driver.refresh()
-                    savePage(driver.page_source, topic + f"page{counter}") # very important
+                    savePage(driver, driver.page_source, topic + f"page{counter}") # very important
 
                     # comment out
                     if counter == 2:
diff --git a/Forums/Libre/crawler_selenium.py b/Forums/Libre/crawler_selenium.py
index 4697cda..d06cd83 100644
--- a/Forums/Libre/crawler_selenium.py
+++ b/Forums/Libre/crawler_selenium.py
@@ -159,8 +159,8 @@ def getAccess():
 
 
 # Saves the crawled html page
-def savePage(page, url):
-    cleanPage = cleanHTML(page)
+def savePage(driver, page, url):
+    cleanPage = cleanHTML(driver, page)
     filePath = getFullPathName(url)
     os.makedirs(os.path.dirname(filePath), exist_ok=True)
     open(filePath, 'wb').write(cleanPage.encode('utf-8'))
@@ -222,7 +222,7 @@ def crawlForum(driver):
             except:
                 driver.refresh()
             html = driver.page_source
-            savePage(html, link)
+            savePage(driver, html, link)
 
             topics = topicPages(html)
             for topic in topics:
@@ -236,7 +236,7 @@ def crawlForum(driver):
                         driver.get(itemURL)
                     except:
                         driver.refresh()
-                    savePage(driver.page_source, topic + f"page{counter}") # very important
+                    savePage(driver, driver.page_source, topic + f"page{counter}") # very important
 
                     # comment out
                     if counter == 2:
diff --git a/Forums/OnniForums/crawler_selenium.py b/Forums/OnniForums/crawler_selenium.py
index a7d0c15..58b1313 100644
--- a/Forums/OnniForums/crawler_selenium.py
+++ b/Forums/OnniForums/crawler_selenium.py
@@ -155,8 +155,8 @@ def getAccess():
 
 
 # Saves the crawled html page
-def savePage(page, url):
-    cleanPage = cleanHTML(page)
+def savePage(driver, page, url):
+    cleanPage = cleanHTML(driver, page)
     filePath = getFullPathName(url)
     os.makedirs(os.path.dirname(filePath), exist_ok=True)
     open(filePath, 'wb').write(cleanPage.encode('utf-8'))
@@ -232,7 +232,7 @@ def crawlForum(driver):
             except:
                 driver.refresh()
             html = driver.page_source
-            savePage(html, link)
+            savePage(driver, html, link)
 
             topics = topicPages(html)
             for topic in topics:
@@ -246,7 +246,7 @@ def crawlForum(driver):
                         driver.get(itemURL)
                     except:
                         driver.refresh()
-                    savePage(driver.page_source, topic + f"page{counter}") # very important
+                    savePage(driver, driver.page_source, topic + f"page{counter}") # very important
 
                     # comment out
                     if counter == 2:
diff --git a/Forums/Procrax/crawler_selenium.py b/Forums/Procrax/crawler_selenium.py
index fc54a30..f2ed372 100644
--- a/Forums/Procrax/crawler_selenium.py
+++ b/Forums/Procrax/crawler_selenium.py
@@ -153,8 +153,8 @@ def getAccess():
 
 
 # Saves the crawled html page
-def savePage(page, url):
-    cleanPage = cleanHTML(page)
+def savePage(driver, page, url):
+    cleanPage = cleanHTML(driver, page)
     filePath = getFullPathName(url)
     os.makedirs(os.path.dirname(filePath), exist_ok=True)
     open(filePath, 'wb').write(cleanPage.encode('utf-8'))
@@ -223,7 +223,7 @@ def crawlForum(driver):
             except:
                 driver.refresh()
             html = driver.page_source
-            savePage(html, link)
+            savePage(driver, html, link)
 
             topics = topicPages(html)
             for topic in topics:
@@ -237,7 +237,7 @@ def crawlForum(driver):
                         driver.get(itemURL)
                     except:
                         driver.refresh()
-                    savePage(driver.page_source, topic + f"page{counter}") # very important
+                    savePage(driver, driver.page_source, topic + f"page{counter}") # very important
 
                     # comment out
                     if counter == 2:
diff --git a/Forums/Utilities/utilities.py b/Forums/Utilities/utilities.py
index c451758..fdd5495 100644
--- a/Forums/Utilities/utilities.py
+++ b/Forums/Utilities/utilities.py
@@ -3,8 +3,42 @@ __author__ = 'DarkWeb'
 import string
 import time
 import re
+import hashlib
+import imghdr
+import base64
+import requests
+import io
+import urllib.parse as urlparse
 from datetime import datetime, timedelta
 import datetime as fulldatetime
+from bs4 import BeautifulSoup
+from lxml import html as lxml
+from selenium.webdriver.common.by import By
+from Crypto.Cipher import AES
+from Crypto.Util.Padding import pad, unpad
+from PIL import Image
+
+
+def generate_aes_key():
+    from Forums.Initialization.forums_mining import config
+
+    password = "password"
+    password_bytes = bytes(password, encoding="utf-8")
+
+    # Derive a key from the seed using PBKDF2
+    key = hashlib.pbkdf2_hmac(hash_name='sha256', password=password_bytes, salt=bytes(), iterations=1)
+
+    # Use the first 16 bytes of the derived key as the AES key
+    aes_key = key[:16]
+
+    # print("key: ", aes_key)
+    return aes_key
+
+
+BLOCK_SIZE = 32
+aes_key = generate_aes_key()
+encryptCipher = AES.new(aes_key, AES.MODE_ECB)
+decryptCipher = AES.new(aes_key, AES.MODE_ECB)
 
 
 def cleanText(originalText):
@@ -269,7 +303,96 @@ def convertFromLongDate(longDate, crawlerdate):
     return correct_date
 
 
-def cleanHTML(html):
+def aes_encryption(item):
+
+    to_bytes = bytes(item)
+
+    encrypted_bytes = encryptCipher.encrypt(pad(to_bytes, BLOCK_SIZE))
+
+    return encrypted_bytes
+
+
+def aes_decryption(item):
+
+    to_bytes = bytes(item)
+
+    decrypted_bytes = decryptCipher.decrypt(to_bytes)
+
+    return unpad(decrypted_bytes, BLOCK_SIZE)
+
+
+def encrypt_encode_image_to_base64(driver, xpath):
+
+    try:
+
+        img_element = driver.find_element(by=By.XPATH, value=xpath)
+        image_data = img_element.screenshot_as_png
+
+        encrypted_image = aes_encryption(image_data)
+        base64_image = base64.b64encode(encrypted_image)
+        string_image = base64_image.decode('utf-8')
+
+        return string_image
+
+    except:
+        pass
+
+    return None
+
+
+def decode_decrypt_image_in_base64(html_content):
+
+    soup = BeautifulSoup(html_content, 'html.parser')
+
+    for img_tag in soup.find_all('img'):
+
+        src_attr = img_tag.get('src')
+
+        if src_attr and src_attr.startswith('data:image'):
+
+            try:
+
+                string_image = src_attr.split('base64,')[-1]
+                base64_image = bytes(string_image, encoding='utf-8')
+                encrypted_image = base64.b64decode(base64_image)
+                decrypted_image = aes_decryption(encrypted_image)
+
+                im = Image.open(io.BytesIO(decrypted_image))
+                im.show()
+
+            except Exception as e:
+                print(e)
+                pass
+
+
+def replace_image_sources(driver, html_content):
+
+    tree = lxml.fromstring(html_content)
+
+    for picture_tag in tree.findall('.//picture'):
+        for source_tag in picture_tag.findall('.//source'):
+            picture_tag.remove(source_tag)
+
+    for img_tag in tree.findall('.//img'):
+
+        img_xpath = tree.getroottree().getpath(img_tag)
+
+        string_image = encrypt_encode_image_to_base64(driver, img_xpath)
+
+        if string_image:
+            img_tag.set('src', f'data:image/png;base64,{string_image}')
+        else:
+            img_tag.getparent().remove(img_tag)
+
+    modified_html = lxml.tostring(tree, encoding='utf-8').decode('utf-8')
+
+    return modified_html
+
+
+def cleanHTML(driver, html):
+
+    clean_html = replace_image_sources(driver, html)
+    # decode_decrypt_image_in_base64(clean_html)
 
     formats = [
         "jpg", "jpeg", "jfif", "pjpeg", "pjp",
@@ -278,8 +401,6 @@
     ]
 
     # remove images
-    clean_html = re.sub(r"", "", html)
-    clean_html = re.sub(r"", "", clean_html)
     clean_html = re.sub(r"", "", clean_html)
     for fmat in formats:
         clean_html = re.sub(r"", "", clean_html)
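
Taken together, the new utilities AES-encrypt each screenshotted image, Base64-encode the ciphertext, and inline it as the <img> tag's data: URI; decode_decrypt_image_in_base64() reverses the process for inspection. A minimal, self-contained round trip of that scheme (same key derivation, ECB mode, and padding block size as above; pycryptodome and Pillow as imported by the patch; sample.png is a placeholder input):

    import base64
    import hashlib
    import io

    from Crypto.Cipher import AES
    from Crypto.Util.Padding import pad, unpad
    from PIL import Image

    BLOCK_SIZE = 32
    # generate_aes_key(): PBKDF2-HMAC-SHA256, empty salt, one iteration, first 16 bytes
    aes_key = hashlib.pbkdf2_hmac('sha256', b'password', b'', 1)[:16]

    with open('sample.png', 'rb') as f:  # placeholder image file
        raw = f.read()

    # forward path: what replace_image_sources() stores in each <img> src
    encrypted = AES.new(aes_key, AES.MODE_ECB).encrypt(pad(raw, BLOCK_SIZE))
    src = 'data:image/png;base64,' + base64.b64encode(encrypted).decode('utf-8')

    # reverse path: what decode_decrypt_image_in_base64() does per tag
    blob = base64.b64decode(src.split('base64,')[-1])
    restored = unpad(AES.new(aes_key, AES.MODE_ECB).decrypt(blob), BLOCK_SIZE)

    assert restored == raw
    Image.open(io.BytesIO(restored)).verify()  # still a valid PNG

Because the password, salt, and iteration count are fixed in generate_aes_key(), every checkout of the code derives the identical key, so the embedding is reversible obfuscation of the stored pages rather than per-deployment secrecy.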