# This module is based on the CALSysLab project.
# (Repository-page scrape residue removed: topic-selection hint, line/size counts.)
__author__ = 'DarkWeb'
import string
import time
import re
import hashlib
import base64
import io
import configparser
import json
import keras
import cv2
import numpy as np
from keras.preprocessing import image
from keras.applications.imagenet_utils import preprocess_input
from keras.models import Model
from datetime import datetime, timedelta
from lxml import html as lxml
from selenium.webdriver.common.by import By
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad, unpad
from PIL import Image
from urllib.parse import urlsplit, urljoin
def generate_aes_key():
    """Derive the module's 16-byte AES key from the secret in ../../setup.ini.

    Reads the [Encryption] `secret` value, runs one round of
    PBKDF2-HMAC-SHA256 with an empty salt, and keeps the first 16 bytes
    as an AES-128 key.

    NOTE(review): a single iteration with an empty salt offers no
    brute-force resistance — presumably this is obfuscation, not real
    key stretching; confirm before relying on it for confidentiality.
    """
    parser = configparser.ConfigParser()
    parser.read('../../setup.ini')
    seed = parser.get('Encryption', 'secret').encode("utf-8")
    # One deterministic PBKDF2 round so encryption/decryption always
    # rebuild the same key from the same secret.
    derived = hashlib.pbkdf2_hmac(hash_name='sha256', password=seed, salt=bytes(), iterations=1)
    return derived[:16]
# Padding size (bytes) for pad()/unpad(); a multiple of AES's 16-byte block,
# so ECB round-trips cleanly.
BLOCK_SIZE = 32
# Module-wide AES-128 key, derived once at import time from setup.ini.
aes_key = generate_aes_key()
# NOTE(review): ECB mode leaks patterns across identical plaintext blocks —
# acceptable only if this is obfuscation rather than real confidentiality.
encryptCipher = AES.new(aes_key, AES.MODE_ECB)
decryptCipher = AES.new(aes_key, AES.MODE_ECB)
# ResNet50 pretrained on ImageNet; the 'avg_pool' layer output serves as a
# fixed-length image feature vector (used by extract_hidden_layer_output).
model = keras.applications.ResNet50(weights='imagenet', include_top=True)
feat_extractor = Model(inputs=model.input, outputs=model.get_layer('avg_pool').output)
# Shared SIFT detector for extract_keypoints().
sift = cv2.SIFT_create(
    nfeatures=0,             # Number of features, 0 for unlimited
    nOctaveLayers=3,         # Number of layers per octave
    contrastThreshold=0.09,  # Contrast threshold
    edgeThreshold=10,        # Edge threshold
    sigma=1.6                # Initial Gaussian blur sigma
)
def generate_image_hash(image_string):
    """Return the SHA-256 hex digest of a base64-encoded image payload."""
    raw = base64.b64decode(image_string.encode('utf-8'))
    return hashlib.sha256(raw).hexdigest()
def extract_hidden_layer_output(image_string):
    """Decode a base64-encoded image and return its ResNet50 'avg_pool'
    feature vector as a JSON-encoded list of floats."""
    raw = base64.b64decode(image_string.encode('utf-8'))
    rgb = Image.open(io.BytesIO(raw)).convert('RGB')
    arr = image.img_to_array(rgb)
    # Resize to the network's expected spatial dims before batching.
    arr = image.smart_resize(arr, size=model.input_shape[1:3], interpolation='nearest')
    batch = preprocess_input(np.expand_dims(arr, axis=0))
    return json.dumps(feat_extractor.predict(batch)[0].tolist())
def extract_keypoints(image_string):
    """Run SIFT on a base64-encoded image.

    Returns a (keypoints_json, descriptors_json) pair, or (None, None)
    when no keypoints are detected.
    """
    raw = base64.b64decode(image_string.encode('utf-8'))
    buffer = np.asarray(bytearray(raw), dtype=np.uint8)
    gray = cv2.imdecode(buffer, cv2.IMREAD_GRAYSCALE)
    keypoints, descriptors = sift.detectAndCompute(gray, None)
    if not keypoints:
        return None, None
    return json.dumps(wrap_keypoints(keypoints)), json.dumps(descriptors.tolist())
def wrap_keypoints(keypoints):
    """Convert cv2.KeyPoint objects into JSON-serializable dicts
    (inverse of unwrap_keypoints)."""
    return [
        {
            'pt': kp.pt,
            'size': kp.size,
            'angle': kp.angle,
            'octave': kp.octave,
            'response': kp.response,
            'class_id': kp.class_id,
        }
        for kp in keypoints
    ]
def unwrap_keypoints(keypoints_list):
    """Rebuild a tuple of cv2.KeyPoint objects from wrap_keypoints() dicts."""
    return tuple(
        cv2.KeyPoint(
            x=entry['pt'][0],
            y=entry['pt'][1],
            size=entry['size'],
            angle=entry['angle'],
            octave=entry['octave'],
            response=entry['response'],
            class_id=entry['class_id'],
        )
        for entry in keypoints_list
    )
def cleanText(originalText):
    """Strip every character outside the safe whitelist from each string in
    the list, mutating *originalText* in place and returning it."""
    allowed = set(string.ascii_letters + string.digits
                  + " _/&$#@+-*=:;.,?!{}[]()%`~^|<>")
    for idx, entry in enumerate(originalText):
        originalText[idx] = ''.join(ch for ch in entry if ch in allowed)
    return originalText
def cleanLink(originalLink):
    """Return *originalLink* with everything except ASCII letters and
    digits removed."""
    allowed = set(string.ascii_letters + string.digits)
    return ''.join(ch for ch in originalLink if ch in allowed)
def organizeProducts(marketplace, nm, vendor, rating_vendor, success_vendor, nombre, CVE, MS, category, describe,
                     views, reviews, rating_item, addDate, BTC, USD, EURO, sold, qLeft, shipFrom, shipTo, href, image, image_vendor):
    """Flatten per-product scrape lists into comma-joined row strings.

    *nm* is the number of products; *marketplace* and *nombre* are
    required, every other column falls back to "-1" when its list is
    empty.  Column 23 is the scrape timestamp (MM/DD/YYYY HH:MM:SS,
    12-hour clock).  Returns the list of row strings.
    """
    rows = []
    now = datetime.now()
    day = now.strftime("%m/%d/%Y")
    ahora = now.strftime("%I:%M:%S")
    for n in range(nm):
        def pick(column):
            # Empty scrape list -> "-1" placeholder, else the n-th value.
            return "-1" if len(column) == 0 else column[n]
        fields = [
            marketplace,           # 0
            pick(vendor),          # 1
            pick(rating_vendor),   # 2
            pick(success_vendor),  # 3
            nombre[n],             # 4 (required)
            pick(describe),        # 5
            pick(CVE),             # 6
            pick(MS),              # 7
            pick(category),        # 8
            pick(views),           # 9
            pick(reviews),         # 10
            pick(rating_item),     # 11
            pick(addDate),         # 12
            pick(BTC),             # 13
            pick(USD),             # 14
            pick(EURO),            # 15
            pick(sold),            # 16
            pick(qLeft),           # 17
            pick(shipFrom),        # 18
            pick(shipTo),          # 19
            pick(image),           # 20
            pick(image_vendor),    # 21
            pick(href),            # 22
            day + " " + ahora,     # 23
        ]
        rows.append(",".join(fields))
    return rows
def cleanString(originalString):
updated_string = originalString.replace(",", "") #replace all commas
updated_string = updated_string.replace("\n", "") #replace all newlines
updated_string = updated_string.replace("\t", "") #replace all tabs
updated_string = updated_string.replace("\r", "") #replace all carriage returns
updated_string = updated_string.replace("'", "^") #replace all semicolons
updated_string = updated_string.replace(u"»", '') #replace all arrows
updated_string = updated_string.replace("!", "") #replace all exclamation points
updated_string = updated_string.replace(";", "") #replace all exclamations
return updated_string
def checkDateFormat(myString):
    """Return a re.Match if *myString* starts with an MM/DD/YYYY-style
    date (loose ranges: months 00-19, days 00-39, years 1000-2999),
    else None.

    Fix: the original pattern was a non-raw string containing the
    invalid escape sequence '\\/', which raises SyntaxWarning since
    Python 3.12 and is slated to become an error.  The raw string below
    matches exactly the same inputs.
    """
    return re.match(r'[0-1][0-9]/[0-3][0-9]/[1-2][0-9]{3}', myString)
def cleanNumbers(inputString):
    """Strip everything except decimal digits and '.' from *inputString*
    (e.g. "$1,234.56 USD" -> "1234.56")."""
    return re.sub(r'[^\d.]+', '', inputString)
def aes_encryption(data_bytes):
    """Pad *data_bytes* to BLOCK_SIZE and encrypt with the module-level
    AES-ECB cipher; returns the ciphertext bytes."""
    padded = pad(data_bytes, BLOCK_SIZE)
    return encryptCipher.encrypt(padded)
def aes_decryption(data_bytes):
    """Decrypt *data_bytes* with the module-level AES-ECB cipher and
    strip the BLOCK_SIZE padding; inverse of aes_encryption."""
    plaintext = decryptCipher.decrypt(data_bytes)
    return unpad(plaintext, BLOCK_SIZE)
def encrypt_encode_image_to_base64(driver, xpath):
    """Screenshot the element at *xpath*, AES-encrypt the PNG bytes and
    return them base64-encoded as a str, or None if anything fails."""
    try:
        element = driver.find_element(by=By.XPATH, value=xpath)
        cipher_bytes = aes_encryption(element.screenshot_as_png)
        return base64.b64encode(cipher_bytes).decode('utf-8')
    except Exception as e:
        # Best-effort: missing/stale elements are reported and skipped.
        print(e)
    return None
def decode_decrypt_image_in_base64(image_string):
    """Reverse encrypt_encode_image_to_base64: base64-decode, AES-decrypt,
    and return the plaintext image re-encoded as base64, or None on
    failure."""
    try:
        cipher_bytes = base64.b64decode(image_string.encode('utf-8'))
        plain_bytes = aes_decryption(cipher_bytes)
        return base64.b64encode(plain_bytes).decode('utf-8')
    except Exception as e:
        # Best-effort: malformed payloads are reported and skipped.
        print(e)
    return None
def replace_image_sources(driver, html_content):
    """Inline every <img> in *html_content* as an encrypted base64 data URI.

    <source> children of <picture> tags are dropped so only the <img>
    remains; images that cannot be captured are removed from the tree.
    Returns the modified HTML as a str.
    """
    tree = lxml.fromstring(html_content)
    for picture in tree.findall('.//picture'):
        for source in picture.findall('.//source'):
            picture.remove(source)
    root = tree.getroottree()
    for img in tree.findall('.//img'):
        encoded = encrypt_encode_image_to_base64(driver, root.getpath(img))
        if encoded:
            img.set('src', f'data:image/png;base64,{encoded}')
        else:
            img.getparent().remove(img)
    return lxml.tostring(tree, encoding='utf-8').decode('utf-8')
def cleanHTML(driver, html):
    """Return *html* with images, scripts and other active content removed.

    Images are first inlined (encrypted, base64) by replace_image_sources,
    then remaining image/script vectors are stripped with regexes.

    Fixes over the original:
    - all patterns are raw strings; the originals embedded the invalid
      escape sequence '\\s' in plain strings (SyntaxWarning since
      Python 3.12, future error);
    - the applet pattern said "<aplet" (missing a 'p'), so it could never
      match a real <applet> tag.
    """
    clean_html = replace_image_sources(driver, html)
    formats = [
        "jpg", "jpeg", "jfif", "pjpeg", "pjp",
        "png", "apng", "svg", "bmp", "gif",
        "avif", "webp", "ico", "cur", "tiff"
    ]
    # remove images
    clean_html = re.sub(r"<svg[\s\S]*?svg>", "", clean_html)
    for fmat in formats:
        clean_html = re.sub(r"<object.*" + fmat + r"[\s\S]*?object>", "", clean_html)
    clean_html = re.sub(r"<canvas[\s\S]*?canvas>", "", clean_html)
    # remove JavaScript
    clean_html = re.sub(r"<script[\s\S]*?script>", "", clean_html)
    clean_html = re.sub(r"<iframe[\s\S]*?iframe>", "", clean_html)
    clean_html = re.sub(r"<object.*javascript[\s\S]*?object>", "", clean_html)
    clean_html = re.sub(r"<applet.*mayscript[\s\S]*?applet>", "", clean_html)
    clean_html = re.sub(r"<embed.*scriptable[\s\S]*?embed>", "", clean_html)
    # remove divs that carry an inline background-image style
    clean_html = re.sub(r"<div[^>]*style=\"[^\"]*background-image[\s\S]*?div>", "", clean_html)
    return clean_html
def get_relative_url(target_url):
    """Return the path (plus '?query' when present) of *target_url*.

    Joining against a dummy base lets urljoin handle relative and
    absolute inputs uniformly; any fragment is discarded by urlsplit.
    """
    resolved = urlsplit(urljoin("http://dummybaseurl.com/", target_url))
    if resolved.query:
        return resolved.path + '?' + resolved.query
    return resolved.path