diff --git a/Forums/Initialization/prepare_parser.py b/Forums/Initialization/prepare_parser.py index ac1523f..3d2f388 100644 --- a/Forums/Initialization/prepare_parser.py +++ b/Forums/Initialization/prepare_parser.py @@ -97,6 +97,7 @@ def read_file(filePath, createLog, logFile): html = codecs.open(filePath.strip('\n'), encoding='utf8') soup = BeautifulSoup(html, "html.parser") html.close() + time.sleep(0.01) # making sure the file is closed before returning soup object return soup except: @@ -104,6 +105,7 @@ def read_file(filePath, createLog, logFile): html = open(filePath.strip('\n')) soup = BeautifulSoup(html, "html.parser") html.close() + time.sleep(0.01) # making sure the file is closed before returning soup object return soup except: @@ -212,7 +214,7 @@ def persist_record(url, rec, cur, con, createLog, logFile, listingFile, descript def move_file(filePath, createLog, logFile): source = filePath - destination = filePath.replace(os.path.basename(filePath), "") + r'Read/' + os.path.basename(filePath) + destination = filePath.replace(os.path.basename(filePath), "") + 'Read\\' + os.path.basename(filePath) try: shutil.move(source, destination, shutil.copy2) @@ -250,7 +252,7 @@ def new_parse(forum, url, createLog): # Creating the tables (The database should be created manually) create_database(cur, con) - mainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + forum + "/HTML_Pages") + mainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums\\" + forum + "\\HTML_Pages") # Creating the log file for each Forum if createLog: diff --git a/Forums/Utilities/utilities.py b/Forums/Utilities/utilities.py index 741ec1f..2a5e2f0 100644 --- a/Forums/Utilities/utilities.py +++ b/Forums/Utilities/utilities.py @@ -353,29 +353,24 @@ def encrypt_encode_image_to_base64(driver, xpath): return None -def decode_decrypt_image_in_base64(html_content): +def decode_decrypt_image_in_base64(string_image): - soup = BeautifulSoup(html_content, 'html.parser') - - for img_tag in soup.find_all('img'): - - src_attr = img_tag.get('src') + try: - if src_attr and src_attr.startswith('data:image'): + base64_image = bytes(string_image, encoding='utf-8') + encrypted_image = base64.b64decode(base64_image) + decrypted_image = aes_decryption(encrypted_image) - try: + im = Image.open(io.BytesIO(decrypted_image)) + im.show() - string_image = src_attr.split('base64,')[-1] - base64_image = bytes(string_image, encoding='utf-8') - encrypted_image = base64.b64decode(base64_image) - decrypted_image = aes_decryption(encrypted_image) + return decrypted_image - im = Image.open(io.BytesIO(decrypted_image)) - im.show() + except Exception as e: + print(e) + pass - except Exception as e: - print(e) - pass + return None def replace_image_sources(driver, html_content): diff --git a/MarketPlaces/DB_Connection/db_connection.py b/MarketPlaces/DB_Connection/db_connection.py index 03de3d2..8769869 100644 --- a/MarketPlaces/DB_Connection/db_connection.py +++ b/MarketPlaces/DB_Connection/db_connection.py @@ -229,7 +229,7 @@ def create_vendor(cur, row, marketId): recset = cur.fetchall() - decode_decrypt_image_in_base64(recset[0][5]) + # decode_decrypt_image_in_base64(recset[0][5]) if (str(recset[0]['rating_vendor']) != str(row[2] if row[2] != '-1' else None) or # there was a change in the vendor information str(recset[0]['successfultransactions_vendor']) != str(row[3] if row[3] != '-1' else None) or diff --git a/MarketPlaces/Initialization/markets_mining.py b/MarketPlaces/Initialization/markets_mining.py index 21888fc..5ec07b6 100644 --- a/MarketPlaces/Initialization/markets_mining.py +++ b/MarketPlaces/Initialization/markets_mining.py @@ -89,7 +89,7 @@ def opentor(): if __name__ == '__main__': - # opentor() + opentor() mktsList = getMarkets() diff --git a/MarketPlaces/Initialization/prepare_parser.py b/MarketPlaces/Initialization/prepare_parser.py index b94723f..9cfe2a9 100644 --- a/MarketPlaces/Initialization/prepare_parser.py +++ b/MarketPlaces/Initialization/prepare_parser.py @@ -107,6 +107,7 @@ def read_file(filePath, createLog, logFile): html = codecs.open(filePath.strip('\n'), encoding='utf8') soup = BeautifulSoup(html, "html.parser") html.close() + time.sleep(0.01) # making sure the file is closed before returning soup object return soup except: @@ -114,6 +115,7 @@ def read_file(filePath, createLog, logFile): html = open(filePath.strip('\n')) soup = BeautifulSoup(html, "html.parser") html.close() + time.sleep(0.01) # making sure the file is closed before returning soup object return soup except: @@ -271,7 +273,7 @@ def persist_record(url, rec, cur, con, createLog, logFile, listingFile, descript def move_file(filePath, createLog, logFile): source = filePath - destination = filePath.replace(os.path.basename(filePath), "") + r'Read/' + os.path.basename(filePath) + destination = filePath.replace(os.path.basename(filePath), "") + 'Read\\' + os.path.basename(filePath) try: shutil.move(source, destination, shutil.copy2) @@ -307,7 +309,7 @@ def new_parse(marketPlace, url, createLog): # Creating the tables (The database should be created manually) create_database(cur, con) - mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + marketPlace + "/HTML_Pages") + mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces\\" + marketPlace + "\\HTML_Pages") # Creating the log file for each Forum if createLog: