"""Manual smoke test for the onniForums listing parser.

Reads one saved listing page from disk, parses it with BeautifulSoup and
prints whatever ``onniForums_listing_parser`` returns for it. A second,
currently disabled harness for ``onniForums_description_parser`` is kept
below as a comment block.
"""
import os
from parser import onniForums_description_parser
from parser import onniForums_listing_parser
from bs4 import BeautifulSoup

# Filesystem path (not a URL, despite the name) of the saved page under test.
baseUrl = './HTML_Pages/06222023/Listing/httponnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qdonionForumCardingFraud.html'

# Decode explicitly instead of relying on the platform default encoding, so
# the script behaves the same on every OS/locale.
# NOTE(review): assumes the saved pages were written as UTF-8 -- confirm
# against the code that writes them.
with open(baseUrl, 'r', encoding='utf-8') as file:
    testHTML = file.read()

soup = BeautifulSoup(testHTML, 'html.parser')
output = onniForums_listing_parser(soup)
print(output)

# ---------------------------------------------------------------------------
# Disabled harness: runs the description parser over every saved description
# page and reports how many files failed with UnicodeDecodeError.
# Uncomment to use.
# ---------------------------------------------------------------------------
# all_descriptions = os.listdir("./HTML_Pages/06202023/Description/")[1:]
# total = len(all_descriptions)
# descriptions_with_unicode_error = 0
# print("\nTESTING DESCRIPTION PARSER:\n")
# for desc in all_descriptions:
#     print(f"\nTesting: ./HTML_Pages/06202023/Description/{desc} \n")
#     try:
#         with open(f"./HTML_Pages/06202023/Description/{desc}", "r") as file:
#             test_html = file.read()
#         soup = BeautifulSoup(test_html, features="html.parser")
#         description_output = onniForums_description_parser(soup)
#         print(f"\nTopic name : {description_output[0]}")
#         print(f"Contents : {description_output[1]}")
#         print(f"Users : {description_output[2]}")
#         print(f"Dates posted: {description_output[3]}")
#         print(f"Feedbacks : {description_output[4]}")
#         print(f"Statuses : {description_output[5]}")
#         print(f"Reputations : {description_output[6]}")
#         print(f"Signatures : {description_output[7]}")
#         print(f"Interests : {description_output[8]}\n")
#     except UnicodeDecodeError:
#         descriptions_with_unicode_error += 1
#         print(f"UnicodeDecodeError: the file `{desc}` cannot be decoded by Python!")
# print("\nTESTING COMPLETE\n")
# print(f"Number of descriptions : {total}")
# print(f"Descriptions w/ errors : {descriptions_with_unicode_error}")
# print(f"Failure percentage : {round(descriptions_with_unicode_error/total, 4) * 100}%\n")