This is based on the calsyslab project.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

57 lines
1.9 KiB

  1. import os
  2. from Forums.OnniForums.parser import onniForums_description_parser
  3. from Forums.OnniForums.parser import onniForums_listing_parser
  4. from bs4 import BeautifulSoup
  5. baseUrl = './HTML_Pages/06272023/Listing/httponnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qdonionForumCardingFraud.html'
  6. with open(baseUrl, 'r') as file:
  7. testHTML = file.read()
  8. soup = BeautifulSoup(testHTML, 'html.parser')
  9. output = onniForums_listing_parser(soup)
  10. print(output)
  11. all_descriptions = os.listdir("./HTML_Pages/06272023/Description/")[1:]
  12. total = len(all_descriptions)
  13. descriptions_with_unicode_error = 0
  14. print("\nTESTING DESCRIPTION PARSER:\n")
  15. for desc in all_descriptions:
  16. print(f"\nTesting: ./HTML_Pages/06272023/Description/{desc} \n")
  17. try:
  18. with open(f"./HTML_Pages/06272023/Description/{desc}", "r") as file:
  19. test_html = file.read()
  20. soup = BeautifulSoup(test_html, features="html.parser")
  21. description_output = onniForums_description_parser(soup)
  22. print(f"\nTopic name : {description_output[0]}")
  23. print(f"Contents : {description_output[1]}")
  24. print(f"Users : {description_output[2]}")
  25. print(f"Dates posted: {description_output[3]}")
  26. print(f"Feedbacks : {description_output[4]}")
  27. print(f"Statuses : {description_output[5]}")
  28. print(f"Reputations : {description_output[6]}")
  29. print(f"Signatures : {description_output[7]}")
  30. print(f"Interests : {description_output[8]}\n")
  31. except UnicodeDecodeError:
  32. descriptions_with_unicode_error += 1
  33. print(f"UnicodeDecodeError: the file `{desc}` cannot be decoded by Python!")
  34. print("\nTESTING COMPLETE\n")
  35. print(f"Number of descriptions : {total}")
  36. print(f"Descriptions w/ errors : {descriptions_with_unicode_error}")
  37. print(f"Failure percentage : {round(descriptions_with_unicode_error/total, 4) * 100}%\n")