From 709ca196cc642d8c6f6bd769c270ea59e9e53b12 Mon Sep 17 00:00:00 2001
From: Khoi <minhkhoitran2k3@gmail.com>
Date: Thu, 20 Jul 2023 15:10:39 -0700
Subject: [PATCH] Completed and tested parsers for Libre

---
 Forums/Libre/parser.py | 39 +++++++++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 8 deletions(-)

diff --git a/Forums/Libre/parser.py b/Forums/Libre/parser.py
index 1991d7a..c951ad5 100644
--- a/Forums/Libre/parser.py
+++ b/Forums/Libre/parser.py
@@ -7,12 +7,14 @@ from datetime import timedelta
 import re
 
 # Here, we are importing BeautifulSoup to search through the HTML tree
-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup, ResultSet, Tag
 
 # This is the method to parse the Description Pages (one page to each topic in the Listing Pages)
 
 
-def libre_description_parser(soup):
+
+
+def libre_description_parser(soup: Tag):
     # Fields to be parsed
 
     topic = "-1"  # 0 *topic name
@@ -29,11 +31,34 @@ def libre_description_parser(soup):
 
     topic_found = soup.find("a", {"class": "link text-xl text-zinc-300"}).text
     topic = cleanString(topic_found.strip())
+    
+    original_post: Tag = soup.find("div", {"class": "flex items-start"})
+    
+    original_user = original_post.find("div", {"class": "info-p"}).find("a", {"class": "link"}).text
+    user.append(cleanString(original_user.replace("/u/", "").strip()))
+    
+    original_user_statistics: ResultSet[Tag] = original_post.find("div", {"class": "info-p"}).find_all("span")
+    
+    original_time = original_user_statistics[0].text[2:]
+    datetime_append = datetime.strptime(original_time, "%Y-%m-%d %H:%M:%S GMT")
+    addDate.append(datetime_append)
+    
+    original_karma = original_user_statistics[1].text[2]
+    reputation.append(cleanString(original_karma.strip()))
+    
+    original_content = soup.find("div", {"class": "content-p"}).text
+    post.append(cleanString(original_content.strip()))
+    
+
+    status.append("-1")
+    interest.append("-1")
+    sign.append("-1")
+    feedback.append("-1")
 
     # Finding the repeated tag that corresponds to the listing of posts
 
     # try:
-    posts = soup.find_all("div", {"class": "flex items-stretch"})
+    posts: ResultSet[Tag] = soup.find_all("div", {"class": "flex items-stretch"})
 
     # For each message (post), get all the fields we are interested to:
 
@@ -42,9 +67,8 @@ def libre_description_parser(soup):
 
         # Finding the author (user) of the post
 
-        user_name = ipost.find("a", {"class": "link"}).text
-        user_name_cleaned = user_name.split("/")[1]
-        user.append(cleanString(user_name_cleaned))  # Remember to clean the problematic characters
+        user_name = ipost.find("p", {"class": "text-zinc-400 text-justify"}).find("a", {"class": "link"}).text
+        user.append(cleanString(user_name.replace("/u/", "").strip()))  # Remember to clean the problematic characters
 
         status.append("-1")
 
@@ -64,6 +88,7 @@ def libre_description_parser(soup):
 
         date_posted = ipost.find("p", {"class": "text-zinc-400 text-justify"}).text
         date_time_cleaned = date_posted.replace(user_name, "")[3:-12]
+        print(date_time_cleaned)
         datetime_append = datetime.strptime(date_time_cleaned, "%Y-%m-%d %H:%M:%S GMT")
         addDate.append(datetime_append)
 
@@ -73,8 +98,6 @@ def libre_description_parser(soup):
 
         # Finding the user's signature
 
-        # signature = ipost.find('div', {"class": "post_wrapper"}).find('div', {"class": "moderatorbar"}).find('div', {"class": "signature"})
-
         sign.append("-1")
 
         # As no information about user's feedback was found, just assign "-1" to the variable