|
@ -38,14 +38,20 @@ def HiddenAnswers_description_parser(soup: BeautifulSoup): |
|
|
datetime_obj = datetime.strptime(datetime_string, "%Y-%m-%dT%H:%M:%S") |
|
|
datetime_obj = datetime.strptime(datetime_string, "%Y-%m-%dT%H:%M:%S") |
|
|
addDate.append(datetime_obj) |
|
|
addDate.append(datetime_obj) |
|
|
|
|
|
|
|
|
question_user_status = question.find("span", {"class": "qa-q-view-who-title"}).text |
|
|
|
|
|
status.append(cleanString(question_user_status.strip())) |
|
|
|
|
|
|
|
|
try: |
|
|
|
|
|
question_user_status = question.find("span", {"class": "qa-q-view-who-title"}).text |
|
|
|
|
|
status.append(cleanString(question_user_status.strip())) |
|
|
|
|
|
except AttributeError: |
|
|
|
|
|
status.append("-1") |
|
|
|
|
|
|
|
|
question_user_karma = question.find("span", {"class": "qa-q-view-who-points-data"}).text |
|
|
|
|
|
# Convert karma to pure numerical string |
|
|
|
|
|
if question_user_karma.find("k") > -1: |
|
|
|
|
|
question_user_karma = str(float(question_user_karma.replace("k", "")) * 1000) |
|
|
|
|
|
reputation.append(cleanString(question_user_karma.strip())) |
|
|
|
|
|
|
|
|
try: |
|
|
|
|
|
question_user_karma = question.find("span", {"class": "qa-q-view-who-points-data"}).text |
|
|
|
|
|
# Convert karma to pure numerical string |
|
|
|
|
|
if question_user_karma.find("k") > -1: |
|
|
|
|
|
question_user_karma = str(float(question_user_karma.replace("k", "")) * 1000) |
|
|
|
|
|
reputation.append(cleanString(question_user_karma.strip())) |
|
|
|
|
|
except AttributeError: |
|
|
|
|
|
reputation.append("-1") |
|
|
|
|
|
|
|
|
question_content = question.find("div", {"class": "qa-q-view-content qa-post-content"}).text |
|
|
question_content = question.find("div", {"class": "qa-q-view-content qa-post-content"}).text |
|
|
post.append(cleanString(question_content.strip())) |
|
|
post.append(cleanString(question_content.strip())) |
|
@ -71,14 +77,20 @@ def HiddenAnswers_description_parser(soup: BeautifulSoup): |
|
|
post_data = replies.find("div", {"class": "qa-a-item-content qa-post-content"}).find("div",{"itemprop":"text"}).text |
|
|
post_data = replies.find("div", {"class": "qa-a-item-content qa-post-content"}).find("div",{"itemprop":"text"}).text |
|
|
post.append(cleanString(post_data.strip())) |
|
|
post.append(cleanString(post_data.strip())) |
|
|
|
|
|
|
|
|
user_reputations = replies.find("span", {"class", "qa-a-item-who-title"}).text |
|
|
|
|
|
status.append(cleanString(user_reputations.strip())) |
|
|
|
|
|
|
|
|
try: |
|
|
|
|
|
user_reputations = replies.find("span", {"class", "qa-a-item-who-title"}).text |
|
|
|
|
|
status.append(cleanString(user_reputations.strip())) |
|
|
|
|
|
except AttributeError: |
|
|
|
|
|
status.append("-1") |
|
|
|
|
|
|
|
|
karma = replies.find("span", {"class": "qa-a-item-who-points-data"}).text |
|
|
|
|
|
# Convert karma to pure numerical string |
|
|
|
|
|
if karma.find("k") > -1: |
|
|
|
|
|
karma = str(float(karma.replace("k", "")) * 1000) |
|
|
|
|
|
reputation.append(cleanString(karma.strip())) |
|
|
|
|
|
|
|
|
try: |
|
|
|
|
|
karma = replies.find("span", {"class": "qa-a-item-who-points-data"}).text |
|
|
|
|
|
# Convert karma to pure numerical string |
|
|
|
|
|
if karma.find("k") > -1: |
|
|
|
|
|
karma = str(float(karma.replace("k", "")) * 1000) |
|
|
|
|
|
reputation.append(cleanString(karma.strip())) |
|
|
|
|
|
except AttributeError: |
|
|
|
|
|
reputation.append("-1") |
|
|
|
|
|
|
|
|
feedback.append("-1") |
|
|
feedback.append("-1") |
|
|
sign.append("-1") |
|
|
sign.append("-1") |
|
@ -114,7 +126,7 @@ def HiddenAnswers_listing_parser(soup: BeautifulSoup): |
|
|
topic_of_query = queries.find("div", {"class": "qa-q-item-title"}).find("a").text |
|
|
topic_of_query = queries.find("div", {"class": "qa-q-item-title"}).find("a").text |
|
|
topic.append(cleanString(topic_of_query.strip())) |
|
|
topic.append(cleanString(topic_of_query.strip())) |
|
|
|
|
|
|
|
|
author = queries.find("span", {"class": "qa-q-item-who-data"}).find("a").text |
|
|
|
|
|
|
|
|
author = queries.find("span", {"class": "qa-q-item-who-data"}).text |
|
|
user.append(cleanString(author.strip())) |
|
|
user.append(cleanString(author.strip())) |
|
|
|
|
|
|
|
|
num_answers = queries.find("span", {"class": "qa-a-count-data"}).text |
|
|
num_answers = queries.find("span", {"class": "qa-a-count-data"}).text |
|
@ -124,10 +136,19 @@ def HiddenAnswers_listing_parser(soup: BeautifulSoup): |
|
|
|
|
|
|
|
|
date_posted = queries.find("span", {"class": "qa-q-item-when-data"}).text |
|
|
date_posted = queries.find("span", {"class": "qa-q-item-when-data"}).text |
|
|
|
|
|
|
|
|
if date_posted.find("day") > 0: |
|
|
|
|
|
datetime_obj = datetime.now() - timedelta(days=1) |
|
|
|
|
|
|
|
|
if date_posted.find("minute") > 0: |
|
|
|
|
|
minutes_ago = date_posted.split(' ')[0] |
|
|
|
|
|
datetime_obj = datetime.now() - timedelta(minutes=int(minutes_ago)) |
|
|
|
|
|
|
|
|
|
|
|
elif date_posted.find("day") > 0: |
|
|
|
|
|
days_ago = date_posted.split(' ')[0] |
|
|
|
|
|
datetime_obj = datetime.now() - timedelta(days=int(days_ago)) |
|
|
|
|
|
|
|
|
|
|
|
elif bool(re.search(r"\d{4}", date_posted)): |
|
|
|
|
|
datetime_obj = datetime.strptime(date_posted, "%b %d, %Y") |
|
|
|
|
|
|
|
|
else: |
|
|
else: |
|
|
datetime_obj = datetime.strptime(f"{date_posted} {date.today().year}", "%b %d %Y") |
|
|
|
|
|
|
|
|
datetime_obj = datetime.strptime(f"{date_posted}, {date.today().year}", "%b %d, %Y") |
|
|
addDate.append(datetime_obj) |
|
|
addDate.append(datetime_obj) |
|
|
#this link will be cleaned |
|
|
#this link will be cleaned |
|
|
|
|
|
|
|
|