|
|
@ -73,13 +73,11 @@ def persist_data(url, row, cur): |
|
|
|
|
|
|
|
forum = create_forum(cur, row, url) |
|
|
|
|
|
|
|
board = create_board(cur, row, forum) |
|
|
|
author = create_author(cur, row, forum) |
|
|
|
|
|
|
|
author = create_user(cur, row, forum, 0) |
|
|
|
topic = create_topic(cur, forum, row, author) |
|
|
|
|
|
|
|
topic = create_topic(cur, row, forum, board, author) |
|
|
|
|
|
|
|
create_posts(cur, row, forum, board, topic) |
|
|
|
create_posts(cur, row, forum, topic) |
|
|
|
|
|
|
|
|
|
|
|
def incrementError(): |
|
|
@ -191,8 +189,9 @@ def persist_record(url, rec, cur, con, createLog, logFile, listingFile, descript |
|
|
|
if trace.find("already exists") == -1: |
|
|
|
incrementError() |
|
|
|
print(f"There was a problem to persist the files ({listingFile} + {descriptionFile}) in the database!") |
|
|
|
traceback.print_exc() |
|
|
|
if createLog: |
|
|
|
logFile.write(str(nError) + f"There was a problem to persist the files ({listingFile} + {descriptionFile}) in the database!\n") |
|
|
|
logFile.write(str(nError) + f". There was a problem to persist the files ({listingFile} + {descriptionFile}) in the database!\n") |
|
|
|
return False |
|
|
|
else: |
|
|
|
return True |
|
|
@ -223,7 +222,7 @@ def new_parse(forum, url, createLog): |
|
|
|
|
|
|
|
from Forums.Initialization.forums_mining import config, CURRENT_DATE |
|
|
|
|
|
|
|
print("Parsing The " + forum + " Forum and conduct data classification to store the information in the database.") |
|
|
|
print("Parsing the " + forum + " forum and conduct data classification to store the information in the database.") |
|
|
|
|
|
|
|
# Connecting to the database |
|
|
|
con = connectDataBase() |
|
|
@ -261,6 +260,7 @@ def new_parse(forum, url, createLog): |
|
|
|
parseDescriptionError = False |
|
|
|
persistDescriptionError = False |
|
|
|
moveDescriptionError = False |
|
|
|
findDescriptionError = False |
|
|
|
|
|
|
|
rw = [] |
|
|
|
|
|
|
@ -272,6 +272,8 @@ def new_parse(forum, url, createLog): |
|
|
|
|
|
|
|
if doDescription: |
|
|
|
|
|
|
|
nFound = 0 |
|
|
|
|
|
|
|
for rec in rw: |
|
|
|
|
|
|
|
rec = rec.split(',') |
|
|
@ -280,6 +282,9 @@ def new_parse(forum, url, createLog): |
|
|
|
|
|
|
|
# Reading the associated description HTML pages
|
|
|
descriptions = glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Description", descriptionPattern)) |
|
|
|
|
|
|
|
nFound += len(descriptions) |
|
|
|
|
|
|
|
for descriptionIndex, descriptionFile in enumerate(descriptions): |
|
|
|
|
|
|
|
print("Reading description folder of '" + forum + "', file '" + os.path.basename( |
|
|
@ -331,7 +336,19 @@ def new_parse(forum, url, createLog): |
|
|
|
else: |
|
|
|
moveDescriptionError = True |
|
|
|
|
|
|
|
if not (readDescriptionError or parseDescriptionError or persistDescriptionError or moveDescriptionError): |
|
|
|
if not (nFound > 0): |
|
|
|
|
|
|
|
findDescriptionError = True |
|
|
|
|
|
|
|
incrementError() |
|
|
|
print(f"There was a problem to locate the file(s) for {listingFile} in the Description section!") |
|
|
|
if createLog: |
|
|
|
logFile.write( |
|
|
|
str(nError) + f". There was a problem to locate the file(s) for {listingFile}" |
|
|
|
f" in the Description section!\n") |
|
|
|
|
|
|
|
if not (readDescriptionError or parseDescriptionError or persistDescriptionError |
|
|
|
or moveDescriptionError or findDescriptionError): |
|
|
|
|
|
|
|
# Move the listing file to the completed folder now that no description step failed
|
|
|
move_file(listingFile, createLog, logFile) |
|
|
|