Browse Source

Db connection.

main
ericssonmarin-cpp 1 year ago
parent
commit
f375e0860f
1 changed files with 213 additions and 72 deletions
  1. +213
    -72
      Forums/DB_Connection/db_connection.py

+ 213
- 72
Forums/DB_Connection/db_connection.py View File

@ -7,9 +7,9 @@ import configparser
def connectDataBase():
try:
from Forums.Initialization.forums_mining import config
from Forums.Initialization.forums_mining import config
try:
ip = config.get('PostgreSQL', 'ip')
username = config.get('PostgreSQL', 'username')
@ -20,7 +20,7 @@ def connectDataBase():
except:
print ("Data base (darkweb_forums) not found.")
print ("Data base " + config.get('PostgreSQL', 'database') + " not found.")
raise SystemExit
@ -63,15 +63,34 @@ def verifyBoard(cur, forum, nameBoard):
print (trace)
def verifyTopic(cur, forumId, authorId, titleTopic):
    """Look up an existing topic by forum, author and title.

    Args:
        cur: Open DB-API cursor; must accept ``%(name)s`` style parameters.
        forumId: Value matched against ``topics.forum_id``.
        authorId: Value matched against ``topics.author_id``.
        titleTopic: Value matched against ``topics.title_topic``.

    Returns:
        The ``topic_id`` of the first matching row, ``0`` when no row
        matches, or ``None`` when the query raised (the traceback is printed
        and the error is not propagated).
    """
    try:
        cur.execute(
            "select topic_id from topics where forum_id = %(forumId)s "
            "and author_id = %(authorId)s and title_topic = %(titleTopic)s limit 1",
            {'forumId': forumId, 'authorId': authorId, 'titleTopic': titleTopic})
        recset = cur.fetchall()
        if recset:
            return recset[0][0]
        return 0
    except Exception:
        # Best-effort lookup: report the failure and fall through to None.
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still propagate.
        trace = traceback.format_exc()
        print(trace)
        return None
def verifyPost(cur, topicId, userId, dateAdded):
try:
cur.execute("select post_id from posts where topic_id = %(topicId)s and "
"user_id = %(userId)s and dateadded_post = %(dateAdded)s limit 1", {'topicId': topicId,
'userId': userId,
'dateAdded': dateAdded})
recset = cur.fetchall()
@ -167,7 +186,7 @@ def getLastUser(cur):
try:
cur.execute("select user_id from users order by user_id desc")
cur.execute("select user_id from users order by user_id desc limit 1")
recset = cur.fetchall()
@ -182,12 +201,11 @@ def getLastUser(cur):
print (trace)
'''
def getLastPost(cur):
try:
cur.execute("select id from Posts order by id desc limit 1")
cur.execute("select post_id from posts order by post_id desc limit 1")
recset = cur.fetchall()
@ -200,7 +218,6 @@ def getLastPost(cur):
trace = traceback.format_exc()
print (trace)
'''
def create_forum(cur, row, url):
@ -220,46 +237,86 @@ def create_forum(cur, row, url):
return forumId
def create_topic(cur, forumId, row, authorId):
    """Insert the topic described by *row*, or archive and update an existing one.

    The topic is looked up with ``verifyTopic`` (forum, author, title). When it
    does not exist, a new row is inserted with the next id after
    ``getLastTopic``. When it exists and its board, views or posts count
    differs from *row*, the stored row is copied into ``topics_history`` and
    the ``topics`` row is updated.

    Args:
        cur: Open DB-API cursor.
        forumId: Id of the forum the topic belongs to.
        row: Indexable record; this function reads ``row[1]`` (board),
            ``row[3]`` (title), ``row[4]`` (views), ``row[5]`` (posts),
            ``row[6]`` (href), ``row[7]`` (date added), ``row[8]`` (date
            inserted) and ``row[17]`` (classification).
        authorId: Id of the user who authored the topic.

    Returns:
        The id of the inserted or already existing topic.
    """
    # The string '-1' is written to the database as NULL for these fields.
    views = row[4] if row[4] != '-1' else None
    posts = row[5] if row[5] != '-1' else None

    topicId = verifyTopic(cur, forumId, authorId, row[3])

    if not topicId:
        topicId = int(getLastTopic(cur) + 1)

        sql = "Insert into topics (topic_id, forum_id, author_id, title_topic, board_topic, views_topic, posts_topic, " \
              "href_topic, dateadded_topic, dateinserted_topic, classification_topic) Values (%s, %s, %s, %s, %s, %s, " \
              "%s, %s, %s, %s, %s)"
        cur.execute(sql, [topicId, forumId, authorId,
                          row[3], row[1],
                          views,
                          posts,
                          row[6] if row[6] != '-1' else None,
                          row[7] if row[7] != '-1' else None,
                          row[8],
                          row[17]])
        return topicId

    # Tracking potential topic changes
    sql = "select * from topics where topic_Id = %(topicId)s"
    cur.execute(sql, {'topicId': topicId})
    stored = cur.fetchall()[0]

    # Column positions follow the `create table topics` statement:
    # 3 title, 4 board, 5 views, 6 posts, 7 href, 8 dateadded,
    # 9 dateinserted, 10 classification.
    # Compare against the normalised values: a stored NULL must not be
    # reported as a change just because the incoming marker is '-1'.
    changed = (str(stored[4]) != str(row[1])
               or str(stored[5]) != str(views)
               or str(stored[6]) != str(posts))

    if changed:
        # Archive the previous state before overwriting it.
        sql = "Insert into topics_history (topic_id, forum_id, author_id, title_topic, board_topic, views_topic, posts_topic, " \
              "href_topic, dateadded_topic, dateinserted_topic, classification_topic) Values (%s, %s, %s, %s, %s, " \
              "%s, %s, %s, %s, %s, %s)"
        cur.execute(sql, [topicId, forumId, authorId,
                          stored[3],
                          stored[4],
                          stored[5],
                          stored[6],
                          stored[7],
                          stored[8],
                          stored[9],
                          stored[10]])

        sql = "Update topics set board_topic = %(board_topic)s, views_topic = %(views_topic)s, posts_topic = %(posts_topic)s, " \
              "dateinserted_topic = %(dateinserted_topic)s where topic_id = %(topicId)s"
        cur.execute(sql, {'board_topic': row[1],
                          'views_topic': views,
                          'posts_topic': posts,
                          'dateinserted_topic': row[8],
                          'topicId': topicId})

    return topicId
def create_author(cur, row, forumId):
    """Return the user id of the topic author, inserting a stub user if needed.

    The author is looked up with ``verifyUser`` by name (``row[2]``) and forum.
    When no user exists, a new row is inserted with the next id after
    ``getLastUser``; status, reputation, interest and signature are set to the
    placeholder string ``'Nan'``.

    Args:
        cur: Open DB-API cursor.
        row: Indexable record; this function reads ``row[2]`` (author name)
            and ``row[8]`` (date inserted).
        forumId: Id of the forum the author belongs to.

    Returns:
        The id of the existing or newly inserted user.
    """
    userId = verifyUser(cur, row[2], forumId)
    if userId:
        return userId

    userId = int(getLastUser(cur) + 1)

    sql = "Insert into users (user_id, forum_id, name_user, status_user, reputation_user, interest_user, " \
          "signature_user, dateinserted_user) Values (%s, %s, %s, %s, %s, %s, %s, %s)"

    # The 'Nan' placeholders tell the later change tracking (create_user)
    # not to archive this stub row into users_history.
    recset = [userId, forumId,
              row[2], 'Nan', 'Nan', 'Nan', 'Nan',
              row[8]]
    cur.execute(sql, recset)

    return userId
def create_user(cur, row, forumId, index):
@ -267,8 +324,12 @@ def create_user(cur, row, forumId, index):
userId = verifyUser(cur, row[9][index], forumId)
if not userId:
userId = int(getLastUser(cur) + 1)
newUser = True
else:
newUser = False
if newUser:
sql = "Insert into users (user_id, forum_id, name_user, status_user, reputation_user, interest_user, " \
"signature_user, dateinserted_user) Values (%s, %s, %s, %s, %s, %s, %s, %s)"
@ -283,30 +344,97 @@ def create_user(cur, row, forumId, index):
cur.execute(sql, recset)
else:
# Tracking potential user changes
sql = "select * from users where user_id = %(userId)s"
cur.execute(sql, {'userId': userId})
recset = cur.fetchall()
if (str(recset[0][3]) != str(row[10][index]) or str(recset[0][4]) != str(row[11][index]) or
str(recset[0][5]) != str(row[12][index] if row[12][index] != '-1' else None) or str(recset[0][6]) != str(row[13][index] if row[13][index] != '-1' else None)): # there was a change in the user information
if (str(recset[0][3]) != 'Nan' or str(recset[0][4]) != 'Nan' or str(recset[0][5]) != 'Nan' or str(recset[0][6]) != 'Nan'):
sql = "Insert into users_history (user_id, forum_id, name_user, status_user, reputation_user, interest_user, " \
"signature_user, dateinserted_user) Values (%s, %s, %s, %s, %s, %s, %s, %s)"
recset = [userId, forumId,
recset[0][2],
recset[0][3],
recset[0][4],
recset[0][5],
recset[0][6],
recset[0][7]]
cur.execute(sql, recset)
sql = "Update users set status_user = %(status_user)s, reputation_user = %(reputation_user)s, " \
"interest_user = %(interest_user)s, signature_user = %(signature_user)s, dateinserted_user = %(dateinserted_user)s " \
"where user_id = %(userId)s"
cur.execute(sql, {'status_user': row[10][index] if row[10][index] != '-1' else None,
'reputation_user': row[11][index] if row[11][index] != '-1' else None,
'interest_user': row[12][index] if row[12][index] != '-1' else None,
'signature_user': row[13][index] if row[13][index] != '-1' else None,
'dateinserted_user': row[8] if row[8] != '-1' else None,
'userId': userId})
return userId
def create_posts(cur, row, forumId, topicId):
    """Insert or update every post of a topic described by *row*.

    Does nothing when ``row[9]`` is the string ``"-1"``. Otherwise, for each
    index ``i`` of ``row[9]``, the posting user is created/updated through
    ``create_user`` and the post is looked up with ``verifyPost`` (topic, user,
    date added). New posts are inserted with the next id after
    ``getLastPost``. For existing posts whose content or feedback differs, the
    stored row is copied into ``posts_history`` and the ``posts`` row is
    updated.

    Args:
        cur: Open DB-API cursor.
        row: Indexable record; ``row[9]``, ``row[14]`` (content), ``row[15]``
            (feedback) and ``row[16]`` (date added) are parallel sequences
            indexed per post, and ``row[8]`` is the insertion date.
        forumId: Id of the forum, forwarded to ``create_user``.
        topicId: Id of the topic the posts belong to.
    """
    if row[9] == "-1":
        return

    for i in range(len(row[9])):
        # The string '-1' is written to the database as NULL for these fields.
        content = row[14][i] if row[14][i] != '-1' else None
        feedback = row[15][i] if row[15][i] != '-1' else None

        userId = create_user(cur, row, forumId, i)
        postId = verifyPost(cur, topicId, userId, row[16][i])

        if not postId:
            postId = int(getLastPost(cur) + 1)

            sql = "Insert into posts (post_id, topic_id, user_id, content_post, feedback_post, " \
                  "dateadded_post, dateinserted_post) Values (%s, %s, %s, %s, %s, %s, %s)"
            cur.execute(sql, [postId, topicId, userId,
                              content,
                              feedback,
                              row[16][i] if row[16][i] != '-1' else None,
                              row[8]])
            continue

        # Tracking potential post changes
        sql = "select * from posts where post_id = %(postId)s"
        cur.execute(sql, {'postId': postId})
        stored = cur.fetchall()[0]

        # Column positions follow the `create table posts` statement:
        # 3 content, 4 feedback, 5 dateadded, 6 dateinserted.
        # Compare against the normalised values so that '-1' and a stored
        # NULL are treated as the same thing for both fields.
        if str(stored[3]) != str(content) or str(stored[4]) != str(feedback):
            # Archive the previous state before overwriting it.
            sql = "Insert into posts_history (post_id, topic_id, user_id, content_post, feedback_post, " \
                  "dateadded_post, dateinserted_post) Values (%s, %s, %s, %s, %s, %s, %s)"
            cur.execute(sql, [postId, topicId, userId,
                              stored[3],
                              stored[4],
                              stored[5],
                              stored[6]])

            sql = "Update posts set content_post = %(content_post)s, feedback_post = %(feedback_post)s, dateinserted_post = %(dateinserted_post)s " \
                  "where post_id = %(postId)s"
            cur.execute(sql, {'content_post': content,
                              'feedback_post': feedback,
                              'dateinserted_post': row[8],
                              'postId': postId})
def create_database(cur, con):
@ -314,14 +442,11 @@ def create_database(cur, con):
try:
sql = "create table forums (forum_id integer NOT NULL, name_forum character varying(255) NOT NULL, url_forum " \
"character varying(255) null, dateinserted_forum timestamp(6) with time zone NOT NULL, constraint " \
"character varying(255) NOT null, dateinserted_forum timestamp(6) with time zone NOT NULL, constraint " \
"forums_pk primary key (forum_id))"
cur.execute(sql)
sql = "create table boards (board_id integer NOT NULL, forum_id integer NOT NULL, name_board character " \
"varying(255) NOT NULL, dateinserted_board timestamp(6) with time zone NOT NULL, constraint boards_pk " \
"primary key (board_id), constraint boards_forum_id_fkey foreign key (forum_id) references forums (" \
"forum_id))"
sql = "create unique index unique_forum ON forums USING btree (name_forum ASC NULLS LAST)"
cur.execute(sql)
sql = "create table users (user_id integer NOT NULL, forum_id integer NOT NULL, name_user character varying(" \
@ -331,6 +456,9 @@ def create_database(cur, con):
"constraint users_forum_id_fkey foreign key (forum_id) references forums (forum_id))"
cur.execute(sql)
sql = "create unique index unique_user ON users USING btree (forum_id ASC NULLS LAST, name_user ASC NULLS LAST)"
cur.execute(sql)
sql = "create table users_history(user_id integer NOT NULL, forum_id integer NOT NULL, name_user character " \
"varying(255) NOT NULL, status_user character varying(255) null, reputation_user character varying(255) " \
"null, interest_user character varying(5000) null, signature_user character varying(1000) null, " \
@ -340,37 +468,50 @@ def create_database(cur, con):
"forum_id))"
cur.execute(sql)
sql = "create table topics(topic_id integer NOT NULL, forum_id integer NOT NULL, board_id integer NOT NULL, " \
"author_id integer NOT NULL, title_topic character varying(255) NOT NULL, views_topic integer null, " \
"posts_topic integer null, href_topic character varying(255) null, dateadded_topic timestamp(6) with " \
sql = "create table topics(topic_id integer NOT NULL, forum_id integer NOT NULL, author_id integer NOT NULL, " \
"title_topic character varying(255) NOT NULL, board_topic character varying(255) NOT NULL, views_topic integer null, " \
"posts_topic integer null, href_topic character varying(255) NOT null, dateadded_topic timestamp(6) with " \
"time zone null, dateinserted_topic timestamp(6) with time zone NOT NULL, classification_topic double " \
"precision NOT NULL, constraint topics_pk primary key (topic_id), constraint topics_author_id_fkey " \
"foreign key (author_id) references users (user_id), constraint topics_board_id_fkey foreign key (" \
"board_id) references boards (board_id), constraint topics_forum_id_fkey foreign key (forum_id) " \
"references forums (forum_id))"
"foreign key (author_id) references users (user_id), constraint topics_forum_id_fkey foreign key (" \
"forum_id) references forums (forum_id))"
cur.execute(sql)
sql = "create unique index unique_topic ON topics USING btree (forum_id ASC NULLS LAST, author_id ASC NULLS LAST, " \
"title_topic ASC NULLS LAST)"
cur.execute(sql)
sql = "create table topics_history(topic_id integer NOT NULL, forum_id integer NOT NULL, board_id integer NOT " \
"NULL, author_id integer NOT NULL, title_topic character varying(255) NOT NULL, views_topic integer " \
"null, posts_topic integer null, href_topic character varying(255) null, dateadded_topic timestamp(6) " \
sql = "create table topics_history(topic_id integer NOT NULL, forum_id integer NOT NULL, author_id integer NOT NULL, " \
"title_topic character varying(255) NOT NULL, board_topic character varying(255) NOT NULL, views_topic integer " \
"null, posts_topic integer null, href_topic character varying(255) NOT null, dateadded_topic timestamp(6) " \
"with time zone null, dateinserted_topic timestamp(6) with time zone NOT NULL, classification_topic " \
"double precision NOT NULL, constraint topics_history_pk primary key (topic_id, dateinserted_topic), " \
"constraint topics_history_topic_id_fkey foreign key (topic_id) references topics (topic_id), " \
"constraint topics_history_author_id_fkey foreign key (author_id) references users (user_id), " \
"constraint topics_history_board_id_fkey foreign key (board_id) references boards (board_id), " \
"constraint topics_history_forum_id_fkey foreign key (forum_id) references forums (forum_id))"
"constraint topics_history_board_id_fkey foreign key (forum_id) references forums (forum_id))"
cur.execute(sql)
sql = "create table posts(forum_id integer NOT NULL, board_id integer NOT NULL, topic_id integer NOT NULL, " \
"user_id integer NOT NULL, content_post character varying(100000) null, feedback_post integer null, " \
sql = "create table posts(post_id integer NOT NULL, topic_id integer NOT NULL, " \
"user_id integer NOT NULL, content_post character varying(100000) NOT null, feedback_post integer null, " \
"dateadded_post timestamp(6) with time zone NOT NULL, dateinserted_post timestamp(6) with time zone NOT " \
"NULL, constraint posts_pk primary key (forum_id, board_id, topic_id, user_id, dateadded_post), " \
"constraint posts_author_id_fkey foreign key (user_id) references users (user_id), constraint " \
"posts_board_id_fkey foreign key (board_id) references boards (board_id), constraint " \
"posts_forum_id_fkey foreign key (forum_id) references forums (forum_id), constraint " \
"NULL, constraint posts_pk primary key (post_id), " \
"constraint posts_user_id_fkey foreign key (user_id) references users (user_id), constraint " \
"posts_topic_id_fkey foreign key (topic_id) references topics (topic_id))"
cur.execute(sql)
sql = "create unique index unique_post ON posts USING btree (topic_id ASC NULLS LAST, user_id ASC NULLS LAST, " \
"dateadded_post ASC NULLS LAST)"
cur.execute(sql)
sql = "create table posts_history(post_id integer NOT NULL, topic_id integer NOT NULL, " \
"user_id integer NOT NULL, content_post character varying(100000) NOT null, feedback_post integer null, " \
"dateadded_post timestamp(6) with time zone NOT NULL, dateinserted_post timestamp(6) with time zone NOT " \
"NULL, constraint posts_history_pk primary key (post_id, dateinserted_post), " \
"constraint posts_history_user_id_fkey foreign key (user_id) references users (user_id), " \
"constraint posts_history_topic_id_fkey foreign key (topic_id) references topics (topic_id), " \
"constraint posts_history_post_id_fkey foreign key (post_id) references posts (post_id))"
cur.execute(sql)
con.commit()
except:


Loading…
Cancel
Save