|
|
- __author__ = 'DarkWeb'
-
- import psycopg2
- import traceback
- from Forums.Utilities.utilities import *
-
-
- def connectDataBase():
-
- from Forums.Initialization.forums_mining import config
-
- try:
-
- ip = config.get('PostgreSQL', 'ip')
- username = config.get('PostgreSQL', 'username')
- password = config.get('PostgreSQL', 'password')
- database = config.get('PostgreSQL', 'database')
-
- return psycopg2.connect(host=ip, user=username, password=password, dbname=database)
-
- except:
-
- print ("Data base " + config.get('PostgreSQL', 'database') + " not found.")
- raise SystemExit
-
-
- def verifyForum(cur, nameForum):
-
- try:
-
- cur.execute("lock table forums IN ACCESS EXCLUSIVE MODE")
-
- cur.execute("select forum_id from forums where name_forum = %(nameForum)s limit 1", {'nameForum': nameForum})
-
- recset = cur.fetchall()
-
- if recset:
- return recset[0]['forum_id']
- else:
- return 0
-
- except:
-
- trace = traceback.format_exc()
- print (trace)
-
-
- def verifyTopic(cur, forumId, authorId, titleTopic):
-
- try:
-
- cur.execute("lock table topics IN ACCESS EXCLUSIVE MODE")
-
- cur.execute("select topic_id from topics where forum_id = %(forumId)s and author_id = %(authorId)s and title_topic = %(titleTopic)s limit 1",
- {'forumId': forumId, 'authorId': authorId, 'titleTopic': titleTopic})
-
- recset = cur.fetchall()
-
- if recset:
- return recset[0]['topic_id']
- else:
- return 0
-
- except:
-
- trace = traceback.format_exc()
- print (trace)
-
-
- def verifyPost(cur, topicId, userId, dateAdded):
-
- try:
-
- cur.execute("lock table posts IN ACCESS EXCLUSIVE MODE")
-
- cur.execute("select post_id from posts where topic_id = %(topicId)s and "
- "user_id = %(userId)s and dateadded_post = %(dateAdded)s limit 1", {'topicId': topicId,
- 'userId': userId,
- 'dateAdded': dateAdded})
-
- recset = cur.fetchall()
-
- if recset:
- return recset[0]['post_id']
- else:
- return 0
-
- except:
-
- trace = traceback.format_exc()
- print (trace)
-
-
- def verifyUser(cur, nameUser, forumId):
-
- try:
-
- cur.execute("lock table users IN ACCESS EXCLUSIVE MODE")
-
- cur.execute("select user_id from users where name_user = %(nameUser)s and forum_id = %(forumId)s limit 1",
- {'nameUser': nameUser, 'forumId': forumId})
-
- recset = cur.fetchall()
-
- if recset:
- return recset[0]['user_id']
- else:
- return 0
-
- except:
-
- trace = traceback.format_exc()
- print (trace)
-
-
- def getLastForum(cur):
-
- try:
-
- cur.execute("select forum_id from forums order by forum_id desc limit 1")
-
- recset = cur.fetchall()
-
- if recset:
- return recset[0]['forum_id']
- else:
- return 0
-
- except:
-
- trace = traceback.format_exc()
- print (trace)
-
-
- def getLastTopic(cur):
-
- try:
-
- cur.execute("select topic_id from topics order by topic_id desc limit 1")
-
- recset = cur.fetchall()
-
- if recset:
- return recset[0]['topic_id']
- else:
- return 0
-
- except:
-
- trace = traceback.format_exc()
- print (trace)
-
-
- def getLastUser(cur):
-
- try:
-
- cur.execute("select user_id from users order by user_id desc limit 1")
-
- recset = cur.fetchall()
-
- if recset:
- return recset[0]['user_id']
- else:
- return 0
-
- except:
-
- trace = traceback.format_exc()
- print (trace)
-
- def getLastUserVersion(cur, userId):
-
- try:
-
- cur.execute("select version_user from users_history where user_id = %(userId)s order by version_user desc limit 1", {'userId': userId})
-
- recset = cur.fetchall()
-
- if recset:
- return recset[0]['version_user']
- else:
- return 0
-
- except:
-
- trace = traceback.format_exc()
- print (trace)
-
- def getLastTopicVersion(cur, topicId):
-
- try:
-
- cur.execute("select version_topic from topics_history where topic_id = %(topicId)s order by version_topic desc limit 1", {'topicId': topicId})
-
- recset = cur.fetchall()
-
- if recset:
- return recset[0]['version_topic']
- else:
- return 0
-
- except:
-
- trace = traceback.format_exc()
- print (trace)
-
- def getLastPostVersion(cur, postId):
-
- try:
-
- cur.execute("select version_post from posts_history where post_id = %(postId)s order by version_post desc limit 1", {'postId': postId})
-
- recset = cur.fetchall()
-
- if recset:
- return recset[0]['version_post']
- else:
- return 0
-
- except:
-
- trace = traceback.format_exc()
- print (trace)
-
- def getLastPost(cur):
-
- try:
-
- cur.execute("select post_id from posts order by post_id desc limit 1")
-
- recset = cur.fetchall()
-
- if recset:
- return recset[0]['post_id']
- else:
- return 0
-
- except:
-
- trace = traceback.format_exc()
- print (trace)
-
- def create_forum(cur, row, url):
-
- forumId = verifyForum(cur, row[0])
-
- if not forumId:
-
- forumId = int(getLastForum(cur) + 1)
-
- sql = "Insert into forums (forum_id, name_forum, url_forum, dateinserted_forum) Values (%s, %s, %s, %s)"
-
- recset = [forumId, row[0], url, row[8]]
-
- cur.execute(sql, recset)
-
- return forumId
-
-
- def create_topic(cur, forumId, row, authorId):
-
- topicId = verifyTopic(cur, forumId, authorId, row[3])
-
- if not topicId:
- topicId = int(getLastTopic(cur) + 1)
- newTopic = True
- else:
- newTopic = False
-
- if newTopic:
-
- sql = "Insert into topics (topic_id, forum_id, author_id, title_topic, board_topic, views_topic, posts_topic, " \
- "href_topic, dateadded_topic, dateinserted_topic, classification_topic) Values (%s, %s, %s, %s, %s, %s, " \
- "%s, %s, %s, %s, %s)"
-
- recset = [topicId, forumId, authorId,
- row[3], row[1],
- row[4] if row[4] != '-1' else None,
- row[5] if row[5] != '-1' else None,
- row[6] if row[6] != '-1' else None,
- row[7] if row[7] != '-1' else None,
- row[8],
- row[19]]
- cur.execute(sql, recset)
-
- else:
-
- # Tracking potential topic changes
- sql = "select * from topics where topic_Id = %(topicId)s"
- cur.execute(sql, {'topicId': topicId})
-
- recset = cur.fetchall()
-
- if (str(recset[0]['board_topic']) != str(row[1]) or
- str(recset[0]['views_topic']) != str(row[4] if row[4] != '-1' else None) or # there was a change in the topic information
- str(recset[0]['posts_topic']) != str(row[5] if row[5] != '-1' else None)):
-
- topicVersionId = int(getLastTopicVersion(cur, topicId) + 1)
-
- sql = "Insert into topics_history (topic_id, version_topic, forum_id, author_id, title_topic, board_topic, views_topic, posts_topic, " \
- "href_topic, dateadded_topic, dateinserted_topic, classification_topic) Values (%s, %s, %s, %s, %s, " \
- "%s, %s, %s, %s, %s, %s, %s)"
-
- recset = [topicId, topicVersionId, forumId, authorId,
- recset[0]['title_topic'],
- recset[0]['board_topic'],
- recset[0]['views_topic'],
- recset[0]['posts_topic'],
- recset[0]['href_topic'],
- recset[0]['dateadded_topic'],
- recset[0]['dateinserted_topic'],
- recset[0]['classification_topic']]
- cur.execute(sql, recset)
-
- sql = "Update topics set board_topic = %(board_topic)s, views_topic = %(views_topic)s, posts_topic = %(posts_topic)s, " \
- "dateinserted_topic = %(dateinserted_topic)s where topic_id = %(topicId)s"
- cur.execute(sql, {'board_topic': row[1],
- 'views_topic': row[4] if row[4] != '-1' else None,
- 'posts_topic': row[5] if row[5] != '-1' else None,
- 'dateinserted_topic': row[8],
- 'topicId': topicId})
-
- return topicId
-
-
- def create_author(cur, row, forumId):
-
- userId = verifyUser(cur, row[2], forumId)
-
- if not userId:
-
- userId = int(getLastUser(cur) + 1)
-
- sql = "Insert into users (user_id, forum_id, name_user, status_user, reputation_user, interest_user, " \
- "signature_user, image_user, dateinserted_user) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
-
- recset = [userId, forumId,
- row[2], 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', #telling the create_user function to not track changes here
- row[8]]
-
- cur.execute(sql, recset)
-
- return userId
-
-
- def create_user(cur, row, forumId, index):
-
- userId = verifyUser(cur, row[10][index], forumId)
-
- if not userId:
- userId = int(getLastUser(cur) + 1)
- newUser = True
- else:
- newUser = False
-
- if newUser:
-
- sql = "Insert into users (user_id, forum_id, name_user, status_user, reputation_user, interest_user, " \
- "signature_user, image_user, dateinserted_user) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
-
- recset = [userId, forumId,
- row[10][index],
- row[11][index] if row[11][index] != '-1' else None,
- row[12][index] if row[12][index] != '-1' else None,
- row[13][index] if row[13][index] != '-1' else None,
- row[14][index] if row[14][index] != '-1' else None,
- row[9][index] if row[9][index] != '-1' else None,
- row[8]]
-
- cur.execute(sql, recset)
-
- else:
-
- # Tracking potential user changes
- sql = "select * from users where user_id = %(userId)s"
- cur.execute(sql, {'userId': userId})
-
- recset = cur.fetchall()
-
- #decode_decrypt_image_in_base64(recset[0]['image_user'])
-
- if (str(recset[0]['status_user']) != str(row[11][index] if row[11][index] != '-1' else None) or
- str(recset[0]['reputation_user']) != str(row[12][index] if row[12][index] != '-1' else None) or
- str(recset[0]['interest_user']) != str(row[13][index] if row[13][index] != '-1' else None) or
- str(recset[0]['signature_user']) != str(row[14][index] if row[14][index] != '-1' else None) or
- str(recset[0]['image_user']) != str(row[9][index] if row[9][index] != '-1' else None)): # there was a change in the user information
-
- if (str(recset[0]['status_user']) != 'Nan' or
- str(recset[0]['reputation_user']) != 'Nan' or
- str(recset[0]['interest_user']) != 'Nan' or
- str(recset[0]['signature_user']) != 'Nan' or
- str(recset[0]['image_user']) != 'Nan'):
-
- userVersionId = int(getLastUserVersion(cur, userId) + 1)
-
- sql = "Insert into users_history (user_id, version_user, forum_id, name_user, status_user, reputation_user, interest_user, " \
- "signature_user, image_user, dateinserted_user) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
-
- recset = [userId, userVersionId, forumId,
- recset[0]['name_user'],
- recset[0]['status_user'],
- recset[0]['reputation_user'],
- recset[0]['interest_user'],
- recset[0]['signature_user'],
- recset[0]['image_user'],
- recset[0]['dateinserted_user']]
-
- cur.execute(sql, recset)
-
- sql = "Update users set status_user = %(status_user)s, reputation_user = %(reputation_user)s, " \
- "interest_user = %(interest_user)s, signature_user = %(signature_user)s, image_user = %(image_user)s, " \
- "dateinserted_user = %(dateinserted_user)s where user_id = %(userId)s"
- cur.execute(sql, {'status_user': row[11][index] if row[11][index] != '-1' else None,
- 'reputation_user': row[12][index] if row[12][index] != '-1' else None,
- 'interest_user': row[13][index] if row[13][index] != '-1' else None,
- 'signature_user': row[14][index] if row[14][index] != '-1' else None,
- 'image_user': row[9][index] if row[9][index] != '-1' else None,
- 'dateinserted_user': row[8] if row[8] != '-1' else None,
- 'userId': userId})
-
- return userId
-
-
- def create_posts(cur, row, forumId, topicId):
-
- if row[10] != "-1":
-
- for i in range(len(row[10])):
-
- userId = create_user(cur, row, forumId, i)
-
- postId = verifyPost(cur, topicId, userId, row[17][i])
-
- if not postId:
-
- postId = int(getLastPost(cur) + 1)
-
- sql = "Insert into posts (post_id, topic_id, user_id, content_post, feedback_post, image_post," \
- "dateadded_post, dateinserted_post) Values (%s, %s, %s, %s, %s, %s, %s, %s)"
-
- recset = [postId, topicId, userId,
- row[15][i] if row[15][i] != '-1' else None,
- row[16][i] if row[16][i] != '-1' else None,
- row[18][i] if row[18][i] != '-1' else None,
- row[17][i] if row[17][i] != '-1' else None,
- row[8]]
-
- cur.execute(sql, recset)
-
- else:
-
- # Tracking potential post changes
- sql = "select * from posts where post_id = %(postId)s"
- cur.execute(sql, {'postId': postId})
-
- recset = cur.fetchall()
-
- if (str(recset[0]['content_post']) != str(row[15][i]) or
- str(recset[0]['feedback_post']) != str(row[16][i] if row[16][i] != '-1' else None) or
- str(recset[0]['image_post']) != str(row[18][i] if row[18][i] != '-1' else None)): # there was a change in the post information
-
- #decode_decrypt_image_in_base64(recset[0]['image_post'])
-
- postVersionId = int(getLastPostVersion(cur, postId) + 1)
-
- sql = "Insert into posts_history (post_id, version_post, topic_id, user_id, content_post, feedback_post, " \
- "image_post, dateadded_post, dateinserted_post) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
-
- recset = [postId, postVersionId, topicId, userId,
- recset[0]['content_post'],
- recset[0]['feedback_post'],
- recset[0]['image_post'],
- recset[0]['dateadded_post'],
- recset[0]['dateinserted_post']]
-
- cur.execute(sql, recset)
-
- sql = "Update posts set content_post = %(content_post)s, feedback_post = %(feedback_post)s, " \
- "image_post = %(image_post)s, dateinserted_post = %(dateinserted_post)s where post_id = %(postId)s"
- cur.execute(sql, {'content_post': row[15][i] if row[15][i] != '-1' else None,
- 'feedback_post': row[16][i] if row[16][i] != '-1' else None,
- 'image_post': row[18][i] if row[18][i] != '-1' else None,
- 'dateinserted_post': row[8],
- 'postId': postId})
-
- def create_database(cur, con):
-
- try:
-
- sql = "create table forums (forum_id integer NOT NULL, name_forum character varying(255) NOT NULL, url_forum " \
- "character varying(255) NOT null, dateinserted_forum timestamp(6) with time zone NOT NULL, " \
- "constraint forums_pk primary key (forum_id))"
- cur.execute(sql)
-
- sql = "create unique index unique_forum ON forums USING btree (name_forum ASC NULLS LAST)"
- cur.execute(sql)
-
- sql = "create table users (user_id integer NOT NULL, forum_id integer NOT NULL, name_user character varying(" \
- "255) NOT NULL, status_user character varying(255) null, reputation_user character varying(255) null, " \
- "interest_user character varying(5000) null, signature_user character varying(1000) null, " \
- "image_user character varying(10000000) null, dateinserted_user timestamp(6) with time zone NOT NULL, " \
- "constraint users_pk primary key (user_id), " \
- "constraint users_forum_id_fkey foreign key (forum_id) references forums (forum_id))"
- cur.execute(sql)
-
- sql = "create unique index unique_user ON users USING btree (forum_id ASC NULLS LAST, name_user ASC NULLS LAST)"
- cur.execute(sql)
-
- sql = "create table users_history(user_id integer NOT NULL, version_user integer not null, forum_id integer NOT NULL, " \
- "name_user character varying(255) NOT NULL, status_user character varying(255) null, " \
- "reputation_user character varying(255) null, interest_user character varying(5000) null, " \
- "signature_user character varying(1000) null, image_user character varying(10000000) null, " \
- "dateinserted_user timestamp(6) with time zone NOT NULL, " \
- "constraint users_history_pk primary key (user_id, version_user), " \
- "constraint users_history_user_id_fkey foreign key (user_id) references " \
- "users (user_id), constraint users_history_forum_id_fkey foreign key (forum_id) references forums (forum_id))"
- cur.execute(sql)
-
- sql = "create table topics(topic_id integer NOT NULL, forum_id integer NOT NULL, author_id integer NOT NULL, " \
- "title_topic character varying(255) NOT NULL, board_topic character varying(255) NOT NULL, views_topic integer null, " \
- "posts_topic integer null, href_topic character varying(255) NOT null, dateadded_topic timestamp(6) with time zone null, " \
- "dateinserted_topic timestamp(6) with time zone NOT NULL, classification_topic double precision NOT NULL, " \
- "constraint topics_pk primary key (topic_id), constraint topics_author_id_fkey " \
- "foreign key (author_id) references users (user_id), constraint topics_forum_id_fkey foreign key (" \
- "forum_id) references forums (forum_id))"
- cur.execute(sql)
-
- sql = "create unique index unique_topic ON topics USING btree (forum_id ASC NULLS LAST, author_id ASC NULLS LAST, " \
- "title_topic ASC NULLS LAST)"
- cur.execute(sql)
-
- sql = "create table topics_history(topic_id integer NOT NULL, version_topic integer not null, forum_id integer NOT NULL, " \
- "author_id integer NOT NULL, title_topic character varying(255) NOT NULL, board_topic character varying(255) NOT NULL, " \
- "views_topic integer null, posts_topic integer null, href_topic character varying(255) NOT null, " \
- "dateadded_topic timestamp(6) with time zone null, dateinserted_topic timestamp(6) with time zone NOT NULL, " \
- "classification_topic double precision NOT NULL, " \
- "constraint topics_history_pk primary key (topic_id, version_topic), " \
- "constraint topics_history_topic_id_fkey foreign key (topic_id) references topics (topic_id), " \
- "constraint topics_history_author_id_fkey foreign key (author_id) references users (user_id), " \
- "constraint topics_history_board_id_fkey foreign key (forum_id) references forums (forum_id))"
- cur.execute(sql)
-
- sql = "create table posts(post_id integer NOT NULL, topic_id integer NOT NULL, " \
- "user_id integer NOT NULL, content_post character varying(100000) NOT null, feedback_post integer null, " \
- "image_post character varying(10000000) null, dateadded_post timestamp(6) with time zone NOT NULL, " \
- "dateinserted_post timestamp(6) with time zone NOT NULL, " \
- "constraint posts_pk primary key (post_id), " \
- "constraint posts_user_id_fkey foreign key (user_id) references users (user_id), constraint " \
- "posts_topic_id_fkey foreign key (topic_id) references topics (topic_id))"
- cur.execute(sql)
-
- sql = "create unique index unique_post ON posts USING btree (topic_id ASC NULLS LAST, user_id ASC NULLS LAST, " \
- "dateadded_post ASC NULLS LAST)"
- cur.execute(sql)
-
- sql = "create table posts_history(post_id integer NOT NULL, version_post integer not null, topic_id integer NOT NULL, " \
- "user_id integer NOT NULL, content_post character varying(100000) NOT null, feedback_post integer null, " \
- "image_post character varying(10000000) null, dateadded_post timestamp(6) with time zone NOT NULL, " \
- "dateinserted_post timestamp(6) with time zone NOT NULL, " \
- "constraint posts_history_pk primary key (post_id, version_post), " \
- "constraint posts_history_user_id_fkey foreign key (user_id) references users (user_id), " \
- "constraint posts_history_topic_id_fkey foreign key (topic_id) references topics (topic_id), " \
- "constraint posts_history_post_id_fkey foreign key (post_id) references posts (post_id))"
- cur.execute(sql)
-
- con.commit()
-
- except:
-
- con.rollback()
-
- trace = traceback.format_exc()
-
- if (trace.find("already exists")==-1):
- print ("There was a problem during the database creation." )
- raise SystemExit
|