@ -7,9 +7,9 @@ import configparser
def connectDataBase ( ) :
try :
from Forums.Initialization.forums_mining import config
from Forums.Initialization.forums_mining import config
try :
ip = config . get ( ' PostgreSQL ' , ' ip ' )
username = config . get ( ' PostgreSQL ' , ' username ' )
@ -20,7 +20,7 @@ def connectDataBase():
except :
print ( " Data base (darkweb_forums) not found. " )
print ( " Data base " + config . get ( ' PostgreSQL ' , ' database ' ) + " not found." )
raise SystemExit
@ -63,15 +63,34 @@ def verifyBoard(cur, forum, nameBoard):
print ( trace )
def verifyTopic(cur, forumId, authorId, titleTopic):
    """Return the id of an already stored topic, or 0 when there is none.

    A topic is matched on the combination of forum, author and title.

    Parameters:
        cur: database cursor exposing ``execute`` and ``fetchall``.
        forumId: value compared against ``topics.forum_id``.
        authorId: value compared against ``topics.author_id``.
        titleTopic: value compared against ``topics.title_topic``.

    Returns:
        The ``topic_id`` of the first matching row, ``0`` when no row matches,
        or ``None`` when the query raised (the traceback is printed).
    """
    try:
        # Keys of the parameter mapping must match the %(name)s placeholders
        # exactly, otherwise the driver cannot bind them.
        cur.execute(
            "select topic_id from topics where forum_id = %(forumId)s and "
            "author_id = %(authorId)s and title_topic = %(titleTopic)s limit 1",
            {'forumId': forumId, 'authorId': authorId, 'titleTopic': titleTopic})

        recset = cur.fetchall()

        if recset:
            return recset[0][0]
        return 0

    except Exception:
        # Best-effort lookup: report the failure instead of aborting the run.
        # Callers test the result with "if not topicId", so None reads as
        # "not found".
        trace = traceback.format_exc()
        print(trace)
        return None
def verifyPost ( cur , topicId , userId , dateAdded ) :
try :
cur . execute ( " select post_id from posts where topic_id = %(topicId)s and "
" user_id = %(userId)s and dateadded_post = %(dateAdded)s limit 1 " , { ' topicId ' : topicId ,
' userId ' : userId ,
' dateAdded ' : dateAdded } )
recset = cur . fetchall ( )
@ -167,7 +186,7 @@ def getLastUser(cur):
try :
cur . execute ( " select user_id from users order by user_id desc " )
cur . execute ( " select user_id from users order by user_id desc limit 1 " )
recset = cur . fetchall ( )
@ -182,12 +201,11 @@ def getLastUser(cur):
print ( trace )
'''
def getLastPost ( cur ) :
try :
cur . execute ( " select id from Posts order by id desc limit 1 " )
cur . execute ( " select post_id from posts order by post_ id desc limit 1 " )
recset = cur . fetchall ( )
@ -200,7 +218,6 @@ def getLastPost(cur):
trace = traceback . format_exc ( )
print ( trace )
'''
def create_forum ( cur , row , url ) :
@ -220,46 +237,86 @@ def create_forum(cur, row, url):
return forumId
def create_board(cur, row, forumId):
    """Return the id of the board named ``row[1]``, inserting it if needed.

    NOTE(review): the revised create_topic below stores the board name directly
    in ``topics.board_topic`` and no longer takes a board id - confirm whether
    this helper is still required.
    """
    boardId = verifyBoard(cur, forumId, row[1])

    if not boardId:
        boardId = int(getLastBoard(cur) + 1)

        sql = "Insert into boards (board_id, forum_id, name_board, dateinserted_board) Values (%s, %s, %s, %s)"
        recset = [boardId, forumId, row[1], row[8]]
        cur.execute(sql, recset)

    return boardId


def _none_if_missing(value):
    """Map the '-1' placeholder used for absent values to None (SQL NULL)."""
    return value if value != '-1' else None


def create_topic(cur, forumId, row, authorId):
    """Insert the topic described by ``row`` or record changes to a stored one.

    A topic is looked up with verifyTopic on (forumId, authorId, row[3]).
    When it does not exist, a new row is inserted with the next free topic_id.
    When it exists and its board, views or posts differ from ``row``, the
    stored row is first copied into ``topics_history`` and then updated.

    Parameters:
        cur: database cursor exposing ``execute`` and ``fetchall``.
        forumId: id written to ``topics.forum_id``.
        row: indexable record; this function reads row[1] (board_topic),
            row[3] (title_topic), row[4] (views_topic), row[5] (posts_topic),
            row[6] (href_topic), row[7] (dateadded_topic),
            row[8] (dateinserted_topic) and row[17] (classification_topic).
            The string '-1' in row[4]..row[7] is stored as NULL.
        authorId: id written to ``topics.author_id``.

    Returns:
        The topic_id of the inserted or already existing topic.
    """
    topicId = verifyTopic(cur, forumId, authorId, row[3])

    if not topicId:
        topicId = int(getLastTopic(cur) + 1)

        sql = "Insert into topics (topic_id, forum_id, author_id, title_topic, board_topic, views_topic, posts_topic, " \
              "href_topic, dateadded_topic, dateinserted_topic, classification_topic) Values (%s, %s, %s, %s, %s, %s, " \
              "%s, %s, %s, %s, %s)"
        recset = [topicId, forumId, authorId,
                  row[3], row[1],
                  _none_if_missing(row[4]),
                  _none_if_missing(row[5]),
                  _none_if_missing(row[6]),
                  _none_if_missing(row[7]),
                  row[8],
                  row[17]]
        cur.execute(sql, recset)
        return topicId

    # Tracking potential topic changes
    sql = "select * from topics where topic_Id = %(topicId)s"
    cur.execute(sql, {'topicId': topicId})
    stored = cur.fetchall()[0]

    # Columns 4, 5 and 6 follow the insert order above: board, views, posts.
    # Views/posts are normalised first because '-1' is stored as NULL; comparing
    # the raw '-1' against the stored None would report a change on every run.
    changed = (str(stored[4]) != str(row[1])
               or str(stored[5]) != str(_none_if_missing(row[4]))
               or str(stored[6]) != str(_none_if_missing(row[5])))

    if changed:
        # Keep a copy of the previous state before overwriting it.
        sql = "Insert into topics_history (topic_id, forum_id, author_id, title_topic, board_topic, views_topic, posts_topic, " \
              "href_topic, dateadded_topic, dateinserted_topic, classification_topic) Values (%s, %s, %s, %s, %s, " \
              "%s, %s, %s, %s, %s, %s)"
        recset = [topicId, forumId, authorId,
                  stored[3],
                  stored[4],
                  stored[5],
                  stored[6],
                  stored[7],
                  stored[8],
                  stored[9],
                  stored[10]]
        cur.execute(sql, recset)

        sql = "Update topics set board_topic = %(board_topic)s, views_topic = %(views_topic)s, posts_topic = %(posts_topic)s, " \
              "dateinserted_topic = %(dateinserted_topic)s where topic_id = %(topicId)s"
        cur.execute(sql, {'board_topic': row[1],
                          'views_topic': _none_if_missing(row[4]),
                          'posts_topic': _none_if_missing(row[5]),
                          'dateinserted_topic': row[8],
                          'topicId': topicId})

    return topicId
def create_author(cur, row, forumId):
    """Return the user id of the author named ``row[2]``, inserting it if needed.

    The author is looked up with verifyUser on (row[2], forumId). When no user
    exists yet, a row is inserted with the next free user_id; status,
    reputation, interest and signature are filled with the placeholder 'Nan'.

    Parameters:
        cur: database cursor exposing ``execute``.
        row: indexable record; row[2] is stored as ``name_user`` and row[8] as
            ``dateinserted_user``.
        forumId: id written to ``users.forum_id``.

    Returns:
        The user_id of the existing or newly inserted author.
    """
    userId = verifyUser(cur, row[2], forumId)

    if not userId:
        userId = int(getLastUser(cur) + 1)

        sql = "Insert into users (user_id, forum_id, name_user, status_user, reputation_user, interest_user, " \
              "signature_user, dateinserted_user) Values (%s, %s, %s, %s, %s, %s, %s, %s)"

        # 'Nan' marks profile fields that are not known yet, telling the
        # change-tracking code not to record this placeholder row as history.
        recset = [userId, forumId,
                  row[2], 'Nan', 'Nan', 'Nan', 'Nan',
                  row[8]]
        cur.execute(sql, recset)

    return userId
def create_user ( cur , row , forumId , index ) :
@ -267,8 +324,12 @@ def create_user(cur, row, forumId, index):
userId = verifyUser ( cur , row [ 9 ] [ index ] , forumId )
if not userId :
userId = int ( getLastUser ( cur ) + 1 )
newUser = True
else :
newUser = False
if newUser :
sql = " Insert into users (user_id, forum_id, name_user, status_user, reputation_user, interest_user, " \
" signature_user, dateinserted_user) Values ( %s , %s , %s , %s , %s , %s , %s , %s ) "
@ -283,30 +344,97 @@ def create_user(cur, row, forumId, index):
cur . execute ( sql , recset )
else :
# Tracking potential user changes
sql = " select * from users where user_id = %(userId)s "
cur . execute ( sql , { ' userId ' : userId } )
recset = cur . fetchall ( )
if ( str ( recset [ 0 ] [ 3 ] ) != str ( row [ 10 ] [ index ] ) or str ( recset [ 0 ] [ 4 ] ) != str ( row [ 11 ] [ index ] ) or
str ( recset [ 0 ] [ 5 ] ) != str ( row [ 12 ] [ index ] if row [ 12 ] [ index ] != ' -1 ' else None ) or str ( recset [ 0 ] [ 6 ] ) != str ( row [ 13 ] [ index ] if row [ 13 ] [ index ] != ' -1 ' else None ) ) : # there was a change in the user information
if ( str ( recset [ 0 ] [ 3 ] ) != ' Nan ' or str ( recset [ 0 ] [ 4 ] ) != ' Nan ' or str ( recset [ 0 ] [ 5 ] ) != ' Nan ' or str ( recset [ 0 ] [ 6 ] ) != ' Nan ' ) :
sql = " Insert into users_history (user_id, forum_id, name_user, status_user, reputation_user, interest_user, " \
" signature_user, dateinserted_user) Values ( %s , %s , %s , %s , %s , %s , %s , %s ) "
recset = [ userId , forumId ,
recset [ 0 ] [ 2 ] ,
recset [ 0 ] [ 3 ] ,
recset [ 0 ] [ 4 ] ,
recset [ 0 ] [ 5 ] ,
recset [ 0 ] [ 6 ] ,
recset [ 0 ] [ 7 ] ]
cur . execute ( sql , recset )
sql = " Update users set status_user = %(status_user)s , reputation_user = %(reputation_user)s , " \
" interest_user = %(interest_user)s , signature_user = %(signature_user)s , dateinserted_user = %(dateinserted_user)s " \
" where user_id = %(userId)s "
cur . execute ( sql , { ' status_user ' : row [ 10 ] [ index ] if row [ 10 ] [ index ] != ' -1 ' else None ,
' reputation_user ' : row [ 11 ] [ index ] if row [ 11 ] [ index ] != ' -1 ' else None ,
' interest_user ' : row [ 12 ] [ index ] if row [ 12 ] [ index ] != ' -1 ' else None ,
' signature_user ' : row [ 13 ] [ index ] if row [ 13 ] [ index ] != ' -1 ' else None ,
' dateinserted_user ' : row [ 8 ] if row [ 8 ] != ' -1 ' else None ,
' userId ' : userId } )
return userId
def create_posts(cur, row, forumId, topicId):
    """Insert the posts of one topic, or record changes to posts already stored.

    For every index ``i`` of ``row[9]`` the posting user is created or updated
    through create_user, and the post is looked up with verifyPost on
    (topicId, userId, row[16][i]). Unknown posts are inserted with the next
    free post_id. For known posts whose content or feedback differs, the stored
    row is copied into ``posts_history`` and then updated.

    Parameters:
        cur: database cursor exposing ``execute`` and ``fetchall``.
        row: indexable record; row[9] is a per-post sequence (or the string
            "-1" when there are no posts), row[14][i] is stored as
            content_post, row[15][i] as feedback_post, row[16][i] as
            dateadded_post and row[8] as dateinserted_post. The string '-1'
            in a per-post value is stored as NULL.
        forumId: forum id passed on to create_user.
        topicId: id written to ``posts.topic_id``.

    Returns:
        None.
    """
    if row[9] == "-1":
        # Placeholder meaning "no posts available" - nothing to store.
        return

    for i in range(len(row[9])):

        userId = create_user(cur, row, forumId, i)

        postId = verifyPost(cur, topicId, userId, row[16][i])

        content = row[14][i] if row[14][i] != '-1' else None
        feedback = row[15][i] if row[15][i] != '-1' else None

        if not postId:
            postId = int(getLastPost(cur) + 1)

            sql = "Insert into posts (post_id, topic_id, user_id, content_post, feedback_post, " \
                  "dateadded_post, dateinserted_post) Values (%s, %s, %s, %s, %s, %s, %s)"
            recset = [postId, topicId, userId,
                      content,
                      feedback,
                      row[16][i] if row[16][i] != '-1' else None,
                      row[8]]
            cur.execute(sql, recset)
            continue

        # Tracking potential post changes
        sql = "select * from posts where post_id = %(postId)s"
        cur.execute(sql, {'postId': postId})
        stored = cur.fetchall()[0]

        # Columns 3 and 4 follow the insert order above: content, feedback.
        # Content is compared against the raw scraped value, feedback against
        # its NULL-normalised form.
        if str(stored[3]) != str(row[14][i]) or str(stored[4]) != str(feedback):

            # Keep a copy of the previous state before overwriting it.
            sql = "Insert into posts_history (post_id, topic_id, user_id, content_post, feedback_post, " \
                  "dateadded_post, dateinserted_post) Values (%s, %s, %s, %s, %s, %s, %s)"
            recset = [postId, topicId, userId,
                      stored[3],
                      stored[4],
                      stored[5],
                      stored[6]]
            cur.execute(sql, recset)

            sql = "Update posts set content_post = %(content_post)s, feedback_post = %(feedback_post)s, dateinserted_post = %(dateinserted_post)s " \
                  "where post_id = %(postId)s"
            cur.execute(sql, {'content_post': content,
                              'feedback_post': feedback,
                              'dateinserted_post': row[8],
                              'postId': postId})
def create_database ( cur , con ) :
@ -314,14 +442,11 @@ def create_database(cur, con):
try :
sql = " create table forums (forum_id integer NOT NULL, name_forum character varying(255) NOT NULL, url_forum " \
" character varying(255) null, dateinserted_forum timestamp(6) with time zone NOT NULL, constraint " \
" character varying(255) NOT null, dateinserted_forum timestamp(6) with time zone NOT NULL, constraint " \
" forums_pk primary key (forum_id)) "
cur . execute ( sql )
sql = " create table boards (board_id integer NOT NULL, forum_id integer NOT NULL, name_board character " \
" varying(255) NOT NULL, dateinserted_board timestamp(6) with time zone NOT NULL, constraint boards_pk " \
" primary key (board_id), constraint boards_forum_id_fkey foreign key (forum_id) references forums ( " \
" forum_id)) "
sql = " create unique index unique_forum ON forums USING btree (name_forum ASC NULLS LAST) "
cur . execute ( sql )
sql = " create table users (user_id integer NOT NULL, forum_id integer NOT NULL, name_user character varying( " \
@ -331,6 +456,9 @@ def create_database(cur, con):
" constraint users_forum_id_fkey foreign key (forum_id) references forums (forum_id)) "
cur . execute ( sql )
sql = " create unique index unique_user ON users USING btree (forum_id ASC NULLS LAST, name_user ASC NULLS LAST) "
cur . execute ( sql )
sql = " create table users_history(user_id integer NOT NULL, forum_id integer NOT NULL, name_user character " \
" varying(255) NOT NULL, status_user character varying(255) null, reputation_user character varying(255) " \
" null, interest_user character varying(5000) null, signature_user character varying(1000) null, " \
@ -340,37 +468,50 @@ def create_database(cur, con):
" forum_id)) "
cur . execute ( sql )
sql = " create table topics(topic_id integer NOT NULL, forum_id integer NOT NULL, board _id integer NOT NULL, " \
" author_id integer NOT NULL, title _topic character varying(255) NOT NULL, views_topic integer null, " \
" posts_topic integer null, href_topic character varying(255) null, dateadded_topic timestamp(6) with " \
sql = " create table topics(topic_id integer NOT NULL, forum_id integer NOT NULL, author _id integer NOT NULL, " \
" title_topic character varying(255) NOT NULL, board _topic character varying(255) NOT NULL, views_topic integer null, " \
" posts_topic integer null, href_topic character varying(255) NOT null, dateadded_topic timestamp(6) with " \
" time zone null, dateinserted_topic timestamp(6) with time zone NOT NULL, classification_topic double " \
" precision NOT NULL, constraint topics_pk primary key (topic_id), constraint topics_author_id_fkey " \
" foreign key (author_id) references users (user_id), constraint topics_board_id_fkey foreign key ( " \
" board_id) references boards (board_id), constraint topics_forum_id_fkey foreign key (forum_id) " \
" references forums (forum_id)) "
" foreign key (author_id) references users (user_id), constraint topics_forum_id_fkey foreign key ( " \
" forum_id) references forums (forum_id)) "
cur . execute ( sql )
sql = " create unique index unique_topic ON topics USING btree (forum_id ASC NULLS LAST, author_id ASC NULLS LAST, " \
" title_topic ASC NULLS LAST) "
cur . execute ( sql )
sql = " create table topics_history(topic_id integer NOT NULL, forum_id integer NOT NULL, board_id integer NOT " \
" NULL, author_id integer NOT NULL, title _topic character varying(255) NOT NULL, views_topic integer " \
" null, posts_topic integer null, href_topic character varying(255) null, dateadded_topic timestamp(6) " \
sql = " create table topics_history(topic_id integer NOT NULL, forum_id integer NOT NULL, author_id integer NOT NULL, " \
" title_topic character varying(255) NOT NULL, board _topic character varying(255) NOT NULL, views_topic integer " \
" null, posts_topic integer null, href_topic character varying(255) NOT null, dateadded_topic timestamp(6) " \
" with time zone null, dateinserted_topic timestamp(6) with time zone NOT NULL, classification_topic " \
" double precision NOT NULL, constraint topics_history_pk primary key (topic_id, dateinserted_topic), " \
" constraint topics_history_topic_id_fkey foreign key (topic_id) references topics (topic_id), " \
" constraint topics_history_author_id_fkey foreign key (author_id) references users (user_id), " \
" constraint topics_history_board_id_fkey foreign key (board_id) references boards (board_id), " \
" constraint topics_history_forum_id_fkey foreign key (forum_id) references forums (forum_id)) "
" constraint topics_history_board_id_fkey foreign key (forum_id) references forums (forum_id)) "
cur . execute ( sql )
sql = " create table posts(forum_id integer NOT NULL, board _id integer NOT NULL, topic_id integer NOT NULL, " \
" user_id integer NOT NULL, content_post character varying(100000) null, feedback_post integer null, " \
sql = " create table posts(post _id integer NOT NULL, topic_id integer NOT NULL, " \
" user_id integer NOT NULL, content_post character varying(100000) NOT null, feedback_post integer null, " \
" dateadded_post timestamp(6) with time zone NOT NULL, dateinserted_post timestamp(6) with time zone NOT " \
" NULL, constraint posts_pk primary key (forum_id, board_id, topic_id, user_id, dateadded_post), " \
" constraint posts_author_id_fkey foreign key (user_id) references users (user_id), constraint " \
" posts_board_id_fkey foreign key (board_id) references boards (board_id), constraint " \
" posts_forum_id_fkey foreign key (forum_id) references forums (forum_id), constraint " \
" NULL, constraint posts_pk primary key (post_id), " \
" constraint posts_user_id_fkey foreign key (user_id) references users (user_id), constraint " \
" posts_topic_id_fkey foreign key (topic_id) references topics (topic_id)) "
cur . execute ( sql )
sql = " create unique index unique_post ON posts USING btree (topic_id ASC NULLS LAST, user_id ASC NULLS LAST, " \
" dateadded_post ASC NULLS LAST) "
cur . execute ( sql )
sql = " create table posts_history(post_id integer NOT NULL, topic_id integer NOT NULL, " \
" user_id integer NOT NULL, content_post character varying(100000) NOT null, feedback_post integer null, " \
" dateadded_post timestamp(6) with time zone NOT NULL, dateinserted_post timestamp(6) with time zone NOT " \
" NULL, constraint posts_history_pk primary key (post_id, dateinserted_post), " \
" constraint posts_history_user_id_fkey foreign key (user_id) references users (user_id), " \
" constraint posts_history_topic_id_fkey foreign key (topic_id) references topics (topic_id), " \
" constraint posts_history_post_id_fkey foreign key (post_id) references posts (post_id)) "
cur . execute ( sql )
con . commit ( )
except :