This is based on the calsyslab project.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

383 lines
12 KiB

1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
  1. __author__ = 'DarkWeb'
  2. import psycopg2
  3. import traceback
  4. import configparser
  def connectDataBase():
      """Open a PostgreSQL connection from the ``[PostgreSQL]`` config section.

      The ``ip``, ``username``, ``password`` and ``database`` options are read
      from the ``config`` object exported by
      ``Forums.Initialization.forums_mining`` and passed to
      ``psycopg2.connect``.

      Returns:
          The connection object returned by ``psycopg2.connect``.

      Raises:
          SystemExit: if the configuration cannot be imported or read, or if
              the connection attempt fails.
      """
      try:
          # Imported inside the function, as in the original code --
          # presumably to avoid a circular import (TODO confirm).
          from Forums.Initialization.forums_mining import config

          section = 'PostgreSQL'
          return psycopg2.connect(
              host=config.get(section, 'ip'),
              user=config.get(section, 'username'),
              password=config.get(section, 'password'),
              dbname=config.get(section, 'database'),
          )
      except Exception as err:
          # `Exception` (not a bare except) lets Ctrl-C / SystemExit propagate.
          print("Data base (darkweb_forums) not found.")
          # The message above is generic; show the real cause as well.
          print(traceback.format_exc())
          raise SystemExit from err
  def verifyForum(cur, nameForum):
      """Look up the id of the forum called ``nameForum``.

      Args:
          cur: database cursor used to run the query.
          nameForum: value matched against ``forums.name_forum``.

      Returns:
          The ``forum_id`` of the matching row, ``0`` when no row matches, or
          ``None`` when the query raised (the traceback is printed).
      """
      try:
          cur.execute("select forum_id from forums where name_forum = %(nameForum)s limit 1",
                      {'nameForum': nameForum})
          # The query is limited to one row, so a single fetch is enough.
          match = cur.fetchone()
          return match[0] if match else 0
      except Exception:
          # Best-effort lookup: report the failure and signal it with None.
          # `Exception` (not a bare except) lets Ctrl-C / SystemExit propagate.
          print(traceback.format_exc())
          return None
  def verifyBoard(cur, forum, nameBoard):
      """Look up the id of the board called ``nameBoard`` inside a forum.

      Args:
          cur: database cursor used to run the query.
          forum: value matched against ``boards.forum_id``.
          nameBoard: value matched against ``boards.name_board``.

      Returns:
          The ``board_id`` of the matching row, ``0`` when no row matches, or
          ``None`` when the query raised (the traceback is printed).
      """
      try:
          cur.execute("select board_id from boards where forum_id = %(forum)s and name_board = %(nameBoard)s limit 1",
                      {'forum': forum, 'nameBoard': nameBoard})
          # The query is limited to one row, so a single fetch is enough.
          match = cur.fetchone()
          return match[0] if match else 0
      except Exception:
          # Best-effort lookup: report the failure and signal it with None.
          # `Exception` (not a bare except) lets Ctrl-C / SystemExit propagate.
          print(traceback.format_exc())
          return None
  def verifyTopic(cur, forumId, boardId, authorId, titleTopic):
      """Look up the id of a topic by forum, board, author and title.

      Args:
          cur: database cursor used to run the query.
          forumId: value matched against ``topics.forum_id``.
          boardId: value matched against ``topics.board_id``.
          authorId: value matched against ``topics.author_id``.
          titleTopic: value matched against ``topics.title_topic``.

      Returns:
          The ``topic_id`` of the matching row, ``0`` when no row matches, or
          ``None`` when the query raised (the traceback is printed).
      """
      try:
          cur.execute("select topic_id from topics where forum_id = %(forumId)s and board_id = %(boardId)s and "
                      "author_id = %(authorId)s and title_topic = %(titleTopic)s limit 1",
                      {'forumId': forumId,
                       'boardId': boardId,
                       'authorId': authorId,
                       'titleTopic': titleTopic})
          # The query is limited to one row, so a single fetch is enough.
          match = cur.fetchone()
          return match[0] if match else 0
      except Exception:
          # Best-effort lookup: report the failure and signal it with None.
          # `Exception` (not a bare except) lets Ctrl-C / SystemExit propagate.
          print(traceback.format_exc())
          return None
  def verifyUser(cur, nameUser, forumId):
      """Look up the id of the user called ``nameUser`` within a forum.

      Args:
          cur: database cursor used to run the query.
          nameUser: value matched against ``users.name_user``.
          forumId: value matched against ``users.forum_id``.

      Returns:
          The ``user_id`` of the matching row, ``0`` when no row matches, or
          ``None`` when the query raised (the traceback is printed).
      """
      try:
          cur.execute("select user_id from users where name_user = %(nameUser)s and forum_id = %(forumId)s limit 1",
                      {'nameUser': nameUser, 'forumId': forumId})
          # The query is limited to one row, so a single fetch is enough.
          match = cur.fetchone()
          return match[0] if match else 0
      except Exception:
          # Best-effort lookup: report the failure and signal it with None.
          # `Exception` (not a bare except) lets Ctrl-C / SystemExit propagate.
          print(traceback.format_exc())
          return None
  def getLastForum(cur):
      """Return the highest ``forum_id`` currently stored in ``forums``.

      Args:
          cur: database cursor used to run the query.

      Returns:
          The largest ``forum_id``, ``0`` when the table is empty, or ``None``
          when the query raised (the traceback is printed).
      """
      try:
          cur.execute("select forum_id from forums order by forum_id desc limit 1")
          # The query is limited to one row, so a single fetch is enough.
          newest = cur.fetchone()
          return newest[0] if newest else 0
      except Exception:
          # Best-effort lookup: report the failure and signal it with None.
          # `Exception` (not a bare except) lets Ctrl-C / SystemExit propagate.
          print(traceback.format_exc())
          return None
  def getLastBoard(cur):
      """Return the highest ``board_id`` currently stored in ``boards``.

      Args:
          cur: database cursor used to run the query.

      Returns:
          The largest ``board_id``, ``0`` when the table is empty, or ``None``
          when the query raised (the traceback is printed).
      """
      try:
          cur.execute("select board_id from boards order by board_id desc limit 1")
          # The query is limited to one row, so a single fetch is enough.
          newest = cur.fetchone()
          return newest[0] if newest else 0
      except Exception:
          # Best-effort lookup: report the failure and signal it with None.
          # `Exception` (not a bare except) lets Ctrl-C / SystemExit propagate.
          print(traceback.format_exc())
          return None
  def getLastTopic(cur):
      """Return the highest ``topic_id`` currently stored in ``topics``.

      Args:
          cur: database cursor used to run the query.

      Returns:
          The largest ``topic_id``, ``0`` when the table is empty, or ``None``
          when the query raised (the traceback is printed).
      """
      try:
          cur.execute("select topic_id from topics order by topic_id desc limit 1")
          # The query is limited to one row, so a single fetch is enough.
          newest = cur.fetchone()
          return newest[0] if newest else 0
      except Exception:
          # Best-effort lookup: report the failure and signal it with None.
          # `Exception` (not a bare except) lets Ctrl-C / SystemExit propagate.
          print(traceback.format_exc())
          return None
  def getLastUser(cur):
      """Return the highest ``user_id`` currently stored in ``users``.

      Args:
          cur: database cursor used to run the query.

      Returns:
          The largest ``user_id``, ``0`` when the table is empty, or ``None``
          when the query raised (the traceback is printed).
      """
      try:
          # `limit 1` matches the sibling getLast* helpers; without it every
          # user_id in the table is returned just to read the first one.
          cur.execute("select user_id from users order by user_id desc limit 1")
          newest = cur.fetchone()
          return newest[0] if newest else 0
      except Exception:
          # Best-effort lookup: report the failure and signal it with None.
          # `Exception` (not a bare except) lets Ctrl-C / SystemExit propagate.
          print(traceback.format_exc())
          return None
  110. '''
  111. def getLastPost(cur):
  112. try:
  113. cur.execute("select id from Posts order by id desc limit 1")
  114. recset = cur.fetchall()
  115. if recset:
  116. return recset[0][0]
  117. else:
  118. return 0
  119. except:
  120. trace = traceback.format_exc()
  121. print (trace)
  122. '''
  def create_forum(cur, row, url):
      """Return the id of the forum named ``row[0]``, inserting it if absent.

      Args:
          cur: database cursor used for the lookup and the insert.
          row: record whose item 0 is the forum name and item 8 is stored as
              ``dateinserted_forum``.
          url: value stored as ``url_forum`` for a newly inserted forum.

      Returns:
          The id reported by ``verifyForum`` when it is truthy; otherwise the
          id of the row just inserted (``getLastForum(cur) + 1``).
      """
      known_id = verifyForum(cur, row[0])
      if known_id:
          return known_id

      # No existing forum: allocate the next id after the current maximum.
      fresh_id = int(getLastForum(cur) + 1)
      cur.execute(
          "Insert into forums (forum_id, name_forum, url_forum, dateinserted_forum) Values (%s, %s, %s, %s)",
          [fresh_id, row[0], url, row[8]],
      )
      return fresh_id
  def create_board(cur, row, forumId):
      """Return the id of board ``row[1]`` in ``forumId``, inserting it if absent.

      Args:
          cur: database cursor used for the lookup and the insert.
          row: record whose item 1 is the board name and item 8 is stored as
              ``dateinserted_board``.
          forumId: id of the forum the board belongs to.

      Returns:
          The id reported by ``verifyBoard`` when it is truthy; otherwise the
          id of the row just inserted (``getLastBoard(cur) + 1``).
      """
      known_id = verifyBoard(cur, forumId, row[1])
      if known_id:
          return known_id

      # No existing board: allocate the next id after the current maximum.
      fresh_id = int(getLastBoard(cur) + 1)
      cur.execute(
          "Insert into boards (board_id, forum_id, name_board, dateinserted_board) Values (%s, %s, %s, %s)",
          [fresh_id, forumId, row[1], row[8]],
      )
      return fresh_id
  def create_topic(cur, row, forumId, boardId, authorId):
      """Return the id of the topic titled ``row[3]``, inserting it if absent.

      Args:
          cur: database cursor used for the lookup and the insert.
          row: record; item 3 is the title, items 4-7 fill ``views_topic``,
              ``posts_topic``, ``href_topic`` and ``dateadded_topic`` (the
              string ``'-1'`` is stored as NULL), item 8 is
              ``dateinserted_topic`` and item 17 is ``classification_topic``.
          forumId: id of the owning forum.
          boardId: id of the owning board.
          authorId: id of the user who started the topic.

      Returns:
          The id reported by ``verifyTopic`` when it is truthy; otherwise the
          id of the row just inserted (``getLastTopic(cur) + 1``).
      """
      known_id = verifyTopic(cur, forumId, boardId, authorId, row[3])
      if known_id:
          return known_id

      # No existing topic: allocate the next id after the current maximum.
      fresh_id = int(getLastTopic(cur) + 1)
      insert_sql = (
          "Insert into topics (topic_id, forum_id, board_id, author_id, title_topic, views_topic, posts_topic, "
          "href_topic, dateadded_topic, dateinserted_topic, classification_topic) Values (%s, %s, %s, %s, %s, %s, "
          "%s, %s, %s, %s, %s)"
      )
      values = [fresh_id, forumId, boardId, authorId, row[3]]
      # '-1' marks a missing value in the record and becomes NULL.
      values.extend(row[col] if row[col] != '-1' else None for col in (4, 5, 6, 7))
      values.append(row[8])
      values.append(row[17])
      cur.execute(insert_sql, values)
      return fresh_id
  def create_user(cur, row, forumId, index):
      """Return the id of user ``row[9][index]``, inserting it if absent.

      Args:
          cur: database cursor used for the lookup and the insert.
          row: record whose items 9-13 are per-post sequences holding the
              user name, status, reputation, interest and signature; item 8
              is stored as ``dateinserted_user``.
          forumId: id of the forum the user belongs to.
          index: position inside the per-post sequences to read.

      Returns:
          The id reported by ``verifyUser`` when it is truthy; otherwise the
          id of the row just inserted (``getLastUser(cur) + 1``).
      """
      known_id = verifyUser(cur, row[9][index], forumId)
      if known_id:
          return known_id

      # No existing user: allocate the next id after the current maximum.
      fresh_id = int(getLastUser(cur) + 1)
      insert_sql = (
          "Insert into users (user_id, forum_id, name_user, status_user, reputation_user, interest_user, "
          "signature_user, dateinserted_user) Values (%s, %s, %s, %s, %s, %s, %s, %s)"
      )
      values = [fresh_id, forumId, row[9][index]]
      # '-1' marks a missing value in the record and becomes NULL.
      values.extend(
          row[col][index] if row[col][index] != '-1' else None
          for col in (10, 11, 12, 13)
      )
      values.append(row[8])
      cur.execute(insert_sql, values)
      return fresh_id
  def create_posts(cur, row, forumId, boardId, topicId):
      """Insert one ``posts`` row per entry of ``row[9]``.

      Nothing is inserted when ``row[9]`` equals the string ``"-1"``.

      Args:
          cur: database cursor used for lookups and inserts.
          row: record; item 9 holds one user name per post, items 14-16 hold
              the per-post content, feedback and date added (``'-1'`` is
              stored as NULL), item 2 is the name looked up for the first
              post, and item 8 is stored as ``dateinserted_post``.
          forumId: id of the owning forum.
          boardId: id of the owning board.
          topicId: id of the owning topic.
      """
      if row[9] == "-1":
          return

      insert_sql = (
          "Insert into posts (forum_id, board_id, topic_id, user_id, content_post, feedback_post, "
          "dateadded_post, dateinserted_post) Values (%s, %s, %s, %s, %s, %s, %s, %s)"
      )
      for position in range(len(row[9])):
          if position == 0:
              # First post: only look the user up by row[2]; no insert here.
              poster_id = verifyUser(cur, row[2], forumId)
          else:
              poster_id = create_user(cur, row, forumId, position)

          values = [forumId, boardId, topicId, poster_id]
          values.extend(
              row[col][position] if row[col][position] != '-1' else None
              for col in (14, 15, 16)
          )
          values.append(row[8])
          cur.execute(insert_sql, values)
  def create_database(cur, con):
      """Create the forums schema in a single transaction.

      Creates, in dependency order, the tables ``forums``, ``boards``,
      ``users``, ``users_history``, ``topics``, ``topics_history`` and
      ``posts``, then commits.

      Args:
          cur: database cursor used to run the ``create table`` statements.
          con: connection owning ``cur``; committed on success, rolled back
              on any failure.

      Raises:
          SystemExit: if a statement or the commit fails for any reason other
              than an "already exists" error, which is treated as "schema
              already present" and ignored.
      """
      statements = (
          "create table forums (forum_id integer NOT NULL, name_forum character varying(255) NOT NULL, url_forum "
          "character varying(255) null, dateinserted_forum timestamp(6) with time zone NOT NULL, constraint "
          "forums_pk primary key (forum_id))",

          "create table boards (board_id integer NOT NULL, forum_id integer NOT NULL, name_board character "
          "varying(255) NOT NULL, dateinserted_board timestamp(6) with time zone NOT NULL, constraint boards_pk "
          "primary key (board_id), constraint boards_forum_id_fkey foreign key (forum_id) references forums ("
          "forum_id))",

          "create table users (user_id integer NOT NULL, forum_id integer NOT NULL, name_user character varying("
          "255) NOT NULL, status_user character varying(255) null, reputation_user character varying(255) null, "
          "interest_user character varying(5000) null, signature_user character varying(1000) null, "
          "dateinserted_user timestamp(6) with time zone NOT NULL, constraint users_pk primary key (user_id), "
          "constraint users_forum_id_fkey foreign key (forum_id) references forums (forum_id))",

          "create table users_history(user_id integer NOT NULL, forum_id integer NOT NULL, name_user character "
          "varying(255) NOT NULL, status_user character varying(255) null, reputation_user character varying(255) "
          "null, interest_user character varying(5000) null, signature_user character varying(1000) null, "
          "dateinserted_user timestamp(6) with time zone NOT NULL, constraint users_history_pk primary key ("
          "user_id, dateinserted_user), constraint users_history_user_id_fkey foreign key (user_id) references "
          "users (user_id), constraint users_history_forum_id_fkey foreign key (forum_id) references forums ("
          "forum_id))",

          "create table topics(topic_id integer NOT NULL, forum_id integer NOT NULL, board_id integer NOT NULL, "
          "author_id integer NOT NULL, title_topic character varying(255) NOT NULL, views_topic integer null, "
          "posts_topic integer null, href_topic character varying(255) null, dateadded_topic timestamp(6) with "
          "time zone null, dateinserted_topic timestamp(6) with time zone NOT NULL, classification_topic double "
          "precision NOT NULL, constraint topics_pk primary key (topic_id), constraint topics_author_id_fkey "
          "foreign key (author_id) references users (user_id), constraint topics_board_id_fkey foreign key ("
          "board_id) references boards (board_id), constraint topics_forum_id_fkey foreign key (forum_id) "
          "references forums (forum_id))",

          "create table topics_history(topic_id integer NOT NULL, forum_id integer NOT NULL, board_id integer NOT "
          "NULL, author_id integer NOT NULL, title_topic character varying(255) NOT NULL, views_topic integer "
          "null, posts_topic integer null, href_topic character varying(255) null, dateadded_topic timestamp(6) "
          "with time zone null, dateinserted_topic timestamp(6) with time zone NOT NULL, classification_topic "
          "double precision NOT NULL, constraint topics_history_pk primary key (topic_id, dateinserted_topic), "
          "constraint topics_history_topic_id_fkey foreign key (topic_id) references topics (topic_id), "
          "constraint topics_history_author_id_fkey foreign key (author_id) references users (user_id), "
          "constraint topics_history_board_id_fkey foreign key (board_id) references boards (board_id), "
          "constraint topics_history_forum_id_fkey foreign key (forum_id) references forums (forum_id))",

          "create table posts(forum_id integer NOT NULL, board_id integer NOT NULL, topic_id integer NOT NULL, "
          "user_id integer NOT NULL, content_post character varying(100000) null, feedback_post integer null, "
          "dateadded_post timestamp(6) with time zone NOT NULL, dateinserted_post timestamp(6) with time zone NOT "
          "NULL, constraint posts_pk primary key (forum_id, board_id, topic_id, user_id, dateadded_post), "
          "constraint posts_author_id_fkey foreign key (user_id) references users (user_id), constraint "
          "posts_board_id_fkey foreign key (board_id) references boards (board_id), constraint "
          "posts_forum_id_fkey foreign key (forum_id) references forums (forum_id), constraint "
          "posts_topic_id_fkey foreign key (topic_id) references topics (topic_id))",
      )

      try:
          for statement in statements:
              cur.execute(statement)
          con.commit()
      except Exception as err:
          # `Exception` (not a bare except) lets Ctrl-C / SystemExit propagate.
          con.rollback()
          # An "already exists" error means the schema was created by an
          # earlier run; that is not a failure.
          if "already exists" not in str(err):
              print("There was a problem during the database creation.")
              # Show the underlying cause instead of discarding it.
              print(traceback.format_exc())
              raise SystemExit from err