This is based on the CALSysLab project.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

525 lines
19 KiB

1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
  1. __author__ = 'DarkWeb'
  2. import psycopg2
  3. import traceback
  4. import configparser
  5. def connectDataBase():
  6. from Forums.Initialization.forums_mining import config
  7. try:
  8. ip = config.get('PostgreSQL', 'ip')
  9. username = config.get('PostgreSQL', 'username')
  10. password = config.get('PostgreSQL', 'password')
  11. database = config.get('PostgreSQL', 'database')
  12. return psycopg2.connect(host=ip, user=username, password=password, dbname=database)
  13. except:
  14. print ("Data base " + config.get('PostgreSQL', 'database') + " not found.")
  15. raise SystemExit
  16. def verifyForum(cur, nameForum):
  17. try:
  18. cur.execute("select forum_id from forums where name_forum = %(nameForum)s limit 1", {'nameForum': nameForum})
  19. recset = cur.fetchall()
  20. if recset:
  21. return recset[0][0]
  22. else:
  23. return 0
  24. except:
  25. trace = traceback.format_exc()
  26. print (trace)
  27. def verifyBoard(cur, forum, nameBoard):
  28. try:
  29. cur.execute("select board_id from boards where forum_id = %(forum)s and name_board = %(nameBoard)s limit 1",
  30. {'forum': forum, 'nameBoard': nameBoard})
  31. recset = cur.fetchall()
  32. if recset:
  33. return recset[0][0]
  34. else:
  35. return 0
  36. except:
  37. trace = traceback.format_exc()
  38. print (trace)
  39. def verifyTopic(cur, forumId, authorId, titleTopic):
  40. try:
  41. cur.execute("select topic_id from topics where forum_id = %(forumId)s and author_id = %(authorId)s and title_topic = %(titleTopic)s limit 1",
  42. {'forumId': forumId, 'authorId': authorId, 'titleTopic': titleTopic})
  43. recset = cur.fetchall()
  44. if recset:
  45. return recset[0][0]
  46. else:
  47. return 0
  48. except:
  49. trace = traceback.format_exc()
  50. print (trace)
  51. def verifyPost(cur, topicId, userId, dateAdded):
  52. try:
  53. cur.execute("select post_id from posts where topic_id = %(topicId)s and "
  54. "user_id = %(userId)s and dateadded_post = %(dateAdded)s limit 1", {'topicId': topicId,
  55. 'userId': userId,
  56. 'dateAdded': dateAdded})
  57. recset = cur.fetchall()
  58. if recset:
  59. return recset[0][0]
  60. else:
  61. return 0
  62. except:
  63. trace = traceback.format_exc()
  64. print (trace)
  65. def verifyUser(cur, nameUser, forumId):
  66. try:
  67. cur.execute("select user_id from users where name_user = %(nameUser)s and forum_id = %(forumId)s limit 1",
  68. {'nameUser': nameUser, 'forumId': forumId})
  69. recset = cur.fetchall()
  70. if recset:
  71. return recset[0][0]
  72. else:
  73. return 0
  74. except:
  75. trace = traceback.format_exc()
  76. print (trace)
  77. def getLastForum(cur):
  78. try:
  79. cur.execute("select forum_id from forums order by forum_id desc limit 1")
  80. recset = cur.fetchall()
  81. if recset:
  82. return recset[0][0]
  83. else:
  84. return 0
  85. except:
  86. trace = traceback.format_exc()
  87. print (trace)
  88. def getLastBoard(cur):
  89. try:
  90. cur.execute("select board_id from boards order by board_id desc limit 1")
  91. recset = cur.fetchall()
  92. if recset:
  93. return recset[0][0]
  94. else:
  95. return 0
  96. except:
  97. trace = traceback.format_exc()
  98. print (trace)
  99. def getLastTopic(cur):
  100. try:
  101. cur.execute("select topic_id from topics order by topic_id desc limit 1")
  102. recset = cur.fetchall()
  103. if recset:
  104. return recset[0][0]
  105. else:
  106. return 0
  107. except:
  108. trace = traceback.format_exc()
  109. print (trace)
  110. def getLastUser(cur):
  111. try:
  112. cur.execute("select user_id from users order by user_id desc limit 1")
  113. recset = cur.fetchall()
  114. if recset:
  115. return recset[0][0]
  116. else:
  117. return 0
  118. except:
  119. trace = traceback.format_exc()
  120. print (trace)
  121. def getLastPost(cur):
  122. try:
  123. cur.execute("select post_id from posts order by post_id desc limit 1")
  124. recset = cur.fetchall()
  125. if recset:
  126. return recset[0][0]
  127. else:
  128. return 0
  129. except:
  130. trace = traceback.format_exc()
  131. print (trace)
  132. def create_forum(cur, row, url):
  133. forumId = verifyForum(cur, row[0])
  134. if not forumId:
  135. forumId = int(getLastForum(cur) + 1)
  136. sql = "Insert into forums (forum_id, name_forum, url_forum, dateinserted_forum) Values (%s, %s, %s, %s)"
  137. recset = [forumId, row[0], url, row[8]]
  138. cur.execute(sql, recset)
  139. return forumId
  140. def create_topic(cur, forumId, row, authorId):
  141. topicId = verifyTopic(cur, forumId, authorId, row[3])
  142. if not topicId:
  143. topicId = int(getLastTopic(cur) + 1)
  144. newTopic = True
  145. else:
  146. newTopic = False
  147. if newTopic:
  148. sql = "Insert into topics (topic_id, forum_id, author_id, title_topic, board_topic, views_topic, posts_topic, " \
  149. "href_topic, dateadded_topic, dateinserted_topic, classification_topic) Values (%s, %s, %s, %s, %s, %s, " \
  150. "%s, %s, %s, %s, %s)"
  151. recset = [topicId, forumId, authorId,
  152. row[3], row[1],
  153. row[4] if row[4] != '-1' else None,
  154. row[5] if row[5] != '-1' else None,
  155. row[6] if row[6] != '-1' else None,
  156. row[7] if row[7] != '-1' else None,
  157. row[8],
  158. row[17]]
  159. cur.execute(sql, recset)
  160. else:
  161. # Tracking potential topic changes
  162. sql = "select * from topics where topic_Id = %(topicId)s"
  163. cur.execute(sql, {'topicId': topicId})
  164. recset = cur.fetchall()
  165. if (str(recset[0][4]) != str(row[1]) or str(recset[0][5]) != str(row[4] if row[4] != '-1' else None) or # there was a change in the topic information
  166. str(recset[0][6]) != str(row[5] if row[5] != '-1' else None)):
  167. sql = "Insert into topics_history (topic_id, forum_id, author_id, title_topic, board_topic, views_topic, posts_topic, " \
  168. "href_topic, dateadded_topic, dateinserted_topic, classification_topic) Values (%s, %s, %s, %s, %s, " \
  169. "%s, %s, %s, %s, %s, %s)"
  170. recset = [topicId, forumId, authorId,
  171. recset[0][3],
  172. recset[0][4],
  173. recset[0][5],
  174. recset[0][6],
  175. recset[0][7],
  176. recset[0][8],
  177. recset[0][9],
  178. recset[0][10]]
  179. cur.execute(sql, recset)
  180. sql = "Update topics set board_topic = %(board_topic)s, views_topic = %(views_topic)s, posts_topic = %(posts_topic)s, " \
  181. "dateinserted_topic = %(dateinserted_topic)s where topic_id = %(topicId)s"
  182. cur.execute(sql, {'board_topic': row[1],
  183. 'views_topic': row[4] if row[4] != '-1' else None,
  184. 'posts_topic': row[5] if row[5] != '-1' else None,
  185. 'dateinserted_topic': row[8],
  186. 'topicId': topicId})
  187. return topicId
  188. def create_author(cur, row, forumId):
  189. userId = verifyUser(cur, row[2], forumId)
  190. if not userId:
  191. userId = int(getLastUser(cur) + 1)
  192. sql = "Insert into users (user_id, forum_id, name_user, status_user, reputation_user, interest_user, " \
  193. "signature_user, dateinserted_user) Values (%s, %s, %s, %s, %s, %s, %s, %s)"
  194. recset = [userId, forumId,
  195. row[2], 'Nan', 'Nan', 'Nan', 'Nan', #telling the create_posts function to not track changes here
  196. row[8]]
  197. cur.execute(sql, recset)
  198. return userId
  199. def create_user(cur, row, forumId, index):
  200. userId = verifyUser(cur, row[9][index], forumId)
  201. if not userId:
  202. userId = int(getLastUser(cur) + 1)
  203. newUser = True
  204. else:
  205. newUser = False
  206. if newUser:
  207. sql = "Insert into users (user_id, forum_id, name_user, status_user, reputation_user, interest_user, " \
  208. "signature_user, dateinserted_user) Values (%s, %s, %s, %s, %s, %s, %s, %s)"
  209. recset = [userId, forumId,
  210. row[9][index],
  211. row[10][index] if row[10][index] != '-1' else None,
  212. row[11][index] if row[11][index] != '-1' else None,
  213. row[12][index] if row[12][index] != '-1' else None,
  214. row[13][index] if row[13][index] != '-1' else None,
  215. row[8]]
  216. cur.execute(sql, recset)
  217. else:
  218. # Tracking potential user changes
  219. sql = "select * from users where user_id = %(userId)s"
  220. cur.execute(sql, {'userId': userId})
  221. recset = cur.fetchall()
  222. if (str(recset[0][3]) != str(row[10][index] if row[10][index] != '-1' else None) or str(recset[0][4]) != str(row[11][index] if row[11][index] != '-1' else None) or
  223. str(recset[0][5]) != str(row[12][index] if row[12][index] != '-1' else None) or str(recset[0][6]) != str(row[13][index] if row[13][index] != '-1' else None)): # there was a change in the user information
  224. if (str(recset[0][3]) != 'Nan' or str(recset[0][4]) != 'Nan' or str(recset[0][5]) != 'Nan' or str(recset[0][6]) != 'Nan'):
  225. sql = "Insert into users_history (user_id, forum_id, name_user, status_user, reputation_user, interest_user, " \
  226. "signature_user, dateinserted_user) Values (%s, %s, %s, %s, %s, %s, %s, %s)"
  227. recset = [userId, forumId,
  228. recset[0][2],
  229. recset[0][3],
  230. recset[0][4],
  231. recset[0][5],
  232. recset[0][6],
  233. recset[0][7]]
  234. cur.execute(sql, recset)
  235. sql = "Update users set status_user = %(status_user)s, reputation_user = %(reputation_user)s, " \
  236. "interest_user = %(interest_user)s, signature_user = %(signature_user)s, dateinserted_user = %(dateinserted_user)s " \
  237. "where user_id = %(userId)s"
  238. cur.execute(sql, {'status_user': row[10][index] if row[10][index] != '-1' else None,
  239. 'reputation_user': row[11][index] if row[11][index] != '-1' else None,
  240. 'interest_user': row[12][index] if row[12][index] != '-1' else None,
  241. 'signature_user': row[13][index] if row[13][index] != '-1' else None,
  242. 'dateinserted_user': row[8] if row[8] != '-1' else None,
  243. 'userId': userId})
  244. return userId
  245. def create_posts(cur, row, forumId, topicId):
  246. if row[9] != "-1":
  247. for i in range(len(row[9])):
  248. userId = create_user(cur, row, forumId, i)
  249. postId = verifyPost(cur, topicId, userId, row[16][i])
  250. if not postId:
  251. postId = int(getLastPost(cur) + 1)
  252. sql = "Insert into posts (post_id, topic_id, user_id, content_post, feedback_post, " \
  253. "dateadded_post, dateinserted_post) Values (%s, %s, %s, %s, %s, %s, %s)"
  254. recset = [postId, topicId, userId,
  255. row[14][i] if row[14][i] != '-1' else None,
  256. row[15][i] if row[15][i] != '-1' else None,
  257. row[16][i] if row[16][i] != '-1' else None,
  258. row[8]]
  259. cur.execute(sql, recset)
  260. else:
  261. # Tracking potential post changes
  262. sql = "select * from posts where post_id = %(postId)s"
  263. cur.execute(sql, {'postId': postId})
  264. recset = cur.fetchall()
  265. if (str(recset[0][3]) != str(row[14][i]) or str(recset[0][4]) != str(row[15][i] if row[15][i] != '-1' else None)): # there was a change in the post information
  266. sql = "Insert into posts_history (post_id, topic_id, user_id, content_post, feedback_post, " \
  267. "dateadded_post, dateinserted_post) Values (%s, %s, %s, %s, %s, %s, %s)"
  268. recset = [postId, topicId, userId,
  269. recset[0][3],
  270. recset[0][4],
  271. recset[0][5],
  272. recset[0][6]]
  273. cur.execute(sql, recset)
  274. sql = "Update posts set content_post = %(content_post)s, feedback_post = %(feedback_post)s, dateinserted_post = %(dateinserted_post)s " \
  275. "where post_id = %(postId)s"
  276. cur.execute(sql, {'content_post': row[14][i] if row[14][i] != '-1' else None,
  277. 'feedback_post': row[15][i] if row[15][i] != '-1' else None,
  278. 'dateinserted_post': row[8],
  279. 'postId': postId})
  280. def create_database(cur, con):
  281. try:
  282. sql = "create table forums (forum_id integer NOT NULL, name_forum character varying(255) NOT NULL, url_forum " \
  283. "character varying(255) NOT null, dateinserted_forum timestamp(6) with time zone NOT NULL, constraint " \
  284. "forums_pk primary key (forum_id))"
  285. cur.execute(sql)
  286. sql = "create unique index unique_forum ON forums USING btree (name_forum ASC NULLS LAST)"
  287. cur.execute(sql)
  288. sql = "create table users (user_id integer NOT NULL, forum_id integer NOT NULL, name_user character varying(" \
  289. "255) NOT NULL, status_user character varying(255) null, reputation_user character varying(255) null, " \
  290. "interest_user character varying(5000) null, signature_user character varying(1000) null, " \
  291. "dateinserted_user timestamp(6) with time zone NOT NULL, constraint users_pk primary key (user_id), " \
  292. "constraint users_forum_id_fkey foreign key (forum_id) references forums (forum_id))"
  293. cur.execute(sql)
  294. sql = "create unique index unique_user ON users USING btree (forum_id ASC NULLS LAST, name_user ASC NULLS LAST)"
  295. cur.execute(sql)
  296. sql = "create table users_history(user_id integer NOT NULL, forum_id integer NOT NULL, name_user character " \
  297. "varying(255) NOT NULL, status_user character varying(255) null, reputation_user character varying(255) " \
  298. "null, interest_user character varying(5000) null, signature_user character varying(1000) null, " \
  299. "dateinserted_user timestamp(6) with time zone NOT NULL, constraint users_history_pk primary key (" \
  300. "user_id, dateinserted_user), constraint users_history_user_id_fkey foreign key (user_id) references " \
  301. "users (user_id), constraint users_history_forum_id_fkey foreign key (forum_id) references forums (" \
  302. "forum_id))"
  303. cur.execute(sql)
  304. sql = "create table topics(topic_id integer NOT NULL, forum_id integer NOT NULL, author_id integer NOT NULL, " \
  305. "title_topic character varying(255) NOT NULL, board_topic character varying(255) NOT NULL, views_topic integer null, " \
  306. "posts_topic integer null, href_topic character varying(255) NOT null, dateadded_topic timestamp(6) with " \
  307. "time zone null, dateinserted_topic timestamp(6) with time zone NOT NULL, classification_topic double " \
  308. "precision NOT NULL, constraint topics_pk primary key (topic_id), constraint topics_author_id_fkey " \
  309. "foreign key (author_id) references users (user_id), constraint topics_forum_id_fkey foreign key (" \
  310. "forum_id) references forums (forum_id))"
  311. cur.execute(sql)
  312. sql = "create unique index unique_topic ON topics USING btree (forum_id ASC NULLS LAST, author_id ASC NULLS LAST, " \
  313. "title_topic ASC NULLS LAST)"
  314. cur.execute(sql)
  315. sql = "create table topics_history(topic_id integer NOT NULL, forum_id integer NOT NULL, author_id integer NOT NULL, " \
  316. "title_topic character varying(255) NOT NULL, board_topic character varying(255) NOT NULL, views_topic integer " \
  317. "null, posts_topic integer null, href_topic character varying(255) NOT null, dateadded_topic timestamp(6) " \
  318. "with time zone null, dateinserted_topic timestamp(6) with time zone NOT NULL, classification_topic " \
  319. "double precision NOT NULL, constraint topics_history_pk primary key (topic_id, dateinserted_topic), " \
  320. "constraint topics_history_topic_id_fkey foreign key (topic_id) references topics (topic_id), " \
  321. "constraint topics_history_author_id_fkey foreign key (author_id) references users (user_id), " \
  322. "constraint topics_history_board_id_fkey foreign key (forum_id) references forums (forum_id))"
  323. cur.execute(sql)
  324. sql = "create table posts(post_id integer NOT NULL, topic_id integer NOT NULL, " \
  325. "user_id integer NOT NULL, content_post character varying(100000) NOT null, feedback_post integer null, " \
  326. "dateadded_post timestamp(6) with time zone NOT NULL, dateinserted_post timestamp(6) with time zone NOT " \
  327. "NULL, constraint posts_pk primary key (post_id), " \
  328. "constraint posts_user_id_fkey foreign key (user_id) references users (user_id), constraint " \
  329. "posts_topic_id_fkey foreign key (topic_id) references topics (topic_id))"
  330. cur.execute(sql)
  331. sql = "create unique index unique_post ON posts USING btree (topic_id ASC NULLS LAST, user_id ASC NULLS LAST, " \
  332. "dateadded_post ASC NULLS LAST)"
  333. cur.execute(sql)
  334. sql = "create table posts_history(post_id integer NOT NULL, topic_id integer NOT NULL, " \
  335. "user_id integer NOT NULL, content_post character varying(100000) NOT null, feedback_post integer null, " \
  336. "dateadded_post timestamp(6) with time zone NOT NULL, dateinserted_post timestamp(6) with time zone NOT " \
  337. "NULL, constraint posts_history_pk primary key (post_id, dateinserted_post), " \
  338. "constraint posts_history_user_id_fkey foreign key (user_id) references users (user_id), " \
  339. "constraint posts_history_topic_id_fkey foreign key (topic_id) references topics (topic_id), " \
  340. "constraint posts_history_post_id_fkey foreign key (post_id) references posts (post_id))"
  341. cur.execute(sql)
  342. con.commit()
  343. except:
  344. con.rollback()
  345. trace = traceback.format_exc()
  346. if (trace.find("already exists")==-1):
  347. print ("There was a problem during the database creation." )
  348. raise SystemExit