This is based on the calsyslab project.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

558 lines
20 KiB

1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
  1. __author__ = 'DarkWeb'
  2. import psycopg2
  3. import traceback
  4. def connectDataBase():
  5. from Forums.Initialization.forums_mining import config
  6. try:
  7. ip = config.get('PostgreSQL', 'ip')
  8. username = config.get('PostgreSQL', 'username')
  9. password = config.get('PostgreSQL', 'password')
  10. database = config.get('PostgreSQL', 'database')
  11. return psycopg2.connect(host=ip, user=username, password=password, dbname=database)
  12. except:
  13. print ("Data base " + config.get('PostgreSQL', 'database') + " not found.")
  14. raise SystemExit
  15. def verifyForum(cur, nameForum):
  16. try:
  17. cur.execute("select forum_id from forums where name_forum = %(nameForum)s limit 1", {'nameForum': nameForum})
  18. recset = cur.fetchall()
  19. if recset:
  20. return recset[0][0]
  21. else:
  22. return 0
  23. except:
  24. trace = traceback.format_exc()
  25. print (trace)
  26. def verifyTopic(cur, forumId, authorId, titleTopic):
  27. try:
  28. cur.execute("select topic_id from topics where forum_id = %(forumId)s and author_id = %(authorId)s and title_topic = %(titleTopic)s limit 1",
  29. {'forumId': forumId, 'authorId': authorId, 'titleTopic': titleTopic})
  30. recset = cur.fetchall()
  31. if recset:
  32. return recset[0][0]
  33. else:
  34. return 0
  35. except:
  36. trace = traceback.format_exc()
  37. print (trace)
  38. def verifyPost(cur, topicId, userId, dateAdded):
  39. try:
  40. cur.execute("select post_id from posts where topic_id = %(topicId)s and "
  41. "user_id = %(userId)s and dateadded_post = %(dateAdded)s limit 1", {'topicId': topicId,
  42. 'userId': userId,
  43. 'dateAdded': dateAdded})
  44. recset = cur.fetchall()
  45. if recset:
  46. return recset[0][0]
  47. else:
  48. return 0
  49. except:
  50. trace = traceback.format_exc()
  51. print (trace)
  52. def verifyUser(cur, nameUser, forumId):
  53. try:
  54. cur.execute("select user_id from users where name_user = %(nameUser)s and forum_id = %(forumId)s limit 1",
  55. {'nameUser': nameUser, 'forumId': forumId})
  56. recset = cur.fetchall()
  57. if recset:
  58. return recset[0][0]
  59. else:
  60. return 0
  61. except:
  62. trace = traceback.format_exc()
  63. print (trace)
  64. def getLastForum(cur):
  65. try:
  66. cur.execute("select forum_id from forums order by forum_id desc limit 1")
  67. recset = cur.fetchall()
  68. if recset:
  69. return recset[0][0]
  70. else:
  71. return 0
  72. except:
  73. trace = traceback.format_exc()
  74. print (trace)
  75. def getLastTopic(cur):
  76. try:
  77. cur.execute("select topic_id from topics order by topic_id desc limit 1")
  78. recset = cur.fetchall()
  79. if recset:
  80. return recset[0][0]
  81. else:
  82. return 0
  83. except:
  84. trace = traceback.format_exc()
  85. print (trace)
  86. def getLastUser(cur):
  87. try:
  88. cur.execute("select user_id from users order by user_id desc limit 1")
  89. recset = cur.fetchall()
  90. if recset:
  91. return recset[0][0]
  92. else:
  93. return 0
  94. except:
  95. trace = traceback.format_exc()
  96. print (trace)
  97. def getLastUserVersion(cur, userId):
  98. try:
  99. cur.execute("select version_user from users_history where user_id = %(userId)s order by version_user desc limit 1", {'userId': userId})
  100. recset = cur.fetchall()
  101. if recset:
  102. return recset[0][0]
  103. else:
  104. return 0
  105. except:
  106. trace = traceback.format_exc()
  107. print (trace)
  108. def getLastTopicVersion(cur, topicId):
  109. try:
  110. cur.execute("select version_topic from topics_history where topic_id = %(topicId)s order by version_topic desc limit 1", {'topicId': topicId})
  111. recset = cur.fetchall()
  112. if recset:
  113. return recset[0][0]
  114. else:
  115. return 0
  116. except:
  117. trace = traceback.format_exc()
  118. print (trace)
  119. def getLastPostVersion(cur, postId):
  120. try:
  121. cur.execute("select version_post from posts_history where post_id = %(postId)s order by version_post desc limit 1", {'postId': postId})
  122. recset = cur.fetchall()
  123. if recset:
  124. return recset[0][0]
  125. else:
  126. return 0
  127. except:
  128. trace = traceback.format_exc()
  129. print (trace)
  130. def getLastPost(cur):
  131. try:
  132. cur.execute("select post_id from posts order by post_id desc limit 1")
  133. recset = cur.fetchall()
  134. if recset:
  135. return recset[0][0]
  136. else:
  137. return 0
  138. except:
  139. trace = traceback.format_exc()
  140. print (trace)
  141. def create_forum(cur, row, url):
  142. forumId = verifyForum(cur, row[0])
  143. if not forumId:
  144. forumId = int(getLastForum(cur) + 1)
  145. sql = "Insert into forums (forum_id, name_forum, url_forum, dateinserted_forum) Values (%s, %s, %s, %s)"
  146. recset = [forumId, row[0], url, row[8]]
  147. cur.execute(sql, recset)
  148. return forumId
  149. def create_topic(cur, forumId, row, authorId):
  150. topicId = verifyTopic(cur, forumId, authorId, row[3])
  151. if not topicId:
  152. topicId = int(getLastTopic(cur) + 1)
  153. newTopic = True
  154. else:
  155. newTopic = False
  156. if newTopic:
  157. sql = "Insert into topics (topic_id, forum_id, author_id, title_topic, board_topic, views_topic, posts_topic, " \
  158. "href_topic, dateadded_topic, dateinserted_topic, classification_topic) Values (%s, %s, %s, %s, %s, %s, " \
  159. "%s, %s, %s, %s, %s)"
  160. recset = [topicId, forumId, authorId,
  161. row[3], row[1],
  162. row[4] if row[4] != '-1' else None,
  163. row[5] if row[5] != '-1' else None,
  164. row[6] if row[6] != '-1' else None,
  165. row[7] if row[7] != '-1' else None,
  166. row[8],
  167. row[19]]
  168. cur.execute(sql, recset)
  169. else:
  170. # Tracking potential topic changes
  171. sql = "select * from topics where topic_Id = %(topicId)s"
  172. cur.execute(sql, {'topicId': topicId})
  173. recset = cur.fetchall()
  174. if (str(recset[0][4]) != str(row[1]) or str(recset[0][5]) != str(row[4] if row[4] != '-1' else None) or # there was a change in the topic information
  175. str(recset[0][6]) != str(row[5] if row[5] != '-1' else None)):
  176. topicVersionId = int(getLastTopicVersion(cur, topicId) + 1)
  177. sql = "Insert into topics_history (topic_id, version_topic, forum_id, author_id, title_topic, board_topic, views_topic, posts_topic, " \
  178. "href_topic, dateadded_topic, dateinserted_topic, classification_topic) Values (%s, %s, %s, %s, %s, " \
  179. "%s, %s, %s, %s, %s, %s, %s)"
  180. recset = [topicId, topicVersionId, forumId, authorId,
  181. recset[0][3],
  182. recset[0][4],
  183. recset[0][5],
  184. recset[0][6],
  185. recset[0][7],
  186. recset[0][8],
  187. recset[0][9],
  188. recset[0][10]]
  189. cur.execute(sql, recset)
  190. sql = "Update topics set board_topic = %(board_topic)s, views_topic = %(views_topic)s, posts_topic = %(posts_topic)s, " \
  191. "dateinserted_topic = %(dateinserted_topic)s where topic_id = %(topicId)s"
  192. cur.execute(sql, {'board_topic': row[1],
  193. 'views_topic': row[4] if row[4] != '-1' else None,
  194. 'posts_topic': row[5] if row[5] != '-1' else None,
  195. 'dateinserted_topic': row[8],
  196. 'topicId': topicId})
  197. return topicId
  198. def create_author(cur, row, forumId):
  199. userId = verifyUser(cur, row[2], forumId)
  200. if not userId:
  201. userId = int(getLastUser(cur) + 1)
  202. sql = "Insert into users (user_id, forum_id, name_user, status_user, reputation_user, interest_user, " \
  203. "signature_user, image_user, dateinserted_user) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
  204. recset = [userId, forumId,
  205. row[2], 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', #telling the create_user function to not track changes here
  206. row[8]]
  207. cur.execute(sql, recset)
  208. return userId
  209. def create_user(cur, row, forumId, index):
  210. userId = verifyUser(cur, row[10][index], forumId)
  211. if not userId:
  212. userId = int(getLastUser(cur) + 1)
  213. newUser = True
  214. else:
  215. newUser = False
  216. if newUser:
  217. sql = "Insert into users (user_id, forum_id, name_user, status_user, reputation_user, interest_user, " \
  218. "signature_user, image_user, dateinserted_user) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
  219. recset = [userId, forumId,
  220. row[10][index],
  221. row[11][index] if row[11][index] != '-1' else None,
  222. row[12][index] if row[12][index] != '-1' else None,
  223. row[13][index] if row[13][index] != '-1' else None,
  224. row[14][index] if row[14][index] != '-1' else None,
  225. row[9][index] if row[9][index] != '-1' else None,
  226. row[8]]
  227. cur.execute(sql, recset)
  228. else:
  229. # Tracking potential user changes
  230. sql = "select * from users where user_id = %(userId)s"
  231. cur.execute(sql, {'userId': userId})
  232. recset = cur.fetchall()
  233. # decode_decrypt_image_in_base64(recset[0][7])
  234. if (str(recset[0][3]) != str(row[11][index] if row[11][index] != '-1' else None) or str(recset[0][4]) != str(row[12][index] if row[12][index] != '-1' else None) or
  235. str(recset[0][5]) != str(row[13][index] if row[13][index] != '-1' else None) or str(recset[0][6]) != str(row[14][index] if row[14][index] != '-1' else None) or
  236. str(recset[0][7]) != str(row[9][index] if row[9][index] != '-1' else None)): # there was a change in the user information
  237. if (str(recset[0][3]) != 'Nan' or str(recset[0][4]) != 'Nan' or str(recset[0][5]) != 'Nan' or str(recset[0][6]) != 'Nan' or str(recset[0][7]) != 'Nan'):
  238. userVersionId = int(getLastUserVersion(cur, userId) + 1)
  239. sql = "Insert into users_history (user_id, version_user, forum_id, name_user, status_user, reputation_user, interest_user, " \
  240. "signature_user, image_user, dateinserted_user) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
  241. recset = [userId, userVersionId, forumId,
  242. recset[0][2],
  243. recset[0][3],
  244. recset[0][4],
  245. recset[0][5],
  246. recset[0][6],
  247. recset[0][7],
  248. recset[0][8]]
  249. cur.execute(sql, recset)
  250. sql = "Update users set status_user = %(status_user)s, reputation_user = %(reputation_user)s, " \
  251. "interest_user = %(interest_user)s, signature_user = %(signature_user)s, image_user = %(image_user)s, " \
  252. "dateinserted_user = %(dateinserted_user)s where user_id = %(userId)s"
  253. cur.execute(sql, {'status_user': row[11][index] if row[11][index] != '-1' else None,
  254. 'reputation_user': row[12][index] if row[12][index] != '-1' else None,
  255. 'interest_user': row[13][index] if row[13][index] != '-1' else None,
  256. 'signature_user': row[14][index] if row[14][index] != '-1' else None,
  257. 'image_user': row[9][index] if row[9][index] != '-1' else None,
  258. 'dateinserted_user': row[8] if row[8] != '-1' else None,
  259. 'userId': userId})
  260. return userId
  261. def create_posts(cur, row, forumId, topicId):
  262. if row[10] != "-1":
  263. for i in range(len(row[10])):
  264. userId = create_user(cur, row, forumId, i)
  265. postId = verifyPost(cur, topicId, userId, row[17][i])
  266. if not postId:
  267. postId = int(getLastPost(cur) + 1)
  268. sql = "Insert into posts (post_id, topic_id, user_id, content_post, feedback_post, image_post," \
  269. "dateadded_post, dateinserted_post) Values (%s, %s, %s, %s, %s, %s, %s, %s)"
  270. recset = [postId, topicId, userId,
  271. row[15][i] if row[15][i] != '-1' else None,
  272. row[16][i] if row[16][i] != '-1' else None,
  273. row[18][i] if row[18][i] != '-1' else None,
  274. row[17][i] if row[17][i] != '-1' else None,
  275. row[8]]
  276. cur.execute(sql, recset)
  277. else:
  278. # Tracking potential post changes
  279. sql = "select * from posts where post_id = %(postId)s"
  280. cur.execute(sql, {'postId': postId})
  281. recset = cur.fetchall()
  282. if (str(recset[0][3]) != str(row[15][i]) or str(recset[0][4]) != str(row[16][i] if row[16][i] != '-1' else None) or
  283. str(recset[0][5]) != str(row[18][i] if row[18][i] != '-1' else None)): # there was a change in the post information
  284. postVersionId = int(getLastPostVersion(cur, postId) + 1)
  285. sql = "Insert into posts_history (post_id, version_post, topic_id, user_id, content_post, feedback_post, " \
  286. "image_post, dateadded_post, dateinserted_post) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
  287. recset = [postId, postVersionId, topicId, userId,
  288. recset[0][3],
  289. recset[0][4],
  290. recset[0][5],
  291. recset[0][6],
  292. recset[0][7]]
  293. cur.execute(sql, recset)
  294. sql = "Update posts set content_post = %(content_post)s, feedback_post = %(feedback_post)s, " \
  295. "image_post = %(image_post)s, dateinserted_post = %(dateinserted_post)s where post_id = %(postId)s"
  296. cur.execute(sql, {'content_post': row[15][i] if row[15][i] != '-1' else None,
  297. 'feedback_post': row[16][i] if row[16][i] != '-1' else None,
  298. 'image_post': row[18][i] if row[18][i] != '-1' else None,
  299. 'dateinserted_post': row[8],
  300. 'postId': postId})
  301. def create_database(cur, con):
  302. try:
  303. sql = "create table forums (forum_id integer NOT NULL, name_forum character varying(255) NOT NULL, url_forum " \
  304. "character varying(255) NOT null, dateinserted_forum timestamp(6) with time zone NOT NULL, " \
  305. "constraint forums_pk primary key (forum_id))"
  306. cur.execute(sql)
  307. sql = "create unique index unique_forum ON forums USING btree (name_forum ASC NULLS LAST)"
  308. cur.execute(sql)
  309. sql = "create table users (user_id integer NOT NULL, forum_id integer NOT NULL, name_user character varying(" \
  310. "255) NOT NULL, status_user character varying(255) null, reputation_user character varying(255) null, " \
  311. "interest_user character varying(5000) null, signature_user character varying(1000) null, " \
  312. "image_user character varying(1000000) null, dateinserted_user timestamp(6) with time zone NOT NULL, " \
  313. "constraint users_pk primary key (user_id), " \
  314. "constraint users_forum_id_fkey foreign key (forum_id) references forums (forum_id))"
  315. cur.execute(sql)
  316. sql = "create unique index unique_user ON users USING btree (forum_id ASC NULLS LAST, name_user ASC NULLS LAST)"
  317. cur.execute(sql)
  318. sql = "create table users_history(user_id integer NOT NULL, version_user integer not null, forum_id integer NOT NULL, " \
  319. "name_user character varying(255) NOT NULL, status_user character varying(255) null, " \
  320. "reputation_user character varying(255) null, interest_user character varying(5000) null, " \
  321. "signature_user character varying(1000) null, image_user character varying(1000000) null, " \
  322. "dateinserted_user timestamp(6) with time zone NOT NULL, " \
  323. "constraint users_history_pk primary key (user_id, version_user), " \
  324. "constraint users_history_user_id_fkey foreign key (user_id) references " \
  325. "users (user_id), constraint users_history_forum_id_fkey foreign key (forum_id) references forums (forum_id))"
  326. cur.execute(sql)
  327. sql = "create table topics(topic_id integer NOT NULL, forum_id integer NOT NULL, author_id integer NOT NULL, " \
  328. "title_topic character varying(255) NOT NULL, board_topic character varying(255) NOT NULL, views_topic integer null, " \
  329. "posts_topic integer null, href_topic character varying(255) NOT null, dateadded_topic timestamp(6) with time zone null, " \
  330. "dateinserted_topic timestamp(6) with time zone NOT NULL, classification_topic double precision NOT NULL, " \
  331. "constraint topics_pk primary key (topic_id), constraint topics_author_id_fkey " \
  332. "foreign key (author_id) references users (user_id), constraint topics_forum_id_fkey foreign key (" \
  333. "forum_id) references forums (forum_id))"
  334. cur.execute(sql)
  335. sql = "create unique index unique_topic ON topics USING btree (forum_id ASC NULLS LAST, author_id ASC NULLS LAST, " \
  336. "title_topic ASC NULLS LAST)"
  337. cur.execute(sql)
  338. sql = "create table topics_history(topic_id integer NOT NULL, version_topic integer not null, forum_id integer NOT NULL, " \
  339. "author_id integer NOT NULL, title_topic character varying(255) NOT NULL, board_topic character varying(255) NOT NULL, " \
  340. "views_topic integer null, posts_topic integer null, href_topic character varying(255) NOT null, " \
  341. "dateadded_topic timestamp(6) with time zone null, dateinserted_topic timestamp(6) with time zone NOT NULL, " \
  342. "classification_topic double precision NOT NULL, " \
  343. "constraint topics_history_pk primary key (topic_id, version_topic), " \
  344. "constraint topics_history_topic_id_fkey foreign key (topic_id) references topics (topic_id), " \
  345. "constraint topics_history_author_id_fkey foreign key (author_id) references users (user_id), " \
  346. "constraint topics_history_board_id_fkey foreign key (forum_id) references forums (forum_id))"
  347. cur.execute(sql)
  348. sql = "create table posts(post_id integer NOT NULL, topic_id integer NOT NULL, " \
  349. "user_id integer NOT NULL, content_post character varying(100000) NOT null, feedback_post integer null, " \
  350. "image_post character varying(1000000) null, dateadded_post timestamp(6) with time zone NOT NULL, " \
  351. "dateinserted_post timestamp(6) with time zone NOT NULL, " \
  352. "constraint posts_pk primary key (post_id), " \
  353. "constraint posts_user_id_fkey foreign key (user_id) references users (user_id), constraint " \
  354. "posts_topic_id_fkey foreign key (topic_id) references topics (topic_id))"
  355. cur.execute(sql)
  356. sql = "create unique index unique_post ON posts USING btree (topic_id ASC NULLS LAST, user_id ASC NULLS LAST, " \
  357. "dateadded_post ASC NULLS LAST)"
  358. cur.execute(sql)
  359. sql = "create table posts_history(post_id integer NOT NULL, version_post integer not null, topic_id integer NOT NULL, " \
  360. "user_id integer NOT NULL, content_post character varying(100000) NOT null, feedback_post integer null, " \
  361. "image_post character varying(1000000) null, dateadded_post timestamp(6) with time zone NOT NULL, " \
  362. "dateinserted_post timestamp(6) with time zone NOT NULL, " \
  363. "constraint posts_history_pk primary key (post_id, version_post), " \
  364. "constraint posts_history_user_id_fkey foreign key (user_id) references users (user_id), " \
  365. "constraint posts_history_topic_id_fkey foreign key (topic_id) references topics (topic_id), " \
  366. "constraint posts_history_post_id_fkey foreign key (post_id) references posts (post_id))"
  367. cur.execute(sql)
  368. con.commit()
  369. except:
  370. con.rollback()
  371. trace = traceback.format_exc()
  372. if (trace.find("already exists")==-1):
  373. print ("There was a problem during the database creation." )
  374. raise SystemExit