this is based on calsyslab project
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

576 lines
22 KiB

1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
  1. __author__ = 'DarkWeb'
  2. import psycopg2
  3. import traceback
  4. from Forums.Utilities.utilities import *
  5. def connectDataBase():
  6. from Forums.Initialization.forums_mining import config
  7. try:
  8. ip = config.get('PostgreSQL', 'ip')
  9. username = config.get('PostgreSQL', 'username')
  10. password = config.get('PostgreSQL', 'password')
  11. database = config.get('PostgreSQL', 'database')
  12. return psycopg2.connect(host=ip, user=username, password=password, dbname=database)
  13. except:
  14. print ("Data base " + config.get('PostgreSQL', 'database') + " not found.")
  15. raise SystemExit
  16. def verifyForum(cur, nameForum):
  17. try:
  18. cur.execute("lock table forums IN ACCESS EXCLUSIVE MODE")
  19. cur.execute("select forum_id from forums where name_forum = %(nameForum)s limit 1", {'nameForum': nameForum})
  20. recset = cur.fetchall()
  21. if recset:
  22. return recset[0]['forum_id']
  23. else:
  24. return 0
  25. except:
  26. trace = traceback.format_exc()
  27. print (trace)
  28. def verifyTopic(cur, forumId, authorId, titleTopic):
  29. try:
  30. cur.execute("lock table topics IN ACCESS EXCLUSIVE MODE")
  31. cur.execute("select topic_id from topics where forum_id = %(forumId)s and author_id = %(authorId)s and title_topic = %(titleTopic)s limit 1",
  32. {'forumId': forumId, 'authorId': authorId, 'titleTopic': titleTopic})
  33. recset = cur.fetchall()
  34. if recset:
  35. return recset[0]['topic_id']
  36. else:
  37. return 0
  38. except:
  39. trace = traceback.format_exc()
  40. print (trace)
  41. def verifyPost(cur, topicId, userId, dateAdded):
  42. try:
  43. cur.execute("lock table posts IN ACCESS EXCLUSIVE MODE")
  44. cur.execute("select post_id from posts where topic_id = %(topicId)s and "
  45. "user_id = %(userId)s and dateadded_post = %(dateAdded)s limit 1", {'topicId': topicId,
  46. 'userId': userId,
  47. 'dateAdded': dateAdded})
  48. recset = cur.fetchall()
  49. if recset:
  50. return recset[0]['post_id']
  51. else:
  52. return 0
  53. except:
  54. trace = traceback.format_exc()
  55. print (trace)
  56. def verifyUser(cur, nameUser, forumId):
  57. try:
  58. cur.execute("lock table users IN ACCESS EXCLUSIVE MODE")
  59. cur.execute("select user_id from users where name_user = %(nameUser)s and forum_id = %(forumId)s limit 1",
  60. {'nameUser': nameUser, 'forumId': forumId})
  61. recset = cur.fetchall()
  62. if recset:
  63. return recset[0]['user_id']
  64. else:
  65. return 0
  66. except:
  67. trace = traceback.format_exc()
  68. print (trace)
  69. def getLastForum(cur):
  70. try:
  71. cur.execute("select forum_id from forums order by forum_id desc limit 1")
  72. recset = cur.fetchall()
  73. if recset:
  74. return recset[0]['forum_id']
  75. else:
  76. return 0
  77. except:
  78. trace = traceback.format_exc()
  79. print (trace)
  80. def getLastTopic(cur):
  81. try:
  82. cur.execute("select topic_id from topics order by topic_id desc limit 1")
  83. recset = cur.fetchall()
  84. if recset:
  85. return recset[0]['topic_id']
  86. else:
  87. return 0
  88. except:
  89. trace = traceback.format_exc()
  90. print (trace)
  91. def getLastUser(cur):
  92. try:
  93. cur.execute("select user_id from users order by user_id desc limit 1")
  94. recset = cur.fetchall()
  95. if recset:
  96. return recset[0]['user_id']
  97. else:
  98. return 0
  99. except:
  100. trace = traceback.format_exc()
  101. print (trace)
  102. def getLastUserVersion(cur, userId):
  103. try:
  104. cur.execute("select version_user from users_history where user_id = %(userId)s order by version_user desc limit 1", {'userId': userId})
  105. recset = cur.fetchall()
  106. if recset:
  107. return recset[0]['version_user']
  108. else:
  109. return 0
  110. except:
  111. trace = traceback.format_exc()
  112. print (trace)
  113. def getLastTopicVersion(cur, topicId):
  114. try:
  115. cur.execute("select version_topic from topics_history where topic_id = %(topicId)s order by version_topic desc limit 1", {'topicId': topicId})
  116. recset = cur.fetchall()
  117. if recset:
  118. return recset[0]['version_topic']
  119. else:
  120. return 0
  121. except:
  122. trace = traceback.format_exc()
  123. print (trace)
  124. def getLastPostVersion(cur, postId):
  125. try:
  126. cur.execute("select version_post from posts_history where post_id = %(postId)s order by version_post desc limit 1", {'postId': postId})
  127. recset = cur.fetchall()
  128. if recset:
  129. return recset[0]['version_post']
  130. else:
  131. return 0
  132. except:
  133. trace = traceback.format_exc()
  134. print (trace)
  135. def getLastPost(cur):
  136. try:
  137. cur.execute("select post_id from posts order by post_id desc limit 1")
  138. recset = cur.fetchall()
  139. if recset:
  140. return recset[0]['post_id']
  141. else:
  142. return 0
  143. except:
  144. trace = traceback.format_exc()
  145. print (trace)
  146. def create_forum(cur, row, url):
  147. forumId = verifyForum(cur, row[0])
  148. if not forumId:
  149. forumId = int(getLastForum(cur) + 1)
  150. sql = "Insert into forums (forum_id, name_forum, url_forum, dateinserted_forum) Values (%s, %s, %s, %s)"
  151. recset = [forumId, row[0], url, row[8]]
  152. cur.execute(sql, recset)
  153. return forumId
  154. def create_topic(cur, forumId, row, authorId):
  155. topicId = verifyTopic(cur, forumId, authorId, row[3])
  156. if not topicId:
  157. topicId = int(getLastTopic(cur) + 1)
  158. newTopic = True
  159. else:
  160. newTopic = False
  161. if newTopic:
  162. sql = "Insert into topics (topic_id, forum_id, author_id, title_topic, board_topic, views_topic, posts_topic, " \
  163. "href_topic, dateadded_topic, dateinserted_topic, classification_topic) Values (%s, %s, %s, %s, %s, %s, " \
  164. "%s, %s, %s, %s, %s)"
  165. recset = [topicId, forumId, authorId,
  166. row[3], row[1],
  167. row[4] if row[4] != '-1' else None,
  168. row[5] if row[5] != '-1' else None,
  169. row[6] if row[6] != '-1' else None,
  170. row[7] if row[7] != '-1' else None,
  171. row[8],
  172. row[19]]
  173. cur.execute(sql, recset)
  174. else:
  175. # Tracking potential topic changes
  176. sql = "select * from topics where topic_Id = %(topicId)s"
  177. cur.execute(sql, {'topicId': topicId})
  178. recset = cur.fetchall()
  179. if (str(recset[0]['board_topic']) != str(row[1]) or
  180. str(recset[0]['views_topic']) != str(row[4] if row[4] != '-1' else None) or # there was a change in the topic information
  181. str(recset[0]['posts_topic']) != str(row[5] if row[5] != '-1' else None)):
  182. topicVersionId = int(getLastTopicVersion(cur, topicId) + 1)
  183. sql = "Insert into topics_history (topic_id, version_topic, forum_id, author_id, title_topic, board_topic, views_topic, posts_topic, " \
  184. "href_topic, dateadded_topic, dateinserted_topic, classification_topic) Values (%s, %s, %s, %s, %s, " \
  185. "%s, %s, %s, %s, %s, %s, %s)"
  186. recset = [topicId, topicVersionId, forumId, authorId,
  187. recset[0]['title_topic'],
  188. recset[0]['board_topic'],
  189. recset[0]['views_topic'],
  190. recset[0]['posts_topic'],
  191. recset[0]['href_topic'],
  192. recset[0]['dateadded_topic'],
  193. recset[0]['dateinserted_topic'],
  194. recset[0]['classification_topic']]
  195. cur.execute(sql, recset)
  196. sql = "Update topics set board_topic = %(board_topic)s, views_topic = %(views_topic)s, posts_topic = %(posts_topic)s, " \
  197. "dateinserted_topic = %(dateinserted_topic)s where topic_id = %(topicId)s"
  198. cur.execute(sql, {'board_topic': row[1],
  199. 'views_topic': row[4] if row[4] != '-1' else None,
  200. 'posts_topic': row[5] if row[5] != '-1' else None,
  201. 'dateinserted_topic': row[8],
  202. 'topicId': topicId})
  203. return topicId
  204. def create_author(cur, row, forumId):
  205. userId = verifyUser(cur, row[2], forumId)
  206. if not userId:
  207. userId = int(getLastUser(cur) + 1)
  208. sql = "Insert into users (user_id, forum_id, name_user, status_user, reputation_user, interest_user, " \
  209. "signature_user, image_user, dateinserted_user) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
  210. recset = [userId, forumId,
  211. row[2], 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', #telling the create_user function to not track changes here
  212. row[8]]
  213. cur.execute(sql, recset)
  214. return userId
  215. def create_user(cur, row, forumId, index):
  216. userId = verifyUser(cur, row[10][index], forumId)
  217. if not userId:
  218. userId = int(getLastUser(cur) + 1)
  219. newUser = True
  220. else:
  221. newUser = False
  222. if newUser:
  223. sql = "Insert into users (user_id, forum_id, name_user, status_user, reputation_user, interest_user, " \
  224. "signature_user, image_user, dateinserted_user) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
  225. recset = [userId, forumId,
  226. row[10][index],
  227. row[11][index] if row[11][index] != '-1' else None,
  228. row[12][index] if row[12][index] != '-1' else None,
  229. row[13][index] if row[13][index] != '-1' else None,
  230. row[14][index] if row[14][index] != '-1' else None,
  231. row[9][index] if row[9][index] != '-1' else None,
  232. row[8]]
  233. cur.execute(sql, recset)
  234. else:
  235. # Tracking potential user changes
  236. sql = "select * from users where user_id = %(userId)s"
  237. cur.execute(sql, {'userId': userId})
  238. recset = cur.fetchall()
  239. #decode_decrypt_image_in_base64(recset[0]['image_user'])
  240. if (str(recset[0]['status_user']) != str(row[11][index] if row[11][index] != '-1' else None) or
  241. str(recset[0]['reputation_user']) != str(row[12][index] if row[12][index] != '-1' else None) or
  242. str(recset[0]['interest_user']) != str(row[13][index] if row[13][index] != '-1' else None) or
  243. str(recset[0]['signature_user']) != str(row[14][index] if row[14][index] != '-1' else None) or
  244. str(recset[0]['image_user']) != str(row[9][index] if row[9][index] != '-1' else None)): # there was a change in the user information
  245. if (str(recset[0]['status_user']) != 'Nan' or
  246. str(recset[0]['reputation_user']) != 'Nan' or
  247. str(recset[0]['interest_user']) != 'Nan' or
  248. str(recset[0]['signature_user']) != 'Nan' or
  249. str(recset[0]['image_user']) != 'Nan'):
  250. userVersionId = int(getLastUserVersion(cur, userId) + 1)
  251. sql = "Insert into users_history (user_id, version_user, forum_id, name_user, status_user, reputation_user, interest_user, " \
  252. "signature_user, image_user, dateinserted_user) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
  253. recset = [userId, userVersionId, forumId,
  254. recset[0]['name_user'],
  255. recset[0]['status_user'],
  256. recset[0]['reputation_user'],
  257. recset[0]['interest_user'],
  258. recset[0]['signature_user'],
  259. recset[0]['image_user'],
  260. recset[0]['dateinserted_user']]
  261. cur.execute(sql, recset)
  262. sql = "Update users set status_user = %(status_user)s, reputation_user = %(reputation_user)s, " \
  263. "interest_user = %(interest_user)s, signature_user = %(signature_user)s, image_user = %(image_user)s, " \
  264. "dateinserted_user = %(dateinserted_user)s where user_id = %(userId)s"
  265. cur.execute(sql, {'status_user': row[11][index] if row[11][index] != '-1' else None,
  266. 'reputation_user': row[12][index] if row[12][index] != '-1' else None,
  267. 'interest_user': row[13][index] if row[13][index] != '-1' else None,
  268. 'signature_user': row[14][index] if row[14][index] != '-1' else None,
  269. 'image_user': row[9][index] if row[9][index] != '-1' else None,
  270. 'dateinserted_user': row[8] if row[8] != '-1' else None,
  271. 'userId': userId})
  272. return userId
  273. def create_posts(cur, row, forumId, topicId):
  274. if row[10] != "-1":
  275. for i in range(len(row[10])):
  276. userId = create_user(cur, row, forumId, i)
  277. postId = verifyPost(cur, topicId, userId, row[17][i])
  278. if not postId:
  279. postId = int(getLastPost(cur) + 1)
  280. sql = "Insert into posts (post_id, topic_id, user_id, content_post, feedback_post, image_post," \
  281. "dateadded_post, dateinserted_post) Values (%s, %s, %s, %s, %s, %s, %s, %s)"
  282. recset = [postId, topicId, userId,
  283. row[15][i] if row[15][i] != '-1' else None,
  284. row[16][i] if row[16][i] != '-1' else None,
  285. row[18][i] if row[18][i] != '-1' else None,
  286. row[17][i] if row[17][i] != '-1' else None,
  287. row[8]]
  288. cur.execute(sql, recset)
  289. else:
  290. # Tracking potential post changes
  291. sql = "select * from posts where post_id = %(postId)s"
  292. cur.execute(sql, {'postId': postId})
  293. recset = cur.fetchall()
  294. if (str(recset[0]['content_post']) != str(row[15][i]) or
  295. str(recset[0]['feedback_post']) != str(row[16][i] if row[16][i] != '-1' else None) or
  296. str(recset[0]['image_post']) != str(row[18][i] if row[18][i] != '-1' else None)): # there was a change in the post information
  297. #decode_decrypt_image_in_base64(recset[0]['image_post'])
  298. postVersionId = int(getLastPostVersion(cur, postId) + 1)
  299. sql = "Insert into posts_history (post_id, version_post, topic_id, user_id, content_post, feedback_post, " \
  300. "image_post, dateadded_post, dateinserted_post) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
  301. recset = [postId, postVersionId, topicId, userId,
  302. recset[0]['content_post'],
  303. recset[0]['feedback_post'],
  304. recset[0]['image_post'],
  305. recset[0]['dateadded_post'],
  306. recset[0]['dateinserted_post']]
  307. cur.execute(sql, recset)
  308. sql = "Update posts set content_post = %(content_post)s, feedback_post = %(feedback_post)s, " \
  309. "image_post = %(image_post)s, dateinserted_post = %(dateinserted_post)s where post_id = %(postId)s"
  310. cur.execute(sql, {'content_post': row[15][i] if row[15][i] != '-1' else None,
  311. 'feedback_post': row[16][i] if row[16][i] != '-1' else None,
  312. 'image_post': row[18][i] if row[18][i] != '-1' else None,
  313. 'dateinserted_post': row[8],
  314. 'postId': postId})
  315. def create_database(cur, con):
  316. try:
  317. sql = "create table forums (forum_id integer NOT NULL, name_forum character varying(255) NOT NULL, url_forum " \
  318. "character varying(255) NOT null, dateinserted_forum timestamp(6) with time zone NOT NULL, " \
  319. "constraint forums_pk primary key (forum_id))"
  320. cur.execute(sql)
  321. sql = "create unique index unique_forum ON forums USING btree (name_forum ASC NULLS LAST)"
  322. cur.execute(sql)
  323. sql = "create table users (user_id integer NOT NULL, forum_id integer NOT NULL, name_user character varying(" \
  324. "255) NOT NULL, status_user character varying(255) null, reputation_user character varying(255) null, " \
  325. "interest_user character varying(5000) null, signature_user character varying(1000) null, " \
  326. "image_user character varying(10000000) null, dateinserted_user timestamp(6) with time zone NOT NULL, " \
  327. "constraint users_pk primary key (user_id), " \
  328. "constraint users_forum_id_fkey foreign key (forum_id) references forums (forum_id))"
  329. cur.execute(sql)
  330. sql = "create unique index unique_user ON users USING btree (forum_id ASC NULLS LAST, name_user ASC NULLS LAST)"
  331. cur.execute(sql)
  332. sql = "create table users_history(user_id integer NOT NULL, version_user integer not null, forum_id integer NOT NULL, " \
  333. "name_user character varying(255) NOT NULL, status_user character varying(255) null, " \
  334. "reputation_user character varying(255) null, interest_user character varying(5000) null, " \
  335. "signature_user character varying(1000) null, image_user character varying(10000000) null, " \
  336. "dateinserted_user timestamp(6) with time zone NOT NULL, " \
  337. "constraint users_history_pk primary key (user_id, version_user), " \
  338. "constraint users_history_user_id_fkey foreign key (user_id) references " \
  339. "users (user_id), constraint users_history_forum_id_fkey foreign key (forum_id) references forums (forum_id))"
  340. cur.execute(sql)
  341. sql = "create table topics(topic_id integer NOT NULL, forum_id integer NOT NULL, author_id integer NOT NULL, " \
  342. "title_topic character varying(255) NOT NULL, board_topic character varying(255) NOT NULL, views_topic integer null, " \
  343. "posts_topic integer null, href_topic character varying(255) NOT null, dateadded_topic timestamp(6) with time zone null, " \
  344. "dateinserted_topic timestamp(6) with time zone NOT NULL, classification_topic double precision NOT NULL, " \
  345. "constraint topics_pk primary key (topic_id), constraint topics_author_id_fkey " \
  346. "foreign key (author_id) references users (user_id), constraint topics_forum_id_fkey foreign key (" \
  347. "forum_id) references forums (forum_id))"
  348. cur.execute(sql)
  349. sql = "create unique index unique_topic ON topics USING btree (forum_id ASC NULLS LAST, author_id ASC NULLS LAST, " \
  350. "title_topic ASC NULLS LAST)"
  351. cur.execute(sql)
  352. sql = "create table topics_history(topic_id integer NOT NULL, version_topic integer not null, forum_id integer NOT NULL, " \
  353. "author_id integer NOT NULL, title_topic character varying(255) NOT NULL, board_topic character varying(255) NOT NULL, " \
  354. "views_topic integer null, posts_topic integer null, href_topic character varying(255) NOT null, " \
  355. "dateadded_topic timestamp(6) with time zone null, dateinserted_topic timestamp(6) with time zone NOT NULL, " \
  356. "classification_topic double precision NOT NULL, " \
  357. "constraint topics_history_pk primary key (topic_id, version_topic), " \
  358. "constraint topics_history_topic_id_fkey foreign key (topic_id) references topics (topic_id), " \
  359. "constraint topics_history_author_id_fkey foreign key (author_id) references users (user_id), " \
  360. "constraint topics_history_board_id_fkey foreign key (forum_id) references forums (forum_id))"
  361. cur.execute(sql)
  362. sql = "create table posts(post_id integer NOT NULL, topic_id integer NOT NULL, " \
  363. "user_id integer NOT NULL, content_post character varying(100000) NOT null, feedback_post integer null, " \
  364. "image_post character varying(10000000) null, dateadded_post timestamp(6) with time zone NOT NULL, " \
  365. "dateinserted_post timestamp(6) with time zone NOT NULL, " \
  366. "constraint posts_pk primary key (post_id), " \
  367. "constraint posts_user_id_fkey foreign key (user_id) references users (user_id), constraint " \
  368. "posts_topic_id_fkey foreign key (topic_id) references topics (topic_id))"
  369. cur.execute(sql)
  370. sql = "create unique index unique_post ON posts USING btree (topic_id ASC NULLS LAST, user_id ASC NULLS LAST, " \
  371. "dateadded_post ASC NULLS LAST)"
  372. cur.execute(sql)
  373. sql = "create table posts_history(post_id integer NOT NULL, version_post integer not null, topic_id integer NOT NULL, " \
  374. "user_id integer NOT NULL, content_post character varying(100000) NOT null, feedback_post integer null, " \
  375. "image_post character varying(10000000) null, dateadded_post timestamp(6) with time zone NOT NULL, " \
  376. "dateinserted_post timestamp(6) with time zone NOT NULL, " \
  377. "constraint posts_history_pk primary key (post_id, version_post), " \
  378. "constraint posts_history_user_id_fkey foreign key (user_id) references users (user_id), " \
  379. "constraint posts_history_topic_id_fkey foreign key (topic_id) references topics (topic_id), " \
  380. "constraint posts_history_post_id_fkey foreign key (post_id) references posts (post_id))"
  381. cur.execute(sql)
  382. con.commit()
  383. except:
  384. con.rollback()
  385. trace = traceback.format_exc()
  386. if (trace.find("already exists")==-1):
  387. print ("There was a problem during the database creation." )
  388. raise SystemExit