This is based on the calsyslab project.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

524 lines
19 KiB

1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
  1. __author__ = 'DarkWeb'
  2. import psycopg2
  3. import traceback
  4. def connectDataBase():
  5. from Forums.Initialization.forums_mining import config
  6. try:
  7. ip = config.get('PostgreSQL', 'ip')
  8. username = config.get('PostgreSQL', 'username')
  9. password = config.get('PostgreSQL', 'password')
  10. database = config.get('PostgreSQL', 'database')
  11. return psycopg2.connect(host=ip, user=username, password=password, dbname=database)
  12. except:
  13. print ("Data base " + config.get('PostgreSQL', 'database') + " not found.")
  14. raise SystemExit
  15. def verifyForum(cur, nameForum):
  16. try:
  17. cur.execute("select forum_id from forums where name_forum = %(nameForum)s limit 1", {'nameForum': nameForum})
  18. recset = cur.fetchall()
  19. if recset:
  20. return recset[0][0]
  21. else:
  22. return 0
  23. except:
  24. trace = traceback.format_exc()
  25. print (trace)
  26. def verifyBoard(cur, forum, nameBoard):
  27. try:
  28. cur.execute("select board_id from boards where forum_id = %(forum)s and name_board = %(nameBoard)s limit 1",
  29. {'forum': forum, 'nameBoard': nameBoard})
  30. recset = cur.fetchall()
  31. if recset:
  32. return recset[0][0]
  33. else:
  34. return 0
  35. except:
  36. trace = traceback.format_exc()
  37. print (trace)
  38. def verifyTopic(cur, forumId, authorId, titleTopic):
  39. try:
  40. cur.execute("select topic_id from topics where forum_id = %(forumId)s and author_id = %(authorId)s and title_topic = %(titleTopic)s limit 1",
  41. {'forumId': forumId, 'authorId': authorId, 'titleTopic': titleTopic})
  42. recset = cur.fetchall()
  43. if recset:
  44. return recset[0][0]
  45. else:
  46. return 0
  47. except:
  48. trace = traceback.format_exc()
  49. print (trace)
  50. def verifyPost(cur, topicId, userId, dateAdded):
  51. try:
  52. cur.execute("select post_id from posts where topic_id = %(topicId)s and "
  53. "user_id = %(userId)s and dateadded_post = %(dateAdded)s limit 1", {'topicId': topicId,
  54. 'userId': userId,
  55. 'dateAdded': dateAdded})
  56. recset = cur.fetchall()
  57. if recset:
  58. return recset[0][0]
  59. else:
  60. return 0
  61. except:
  62. trace = traceback.format_exc()
  63. print (trace)
  64. def verifyUser(cur, nameUser, forumId):
  65. try:
  66. cur.execute("select user_id from users where name_user = %(nameUser)s and forum_id = %(forumId)s limit 1",
  67. {'nameUser': nameUser, 'forumId': forumId})
  68. recset = cur.fetchall()
  69. if recset:
  70. return recset[0][0]
  71. else:
  72. return 0
  73. except:
  74. trace = traceback.format_exc()
  75. print (trace)
  76. def getLastForum(cur):
  77. try:
  78. cur.execute("select forum_id from forums order by forum_id desc limit 1")
  79. recset = cur.fetchall()
  80. if recset:
  81. return recset[0][0]
  82. else:
  83. return 0
  84. except:
  85. trace = traceback.format_exc()
  86. print (trace)
  87. def getLastBoard(cur):
  88. try:
  89. cur.execute("select board_id from boards order by board_id desc limit 1")
  90. recset = cur.fetchall()
  91. if recset:
  92. return recset[0][0]
  93. else:
  94. return 0
  95. except:
  96. trace = traceback.format_exc()
  97. print (trace)
  98. def getLastTopic(cur):
  99. try:
  100. cur.execute("select topic_id from topics order by topic_id desc limit 1")
  101. recset = cur.fetchall()
  102. if recset:
  103. return recset[0][0]
  104. else:
  105. return 0
  106. except:
  107. trace = traceback.format_exc()
  108. print (trace)
  109. def getLastUser(cur):
  110. try:
  111. cur.execute("select user_id from users order by user_id desc limit 1")
  112. recset = cur.fetchall()
  113. if recset:
  114. return recset[0][0]
  115. else:
  116. return 0
  117. except:
  118. trace = traceback.format_exc()
  119. print (trace)
  120. def getLastPost(cur):
  121. try:
  122. cur.execute("select post_id from posts order by post_id desc limit 1")
  123. recset = cur.fetchall()
  124. if recset:
  125. return recset[0][0]
  126. else:
  127. return 0
  128. except:
  129. trace = traceback.format_exc()
  130. print (trace)
  131. def create_forum(cur, row, url):
  132. forumId = verifyForum(cur, row[0])
  133. if not forumId:
  134. forumId = int(getLastForum(cur) + 1)
  135. sql = "Insert into forums (forum_id, name_forum, url_forum, dateinserted_forum) Values (%s, %s, %s, %s)"
  136. recset = [forumId, row[0], url, row[8]]
  137. cur.execute(sql, recset)
  138. return forumId
  139. def create_topic(cur, forumId, row, authorId):
  140. topicId = verifyTopic(cur, forumId, authorId, row[3])
  141. if not topicId:
  142. topicId = int(getLastTopic(cur) + 1)
  143. newTopic = True
  144. else:
  145. newTopic = False
  146. if newTopic:
  147. sql = "Insert into topics (topic_id, forum_id, author_id, title_topic, board_topic, views_topic, posts_topic, " \
  148. "href_topic, dateadded_topic, dateinserted_topic, classification_topic) Values (%s, %s, %s, %s, %s, %s, " \
  149. "%s, %s, %s, %s, %s)"
  150. recset = [topicId, forumId, authorId,
  151. row[3], row[1],
  152. row[4] if row[4] != '-1' else None,
  153. row[5] if row[5] != '-1' else None,
  154. row[6] if row[6] != '-1' else None,
  155. row[7] if row[7] != '-1' else None,
  156. row[8],
  157. row[17]]
  158. cur.execute(sql, recset)
  159. else:
  160. # Tracking potential topic changes
  161. sql = "select * from topics where topic_Id = %(topicId)s"
  162. cur.execute(sql, {'topicId': topicId})
  163. recset = cur.fetchall()
  164. if (str(recset[0][4]) != str(row[1]) or str(recset[0][5]) != str(row[4] if row[4] != '-1' else None) or # there was a change in the topic information
  165. str(recset[0][6]) != str(row[5] if row[5] != '-1' else None)):
  166. sql = "Insert into topics_history (topic_id, forum_id, author_id, title_topic, board_topic, views_topic, posts_topic, " \
  167. "href_topic, dateadded_topic, dateinserted_topic, classification_topic) Values (%s, %s, %s, %s, %s, " \
  168. "%s, %s, %s, %s, %s, %s)"
  169. recset = [topicId, forumId, authorId,
  170. recset[0][3],
  171. recset[0][4],
  172. recset[0][5],
  173. recset[0][6],
  174. recset[0][7],
  175. recset[0][8],
  176. recset[0][9],
  177. recset[0][10]]
  178. cur.execute(sql, recset)
  179. sql = "Update topics set board_topic = %(board_topic)s, views_topic = %(views_topic)s, posts_topic = %(posts_topic)s, " \
  180. "dateinserted_topic = %(dateinserted_topic)s where topic_id = %(topicId)s"
  181. cur.execute(sql, {'board_topic': row[1],
  182. 'views_topic': row[4] if row[4] != '-1' else None,
  183. 'posts_topic': row[5] if row[5] != '-1' else None,
  184. 'dateinserted_topic': row[8],
  185. 'topicId': topicId})
  186. return topicId
  187. def create_author(cur, row, forumId):
  188. userId = verifyUser(cur, row[2], forumId)
  189. if not userId:
  190. userId = int(getLastUser(cur) + 1)
  191. sql = "Insert into users (user_id, forum_id, name_user, status_user, reputation_user, interest_user, " \
  192. "signature_user, dateinserted_user) Values (%s, %s, %s, %s, %s, %s, %s, %s)"
  193. recset = [userId, forumId,
  194. row[2], 'Nan', 'Nan', 'Nan', 'Nan', #telling the create_posts function to not track changes here
  195. row[8]]
  196. cur.execute(sql, recset)
  197. return userId
  198. def create_user(cur, row, forumId, index):
  199. userId = verifyUser(cur, row[9][index], forumId)
  200. if not userId:
  201. userId = int(getLastUser(cur) + 1)
  202. newUser = True
  203. else:
  204. newUser = False
  205. if newUser:
  206. sql = "Insert into users (user_id, forum_id, name_user, status_user, reputation_user, interest_user, " \
  207. "signature_user, dateinserted_user) Values (%s, %s, %s, %s, %s, %s, %s, %s)"
  208. recset = [userId, forumId,
  209. row[9][index],
  210. row[10][index] if row[10][index] != '-1' else None,
  211. row[11][index] if row[11][index] != '-1' else None,
  212. row[12][index] if row[12][index] != '-1' else None,
  213. row[13][index] if row[13][index] != '-1' else None,
  214. row[8]]
  215. cur.execute(sql, recset)
  216. else:
  217. # Tracking potential user changes
  218. sql = "select * from users where user_id = %(userId)s"
  219. cur.execute(sql, {'userId': userId})
  220. recset = cur.fetchall()
  221. if (str(recset[0][3]) != str(row[10][index] if row[10][index] != '-1' else None) or str(recset[0][4]) != str(row[11][index] if row[11][index] != '-1' else None) or
  222. str(recset[0][5]) != str(row[12][index] if row[12][index] != '-1' else None) or str(recset[0][6]) != str(row[13][index] if row[13][index] != '-1' else None)): # there was a change in the user information
  223. if (str(recset[0][3]) != 'Nan' or str(recset[0][4]) != 'Nan' or str(recset[0][5]) != 'Nan' or str(recset[0][6]) != 'Nan'):
  224. sql = "Insert into users_history (user_id, forum_id, name_user, status_user, reputation_user, interest_user, " \
  225. "signature_user, dateinserted_user) Values (%s, %s, %s, %s, %s, %s, %s, %s)"
  226. recset = [userId, forumId,
  227. recset[0][2],
  228. recset[0][3],
  229. recset[0][4],
  230. recset[0][5],
  231. recset[0][6],
  232. recset[0][7]]
  233. cur.execute(sql, recset)
  234. sql = "Update users set status_user = %(status_user)s, reputation_user = %(reputation_user)s, " \
  235. "interest_user = %(interest_user)s, signature_user = %(signature_user)s, dateinserted_user = %(dateinserted_user)s " \
  236. "where user_id = %(userId)s"
  237. cur.execute(sql, {'status_user': row[10][index] if row[10][index] != '-1' else None,
  238. 'reputation_user': row[11][index] if row[11][index] != '-1' else None,
  239. 'interest_user': row[12][index] if row[12][index] != '-1' else None,
  240. 'signature_user': row[13][index] if row[13][index] != '-1' else None,
  241. 'dateinserted_user': row[8] if row[8] != '-1' else None,
  242. 'userId': userId})
  243. return userId
  244. def create_posts(cur, row, forumId, topicId):
  245. if row[9] != "-1":
  246. for i in range(len(row[9])):
  247. userId = create_user(cur, row, forumId, i)
  248. postId = verifyPost(cur, topicId, userId, row[16][i])
  249. if not postId:
  250. postId = int(getLastPost(cur) + 1)
  251. sql = "Insert into posts (post_id, topic_id, user_id, content_post, feedback_post, " \
  252. "dateadded_post, dateinserted_post) Values (%s, %s, %s, %s, %s, %s, %s)"
  253. recset = [postId, topicId, userId,
  254. row[14][i] if row[14][i] != '-1' else None,
  255. row[15][i] if row[15][i] != '-1' else None,
  256. row[16][i] if row[16][i] != '-1' else None,
  257. row[8]]
  258. cur.execute(sql, recset)
  259. else:
  260. # Tracking potential post changes
  261. sql = "select * from posts where post_id = %(postId)s"
  262. cur.execute(sql, {'postId': postId})
  263. recset = cur.fetchall()
  264. if (str(recset[0][3]) != str(row[14][i]) or str(recset[0][4]) != str(row[15][i] if row[15][i] != '-1' else None)): # there was a change in the post information
  265. sql = "Insert into posts_history (post_id, topic_id, user_id, content_post, feedback_post, " \
  266. "dateadded_post, dateinserted_post) Values (%s, %s, %s, %s, %s, %s, %s)"
  267. recset = [postId, topicId, userId,
  268. recset[0][3],
  269. recset[0][4],
  270. recset[0][5],
  271. recset[0][6]]
  272. cur.execute(sql, recset)
  273. sql = "Update posts set content_post = %(content_post)s, feedback_post = %(feedback_post)s, dateinserted_post = %(dateinserted_post)s " \
  274. "where post_id = %(postId)s"
  275. cur.execute(sql, {'content_post': row[14][i] if row[14][i] != '-1' else None,
  276. 'feedback_post': row[15][i] if row[15][i] != '-1' else None,
  277. 'dateinserted_post': row[8],
  278. 'postId': postId})
  279. def create_database(cur, con):
  280. try:
  281. sql = "create table forums (forum_id integer NOT NULL, name_forum character varying(255) NOT NULL, url_forum " \
  282. "character varying(255) NOT null, dateinserted_forum timestamp(6) with time zone NOT NULL, constraint " \
  283. "forums_pk primary key (forum_id))"
  284. cur.execute(sql)
  285. sql = "create unique index unique_forum ON forums USING btree (name_forum ASC NULLS LAST)"
  286. cur.execute(sql)
  287. sql = "create table users (user_id integer NOT NULL, forum_id integer NOT NULL, name_user character varying(" \
  288. "255) NOT NULL, status_user character varying(255) null, reputation_user character varying(255) null, " \
  289. "interest_user character varying(5000) null, signature_user character varying(1000) null, " \
  290. "dateinserted_user timestamp(6) with time zone NOT NULL, constraint users_pk primary key (user_id), " \
  291. "constraint users_forum_id_fkey foreign key (forum_id) references forums (forum_id))"
  292. cur.execute(sql)
  293. sql = "create unique index unique_user ON users USING btree (forum_id ASC NULLS LAST, name_user ASC NULLS LAST)"
  294. cur.execute(sql)
  295. sql = "create table users_history(user_id integer NOT NULL, forum_id integer NOT NULL, name_user character " \
  296. "varying(255) NOT NULL, status_user character varying(255) null, reputation_user character varying(255) " \
  297. "null, interest_user character varying(5000) null, signature_user character varying(1000) null, " \
  298. "dateinserted_user timestamp(6) with time zone NOT NULL, constraint users_history_pk primary key (" \
  299. "user_id, dateinserted_user), constraint users_history_user_id_fkey foreign key (user_id) references " \
  300. "users (user_id), constraint users_history_forum_id_fkey foreign key (forum_id) references forums (" \
  301. "forum_id))"
  302. cur.execute(sql)
  303. sql = "create table topics(topic_id integer NOT NULL, forum_id integer NOT NULL, author_id integer NOT NULL, " \
  304. "title_topic character varying(255) NOT NULL, board_topic character varying(255) NOT NULL, views_topic integer null, " \
  305. "posts_topic integer null, href_topic character varying(255) NOT null, dateadded_topic timestamp(6) with " \
  306. "time zone null, dateinserted_topic timestamp(6) with time zone NOT NULL, classification_topic double " \
  307. "precision NOT NULL, constraint topics_pk primary key (topic_id), constraint topics_author_id_fkey " \
  308. "foreign key (author_id) references users (user_id), constraint topics_forum_id_fkey foreign key (" \
  309. "forum_id) references forums (forum_id))"
  310. cur.execute(sql)
  311. sql = "create unique index unique_topic ON topics USING btree (forum_id ASC NULLS LAST, author_id ASC NULLS LAST, " \
  312. "title_topic ASC NULLS LAST)"
  313. cur.execute(sql)
  314. sql = "create table topics_history(topic_id integer NOT NULL, forum_id integer NOT NULL, author_id integer NOT NULL, " \
  315. "title_topic character varying(255) NOT NULL, board_topic character varying(255) NOT NULL, views_topic integer " \
  316. "null, posts_topic integer null, href_topic character varying(255) NOT null, dateadded_topic timestamp(6) " \
  317. "with time zone null, dateinserted_topic timestamp(6) with time zone NOT NULL, classification_topic " \
  318. "double precision NOT NULL, constraint topics_history_pk primary key (topic_id, dateinserted_topic), " \
  319. "constraint topics_history_topic_id_fkey foreign key (topic_id) references topics (topic_id), " \
  320. "constraint topics_history_author_id_fkey foreign key (author_id) references users (user_id), " \
  321. "constraint topics_history_board_id_fkey foreign key (forum_id) references forums (forum_id))"
  322. cur.execute(sql)
  323. sql = "create table posts(post_id integer NOT NULL, topic_id integer NOT NULL, " \
  324. "user_id integer NOT NULL, content_post character varying(100000) NOT null, feedback_post integer null, " \
  325. "dateadded_post timestamp(6) with time zone NOT NULL, dateinserted_post timestamp(6) with time zone NOT " \
  326. "NULL, constraint posts_pk primary key (post_id), " \
  327. "constraint posts_user_id_fkey foreign key (user_id) references users (user_id), constraint " \
  328. "posts_topic_id_fkey foreign key (topic_id) references topics (topic_id))"
  329. cur.execute(sql)
  330. sql = "create unique index unique_post ON posts USING btree (topic_id ASC NULLS LAST, user_id ASC NULLS LAST, " \
  331. "dateadded_post ASC NULLS LAST)"
  332. cur.execute(sql)
  333. sql = "create table posts_history(post_id integer NOT NULL, topic_id integer NOT NULL, " \
  334. "user_id integer NOT NULL, content_post character varying(100000) NOT null, feedback_post integer null, " \
  335. "dateadded_post timestamp(6) with time zone NOT NULL, dateinserted_post timestamp(6) with time zone NOT " \
  336. "NULL, constraint posts_history_pk primary key (post_id, dateinserted_post), " \
  337. "constraint posts_history_user_id_fkey foreign key (user_id) references users (user_id), " \
  338. "constraint posts_history_topic_id_fkey foreign key (topic_id) references topics (topic_id), " \
  339. "constraint posts_history_post_id_fkey foreign key (post_id) references posts (post_id))"
  340. cur.execute(sql)
  341. con.commit()
  342. except:
  343. con.rollback()
  344. trace = traceback.format_exc()
  345. if (trace.find("already exists")==-1):
  346. print ("There was a problem during the database creation." )
  347. raise SystemExit