Browse Source

finished lock tables.

main
westernmeadow 1 year ago
parent
commit
58eede1873
3 changed files with 72 additions and 48 deletions
  1. +7
    -3
      Forums/CryptBB/parser.py
  2. +59
    -44
      Forums/DB_Connection/db_connection.py
  3. +6
    -1
      Forums/Initialization/prepare_parser.py

+ 7
- 3
Forums/CryptBB/parser.py View File

@ -164,9 +164,13 @@ def cryptBB_description_parser(soup):
img = "-1"
image_post.append(img)
img = ipost.find('div', {"class": "author_avatar"}).find('img')
if img is not None:
img = img.get('src').split('base64,')[-1]
avatar = ipost.find('div', {"class": "author_avatar"})
if avatar is not None:
img = avatar.find('img')
if img is not None:
img = img.get('src').split('base64,')[-1]
else:
img = "-1"
else:
img = "-1"
image_user.append(img)


+ 59
- 44
Forums/DB_Connection/db_connection.py View File

@ -27,12 +27,14 @@ def verifyForum(cur, nameForum):
try:
cur.execute("lock table forums IN ACCESS EXCLUSIVE MODE;")
cur.execute("select forum_id from forums where name_forum = %(nameForum)s limit 1", {'nameForum': nameForum})
recset = cur.fetchall()
if recset:
return recset[0][0]
return recset[0]['forum_id']
else:
return 0
@ -46,13 +48,15 @@ def verifyTopic(cur, forumId, authorId, titleTopic):
try:
cur.execute("lock table topics IN ACCESS EXCLUSIVE MODE;")
cur.execute("select topic_id from topics where forum_id = %(forumId)s and author_id = %(authorId)s and title_topic = %(titleTopic)s limit 1",
{'forumId': forumId, 'authorId': authorId, 'titleTopic': titleTopic})
recset = cur.fetchall()
if recset:
return recset[0][0]
return recset[0]['topic_id']
else:
return 0
@ -66,6 +70,8 @@ def verifyPost(cur, topicId, userId, dateAdded):
try:
cur.execute("lock table posts IN ACCESS EXCLUSIVE MODE;")
cur.execute("select post_id from posts where topic_id = %(topicId)s and "
"user_id = %(userId)s and dateadded_post = %(dateAdded)s limit 1", {'topicId': topicId,
'userId': userId,
@ -74,7 +80,7 @@ def verifyPost(cur, topicId, userId, dateAdded):
recset = cur.fetchall()
if recset:
return recset[0][0]
return recset[0]['post_id']
else:
return 0
@ -88,13 +94,15 @@ def verifyUser(cur, nameUser, forumId):
try:
cur.execute("lock table users IN ACCESS EXCLUSIVE MODE;")
cur.execute("select user_id from users where name_user = %(nameUser)s and forum_id = %(forumId)s limit 1",
{'nameUser': nameUser, 'forumId': forumId})
recset = cur.fetchall()
if recset:
return recset[0][0]
return recset[0]['user_id']
else:
return 0
@ -113,7 +121,7 @@ def getLastForum(cur):
recset = cur.fetchall()
if recset:
return recset[0][0]
return recset[0]['forum_id']
else:
return 0
@ -132,7 +140,7 @@ def getLastTopic(cur):
recset = cur.fetchall()
if recset:
return recset[0][0]
return recset[0]['topic_id']
else:
return 0
@ -151,7 +159,7 @@ def getLastUser(cur):
recset = cur.fetchall()
if recset:
return recset[0][0]
return recset[0]['user_id']
else:
return 0
@ -169,7 +177,7 @@ def getLastUserVersion(cur, userId):
recset = cur.fetchall()
if recset:
return recset[0][0]
return recset[0]['version_user']
else:
return 0
@ -187,7 +195,7 @@ def getLastTopicVersion(cur, topicId):
recset = cur.fetchall()
if recset:
return recset[0][0]
return recset[0]['version_topic']
else:
return 0
@ -205,7 +213,7 @@ def getLastPostVersion(cur, postId):
recset = cur.fetchall()
if recset:
return recset[0][0]
return recset[0]['version_post']
else:
return 0
@ -223,7 +231,7 @@ def getLastPost(cur):
recset = cur.fetchall()
if recset:
return recset[0][0]
return recset[0]['post_id']
else:
return 0
@ -232,7 +240,6 @@ def getLastPost(cur):
trace = traceback.format_exc()
print (trace)
def create_forum(cur, row, url):
forumId = verifyForum(cur, row[0])
@ -284,8 +291,9 @@ def create_topic(cur, forumId, row, authorId):
recset = cur.fetchall()
if (str(recset[0][4]) != str(row[1]) or str(recset[0][5]) != str(row[4] if row[4] != '-1' else None) or # there was a change in the topic information
str(recset[0][6]) != str(row[5] if row[5] != '-1' else None)):
if (str(recset[0]['board_topic']) != str(row[1]) or
str(recset[0]['views_topic']) != str(row[4] if row[4] != '-1' else None) or # there was a change in the topic information
str(recset[0]['posts_topic']) != str(row[5] if row[5] != '-1' else None)):
topicVersionId = int(getLastTopicVersion(cur, topicId) + 1)
@ -294,14 +302,14 @@ def create_topic(cur, forumId, row, authorId):
"%s, %s, %s, %s, %s, %s, %s)"
recset = [topicId, topicVersionId, forumId, authorId,
recset[0][3],
recset[0][4],
recset[0][5],
recset[0][6],
recset[0][7],
recset[0][8],
recset[0][9],
recset[0][10]]
recset[0]['title_topic'],
recset[0]['board_topic'],
recset[0]['views_topic'],
recset[0]['posts_topic'],
recset[0]['href_topic'],
recset[0]['dateadded_topic'],
recset[0]['dateinserted_topic'],
recset[0]['classification_topic']]
cur.execute(sql, recset)
sql = "Update topics set board_topic = %(board_topic)s, views_topic = %(views_topic)s, posts_topic = %(posts_topic)s, " \
@ -371,11 +379,17 @@ def create_user(cur, row, forumId, index):
# decode_decrypt_image_in_base64(recset[0][7])
if (str(recset[0][3]) != str(row[11][index] if row[11][index] != '-1' else None) or str(recset[0][4]) != str(row[12][index] if row[12][index] != '-1' else None) or
str(recset[0][5]) != str(row[13][index] if row[13][index] != '-1' else None) or str(recset[0][6]) != str(row[14][index] if row[14][index] != '-1' else None) or
str(recset[0][7]) != str(row[9][index] if row[9][index] != '-1' else None)): # there was a change in the user information
if (str(recset[0]['status_user']) != str(row[11][index] if row[11][index] != '-1' else None) or
str(recset[0]['reputation_user']) != str(row[12][index] if row[12][index] != '-1' else None) or
str(recset[0]['interest_user']) != str(row[13][index] if row[13][index] != '-1' else None) or
str(recset[0]['signature_user']) != str(row[14][index] if row[14][index] != '-1' else None) or
str(recset[0]['image_user']) != str(row[9][index] if row[9][index] != '-1' else None)): # there was a change in the user information
if (str(recset[0][3]) != 'Nan' or str(recset[0][4]) != 'Nan' or str(recset[0][5]) != 'Nan' or str(recset[0][6]) != 'Nan' or str(recset[0][7]) != 'Nan'):
if (str(recset[0]['status_user']) != 'Nan' or
str(recset[0]['reputation_user']) != 'Nan' or
str(recset[0]['interest_user']) != 'Nan' or
str(recset[0]['signature_user']) != 'Nan' or
str(recset[0]['image_user']) != 'Nan'):
userVersionId = int(getLastUserVersion(cur, userId) + 1)
@ -383,13 +397,13 @@ def create_user(cur, row, forumId, index):
"signature_user, image_user, dateinserted_user) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
recset = [userId, userVersionId, forumId,
recset[0][2],
recset[0][3],
recset[0][4],
recset[0][5],
recset[0][6],
recset[0][7],
recset[0][8]]
recset[0]['name_user'],
recset[0]['status_user'],
recset[0]['reputation_user'],
recset[0]['interest_user'],
recset[0]['signature_user'],
recset[0]['image_user'],
recset[0]['dateinserted_user']]
cur.execute(sql, recset)
@ -441,8 +455,9 @@ def create_posts(cur, row, forumId, topicId):
recset = cur.fetchall()
if (str(recset[0][3]) != str(row[15][i]) or str(recset[0][4]) != str(row[16][i] if row[16][i] != '-1' else None) or
str(recset[0][5]) != str(row[18][i] if row[18][i] != '-1' else None)): # there was a change in the post information
if (str(recset[0]['content_post']) != str(row[15][i]) or
str(recset[0]['feedback_post']) != str(row[16][i] if row[16][i] != '-1' else None) or
str(recset[0]['image_post']) != str(row[18][i] if row[18][i] != '-1' else None)): # there was a change in the post information
postVersionId = int(getLastPostVersion(cur, postId) + 1)
@ -450,11 +465,11 @@ def create_posts(cur, row, forumId, topicId):
"image_post, dateadded_post, dateinserted_post) Values (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
recset = [postId, postVersionId, topicId, userId,
recset[0][3],
recset[0][4],
recset[0][5],
recset[0][6],
recset[0][7]]
recset[0]['content_post'],
recset[0]['feedback_post'],
recset[0]['image_post'],
recset[0]['dateadded_post'],
recset[0]['dateinserted_post']]
cur.execute(sql, recset)
@ -481,7 +496,7 @@ def create_database(cur, con):
sql = "create table users (user_id integer NOT NULL, forum_id integer NOT NULL, name_user character varying(" \
"255) NOT NULL, status_user character varying(255) null, reputation_user character varying(255) null, " \
"interest_user character varying(5000) null, signature_user character varying(1000) null, " \
"image_user character varying(1000000) null, dateinserted_user timestamp(6) with time zone NOT NULL, " \
"image_user character varying(10000000) null, dateinserted_user timestamp(6) with time zone NOT NULL, " \
"constraint users_pk primary key (user_id), " \
"constraint users_forum_id_fkey foreign key (forum_id) references forums (forum_id))"
cur.execute(sql)
@ -492,7 +507,7 @@ def create_database(cur, con):
sql = "create table users_history(user_id integer NOT NULL, version_user integer not null, forum_id integer NOT NULL, " \
"name_user character varying(255) NOT NULL, status_user character varying(255) null, " \
"reputation_user character varying(255) null, interest_user character varying(5000) null, " \
"signature_user character varying(1000) null, image_user character varying(1000000) null, " \
"signature_user character varying(1000) null, image_user character varying(10000000) null, " \
"dateinserted_user timestamp(6) with time zone NOT NULL, " \
"constraint users_history_pk primary key (user_id, version_user), " \
"constraint users_history_user_id_fkey foreign key (user_id) references " \
@ -525,7 +540,7 @@ def create_database(cur, con):
sql = "create table posts(post_id integer NOT NULL, topic_id integer NOT NULL, " \
"user_id integer NOT NULL, content_post character varying(100000) NOT null, feedback_post integer null, " \
"image_post character varying(1000000) null, dateadded_post timestamp(6) with time zone NOT NULL, " \
"image_post character varying(10000000) null, dateadded_post timestamp(6) with time zone NOT NULL, " \
"dateinserted_post timestamp(6) with time zone NOT NULL, " \
"constraint posts_pk primary key (post_id), " \
"constraint posts_user_id_fkey foreign key (user_id) references users (user_id), constraint " \
@ -538,7 +553,7 @@ def create_database(cur, con):
sql = "create table posts_history(post_id integer NOT NULL, version_post integer not null, topic_id integer NOT NULL, " \
"user_id integer NOT NULL, content_post character varying(100000) NOT null, feedback_post integer null, " \
"image_post character varying(1000000) null, dateadded_post timestamp(6) with time zone NOT NULL, " \
"image_post character varying(10000000) null, dateadded_post timestamp(6) with time zone NOT NULL, " \
"dateinserted_post timestamp(6) with time zone NOT NULL, " \
"constraint posts_history_pk primary key (post_id, version_post), " \
"constraint posts_history_user_id_fkey foreign key (user_id) references users (user_id), " \


+ 6
- 1
Forums/Initialization/prepare_parser.py View File

@ -4,6 +4,8 @@ import codecs
import glob
import os, re
import shutil
from psycopg2.extras import RealDictCursor
from Forums.DB_Connection.db_connection import *
from Forums.BestCardingWorld.parser import *
from Forums.Cardingleaks.parser import *
@ -240,7 +242,7 @@ def new_parse(forum, url, createLog):
# Connecting to the database
con = connectDataBase()
cur = con.cursor()
cur = con.cursor(cursor_factory=RealDictCursor)
# Creating the tables (The database should be created manually)
create_database(cur, con)
@ -372,6 +374,9 @@ def new_parse(forum, url, createLog):
if createLog:
logFile.close()
cur.close()
con.close()
print("Parsing the " + forum + " forum and data classification done.")

Loading…
Cancel
Save