|
|
@ -7,6 +7,7 @@ import shutil |
|
|
|
from Forums.DB_Connection.db_connection import * |
|
|
|
from Forums.BestCardingWorld.parser import * |
|
|
|
from Forums.CryptBB.parser import * |
|
|
|
from Forums.OnniForums.parser import * |
|
|
|
|
|
|
|
from Forums.Classifier.classify_product import predict |
|
|
|
# from DarkWebMining_Sample.Forums.Classifier.classify_product import predict_semi |
|
|
@ -23,8 +24,8 @@ def isRussianForum(forum): |
|
|
|
|
|
|
|
iforum = iforum.replace('\n','') |
|
|
|
if iforum == forum: |
|
|
|
result = True |
|
|
|
break |
|
|
|
result = True |
|
|
|
break |
|
|
|
|
|
|
|
return result |
|
|
|
|
|
|
@ -148,6 +149,9 @@ def new_parse(forum, url, createLog): |
|
|
|
rmm = bestcardingworld_description_parser(soup) |
|
|
|
elif forum == "CryptBB": |
|
|
|
rmm = cryptBB_description_parser(soup) |
|
|
|
elif forum == "OnniForums": |
|
|
|
rmm = onniForums_description_parser(soup) |
|
|
|
|
|
|
|
|
|
|
|
# key = u"Top:" + rmm[0].upper().strip() + u" User:" + rmm[2][0].upper().strip() |
|
|
|
key = u"Url:" + os.path.basename(line2).replace(".html", "") |
|
|
@ -225,6 +229,8 @@ def new_parse(forum, url, createLog): |
|
|
|
rw = bestcardingworld_listing_parser(soup) |
|
|
|
elif forum == "CryptBB": |
|
|
|
rw = cryptBB_listing_parser(soup) |
|
|
|
elif forum == "OnniForums": |
|
|
|
rw = onniForums_listing_parser(soup) |
|
|
|
|
|
|
|
except: |
|
|
|
|
|
|
@ -246,9 +252,11 @@ def new_parse(forum, url, createLog): |
|
|
|
for rec in rw: |
|
|
|
|
|
|
|
rec = rec.split(',') |
|
|
|
# print(rec) |
|
|
|
|
|
|
|
# key = u"Top:" + rec[1].upper().strip() + u" User:" + rec[5].upper().strip() |
|
|
|
key = u"Url:" + cleanLink(rec[6]) |
|
|
|
print(key) |
|
|
|
|
|
|
|
if key in detPage: |
|
|
|
|
|
|
|