Browse Source

image tracking edits for forums

main
westernmeadow 1 year ago
parent
commit
0695609303
9 changed files with 56 additions and 24 deletions
  1. +13
    -16
      Forums/Altenens/crawler_selenium.py
  2. +0
    -2
      Forums/Altenens/parser.py
  3. +8
    -0
      Forums/BestCardingWorld/parser.py
  4. +8
    -0
      Forums/Cardingleaks/parser.py
  5. +2
    -0
      Forums/CryptBB/crawler_selenium.py
  6. +8
    -0
      Forums/CryptBB/parser.py
  7. +15
    -0
      Forums/HiddenAnswers/parser.py
  8. +1
    -1
      Forums/Initialization/prepare_parser.py
  9. +1
    -5
      Forums/Utilities/utilities.py

+ 13
- 16
Forums/Altenens/crawler_selenium.py View File

@@ -173,18 +173,18 @@ def getNameFromURL(url):
def getInterestedLinks():
links = []
# # Hacking Tools
# Hacking Tools
links.append('https://altenens.is/forums/hacking-tools.469165/')
# # hash cracking
# links.append('https://altenens.is/forums/hash-cracking.469167/')
# # phishing and spamming
# links.append('https://altenens.is/forums/phishing-and-spamming.469223/')
# # pentesting
# links.append('https://altenens.is/forums/pentesting.469169/')
# # cracking tools
# links.append('https://altenens.is/forums/cracking-tools.469204/')
# # Cracking Tools
# links.append('https://altenens.is/forums/cracking-tutorials-other-methods.469205/')
# hash cracking
links.append('https://altenens.is/forums/hash-cracking.469167/')
# phishing and spamming
links.append('https://altenens.is/forums/phishing-and-spamming.469223/')
# pentesting
links.append('https://altenens.is/forums/pentesting.469169/')
# cracking tools
links.append('https://altenens.is/forums/cracking-tools.469204/')
# Cracking Tools
links.append('https://altenens.is/forums/cracking-tutorials-other-methods.469205/')
return links
@@ -194,9 +194,7 @@ def crawlForum(driver):
linksToCrawl = getInterestedLinks()
i = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
for link in linksToCrawl:
print('Crawling :', link)
try:
has_next_page = True
@@ -241,7 +239,7 @@ def crawlForum(driver):
driver.back()
# comment out
# break
break
# comment out
if count == 1:
@@ -258,7 +256,6 @@ def crawlForum(driver):
except Exception as e:
print(link, e)
i += 1
print("Crawling the Altenens forum done.")


+ 0
- 2
Forums/Altenens/parser.py View File

@@ -1,7 +1,5 @@
__author__ = 'DarkWeb'
from cytoolz.functoolz import partial
# Here, we are importing the auxiliary functions to clean or convert data
from Forums.Utilities.utilities import *
from datetime import date


+ 8
- 0
Forums/BestCardingWorld/parser.py View File

@@ -25,6 +25,7 @@ def bestcardingworld_description_parser(soup):
sign = [] # 6 all user's signature in each post (usually a standard message after the content of the post)
post = [] # 7 all messages of each post
interest = [] # 8 all user's interest in each post
image = []
image_user = []
# Finding the topic (should be just one coming from the Listing Page)
@@ -151,6 +152,13 @@ def bestcardingworld_description_parser(soup):
feedback.append("-1")
img = ipost.find('div', {"class": "content"}).find('img')
if img is not None:
img = img.get('src').split('base64,')[-1]
else:
img = "-1"
image.append(img)
img = ipost.find('div', {"class": "avatar-container"}).find('img', {"class": "avatar"})
img = img.get('src').split('base64,')[-1]
image_user.append(img)


+ 8
- 0
Forums/Cardingleaks/parser.py View File

@@ -25,6 +25,7 @@ def cardingleaks_description_parser(soup: Tag):
post = [] # 6 all messages of each post
feedback = [] # 7 all feedbacks of each vendor (this was found in just one Forum and with a number format)
addDate = [] # 8 all dates of each post
image = []
image_user = []
li = soup.find("h1", {"class": "p-title-value"})
@@ -64,6 +65,13 @@ def cardingleaks_description_parser(soup: Tag):
datetime_obj = datetime.strptime(datetime_text, "%Y-%m-%dT%H:%M:%S%z")
addDate.append(datetime_obj)
img = ipost.find('div', {"class": "message-content js-messageContent"}).find('img')
if img is not None:
img = img.get('src').split('base64,')[-1]
else:
img = "-1"
image.append(img)
img = ipost.find('div', {"class": "message-avatar"}).find('img')
img = img.get('src').split('base64,')[-1]
image_user.append(img)


+ 2
- 0
Forums/CryptBB/crawler_selenium.py View File

@@ -219,6 +219,8 @@ def getInterestedLinks():
# links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=30')
# # Android Moded pak
# links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=53')
# # Sell
# links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=44')
return links


+ 8
- 0
Forums/CryptBB/parser.py View File

@@ -25,6 +25,7 @@ def cryptBB_description_parser(soup):
post = [] # 6 all messages of each post
feedback = [] # 7 all feedbacks of each vendor (this was found in just one Forum and with a number format)
addDate = [] # 8 all dates of each post
image = []
image_user = []
# Finding the topic (should be just one coming from the Listing Page)
@@ -156,6 +157,13 @@ def cryptBB_description_parser(soup):
feedback.append("-1")
img = ipost.find('div', {"class": "post_body scaleimages"}).find('img')
if img is not None:
img = img.get('src').split('base64,')[-1]
else:
img = "-1"
image.append(img)
img = ipost.find('div', {"class": "author_avatar"}).find('img')
img = img.get('src').split('base64,')[-1]
image_user.append(img)


+ 15
- 0
Forums/HiddenAnswers/parser.py View File

@@ -22,6 +22,7 @@ def HiddenAnswers_description_parser(soup: BeautifulSoup):
sign: List[str] = [] # all user's signature in each post (usually a standard message after the content of the post)
post: List[str] = [] # all messages of each post
interest: List[str] = [] # all user's interest in each post
image = []
image_user = []
# Finding the topic (should be just one coming from the Listing Page)
@@ -54,6 +55,13 @@ def HiddenAnswers_description_parser(soup: BeautifulSoup):
sign.append("-1")
interest.append("-1")
img = question.find('div', {"class": "qa-q-view-content qa-post-content"}).find('img')
if img is not None:
img = img.get('src').split('base64,')[-1]
else:
img = "-1"
image.append(img)
img = question.find('span', {"class": "qa-q-view-avatar-meta"}).find('img')
if img is not None:
img = img.get('src').split('base64,')[-1]
@@ -90,6 +98,13 @@ def HiddenAnswers_description_parser(soup: BeautifulSoup):
sign.append("-1")
interest.append("-1")
img = replies.find("div", {"class": "qa-a-item-content qa-post-content"}).find("div",{"itemprop":"text"}).find('img')
if img is not None:
img = img.get('src').split('base64,')[-1]
else:
img = "-1"
image.append(img)
img = replies.find('span', {"class": "qa-a-item-avatar-meta"}).find('img')
if img is not None:
img = img.get('src').split('base64,')[-1]


+ 1
- 1
Forums/Initialization/prepare_parser.py View File

@@ -361,7 +361,7 @@ def new_parse(forum, url, createLog):
if createLog:
logFile.write(
str(nError) + f". There was a problem to locate the file(s) for {listingFile}"
f" in the Description section!\n")
f" in the Description section!\n\n")
if not (readDescriptionError or parseDescriptionError or persistDescriptionError
or moveDescriptionError or findDescriptionError):


+ 1
- 5
Forums/Utilities/utilities.py View File

@@ -199,12 +199,10 @@ def organizeTopics(forum, nm, board, author, topic, views, posts, href, addDate,
current_time = datetime.now()
day = current_time.strftime("%m/%d/%Y")
ahora = current_time.strftime("%I:%M:%S")
for n in range(nm):
current_time += timedelta(seconds=2)
ahora = current_time.strftime("%I:%M:%S")
lne = forum # 0
lne += ","
lne += board # 1
@@ -224,8 +222,6 @@ def organizeTopics(forum, nm, board, author, topic, views, posts, href, addDate,
lne += day + " " + ahora # 8
lne += ","
lne += "-1" if len(image_author) == 0 else str(image_author[n]) # 9 image_user
lne += ","
lne += "-1" # 10 name_user
lne += ","


Loading…
Cancel
Save