From 0695609303d8a47e59b2e18b6bd739fffc19c743 Mon Sep 17 00:00:00 2001 From: westernmeadow Date: Mon, 11 Sep 2023 14:49:23 -0700 Subject: [PATCH] image tracking edits for forums --- Forums/Altenens/crawler_selenium.py | 29 +++++++++++-------------- Forums/Altenens/parser.py | 2 -- Forums/BestCardingWorld/parser.py | 8 +++++++ Forums/Cardingleaks/parser.py | 8 +++++++ Forums/CryptBB/crawler_selenium.py | 2 ++ Forums/CryptBB/parser.py | 8 +++++++ Forums/HiddenAnswers/parser.py | 15 +++++++++++++ Forums/Initialization/prepare_parser.py | 2 +- Forums/Utilities/utilities.py | 6 +---- 9 files changed, 56 insertions(+), 24 deletions(-) diff --git a/Forums/Altenens/crawler_selenium.py b/Forums/Altenens/crawler_selenium.py index 8f1994f..ec149ba 100644 --- a/Forums/Altenens/crawler_selenium.py +++ b/Forums/Altenens/crawler_selenium.py @@ -173,18 +173,18 @@ def getNameFromURL(url): def getInterestedLinks(): links = [] - # # Hacking Tools + # Hacking Tools links.append('https://altenens.is/forums/hacking-tools.469165/') - # # hash cracking - # links.append('https://altenens.is/forums/hash-cracking.469167/') - # # phishing and spamming - # links.append('https://altenens.is/forums/phishing-and-spamming.469223/') - # # pentesting - # links.append('https://altenens.is/forums/pentesting.469169/') - # # cracking tools - # links.append('https://altenens.is/forums/cracking-tools.469204/') - # # Cracking Tools - # links.append('https://altenens.is/forums/cracking-tutorials-other-methods.469205/') + # hash cracking + links.append('https://altenens.is/forums/hash-cracking.469167/') + # phishing and spamming + links.append('https://altenens.is/forums/phishing-and-spamming.469223/') + # pentesting + links.append('https://altenens.is/forums/pentesting.469169/') + # cracking tools + links.append('https://altenens.is/forums/cracking-tools.469204/') + # Cracking Tools + links.append('https://altenens.is/forums/cracking-tutorials-other-methods.469205/') return links @@ -194,9 +194,7 @@ def crawlForum(driver): linksToCrawl = getInterestedLinks() - i = 0 - while i < len(linksToCrawl): - link = linksToCrawl[i] + for link in linksToCrawl: print('Crawling :', link) try: has_next_page = True @@ -241,7 +239,7 @@ def crawlForum(driver): driver.back() # comment out - # break + break # comment out if count == 1: @@ -258,7 +256,6 @@ def crawlForum(driver): except Exception as e: print(link, e) - i += 1 print("Crawling the Altenens forum done.") diff --git a/Forums/Altenens/parser.py b/Forums/Altenens/parser.py index 2493c96..1d274ac 100644 --- a/Forums/Altenens/parser.py +++ b/Forums/Altenens/parser.py @@ -1,7 +1,5 @@ __author__ = 'DarkWeb' -from cytoolz.functoolz import partial - # Here, we are importing the auxiliary functions to clean or convert data from Forums.Utilities.utilities import * from datetime import date diff --git a/Forums/BestCardingWorld/parser.py b/Forums/BestCardingWorld/parser.py index 5a294c6..7ad385b 100644 --- a/Forums/BestCardingWorld/parser.py +++ b/Forums/BestCardingWorld/parser.py @@ -25,6 +25,7 @@ def bestcardingworld_description_parser(soup): sign = [] # 6 all user's signature in each post (usually a standard message after the content of the post) post = [] # 7 all messages of each post interest = [] # 8 all user's interest in each post + image = [] image_user = [] # Finding the topic (should be just one coming from the Listing Page) @@ -151,6 +152,13 @@ def bestcardingworld_description_parser(soup): feedback.append("-1") + img = ipost.find('div', {"class": "content"}).find('img') + if img is not None: + img = img.get('src').split('base64,')[-1] + else: + img = "-1" + image.append(img) + img = ipost.find('div', {"class": "avatar-container"}).find('img', {"class": "avatar"}) img = img.get('src').split('base64,')[-1] image_user.append(img) diff --git a/Forums/Cardingleaks/parser.py b/Forums/Cardingleaks/parser.py index 7ab139d..4d0b345 100644 --- a/Forums/Cardingleaks/parser.py +++ b/Forums/Cardingleaks/parser.py @@ -25,6 +25,7 @@ def cardingleaks_description_parser(soup: Tag): post = [] # 6 all messages of each post feedback = [] # 7 all feedbacks of each vendor (this was found in just one Forum and with a number format) addDate = [] # 8 all dates of each post + image = [] image_user = [] li = soup.find("h1", {"class": "p-title-value"}) @@ -64,6 +65,13 @@ def cardingleaks_description_parser(soup: Tag): datetime_obj = datetime.strptime(datetime_text, "%Y-%m-%dT%H:%M:%S%z") addDate.append(datetime_obj) + img = ipost.find('div', {"class": "message-content js-messageContent"}).find('img') + if img is not None: + img = img.get('src').split('base64,')[-1] + else: + img = "-1" + image.append(img) + img = ipost.find('div', {"class": "message-avatar"}).find('img') img = img.get('src').split('base64,')[-1] image_user.append(img) diff --git a/Forums/CryptBB/crawler_selenium.py b/Forums/CryptBB/crawler_selenium.py index 637078c..bcef5a8 100644 --- a/Forums/CryptBB/crawler_selenium.py +++ b/Forums/CryptBB/crawler_selenium.py @@ -219,6 +219,8 @@ def getInterestedLinks(): # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=30') # # Android Moded pak # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=53') + # # Sell + # links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=44') return links diff --git a/Forums/CryptBB/parser.py b/Forums/CryptBB/parser.py index bfe4403..c137336 100644 --- a/Forums/CryptBB/parser.py +++ b/Forums/CryptBB/parser.py @@ -25,6 +25,7 @@ def cryptBB_description_parser(soup): post = [] # 6 all messages of each post feedback = [] # 7 all feedbacks of each vendor (this was found in just one Forum and with a number format) addDate = [] # 8 all dates of each post + image = [] image_user = [] # Finding the topic (should be just one coming from the Listing Page) @@ -156,6 +157,13 @@ def cryptBB_description_parser(soup): feedback.append("-1") + img = ipost.find('div', {"class": "post_body scaleimages"}).find('img') + if img is not None: + img = img.get('src').split('base64,')[-1] + else: + img = "-1" + image.append(img) + img = ipost.find('div', {"class": "author_avatar"}).find('img') img = img.get('src').split('base64,')[-1] image_user.append(img) diff --git a/Forums/HiddenAnswers/parser.py b/Forums/HiddenAnswers/parser.py index e42ace8..bcf8e33 100644 --- a/Forums/HiddenAnswers/parser.py +++ b/Forums/HiddenAnswers/parser.py @@ -22,6 +22,7 @@ def HiddenAnswers_description_parser(soup: BeautifulSoup): sign: List[str] = [] # all user's signature in each post (usually a standard message after the content of the post) post: List[str] = [] # all messages of each post interest: List[str] = [] # all user's interest in each post + image = [] image_user = [] # Finding the topic (should be just one coming from the Listing Page) @@ -54,6 +55,13 @@ def HiddenAnswers_description_parser(soup: BeautifulSoup): sign.append("-1") interest.append("-1") + img = question.find('div', {"class": "qa-q-view-content qa-post-content"}).find('img') + if img is not None: + img = img.get('src').split('base64,')[-1] + else: + img = "-1" + image.append(img) + img = question.find('span', {"class": "qa-q-view-avatar-meta"}).find('img') if img is not None: img = img.get('src').split('base64,')[-1] @@ -90,6 +98,13 @@ def HiddenAnswers_description_parser(soup: BeautifulSoup): sign.append("-1") interest.append("-1") + img = replies.find("div", {"class": "qa-a-item-content qa-post-content"}).find("div",{"itemprop":"text"}).find('img') + if img is not None: + img = img.get('src').split('base64,')[-1] + else: + img = "-1" + image.append(img) + img = replies.find('span', {"class": "qa-a-item-avatar-meta"}).find('img') if img is not None: img = img.get('src').split('base64,')[-1] diff --git a/Forums/Initialization/prepare_parser.py b/Forums/Initialization/prepare_parser.py index d4abdce..5251aad 100644 --- a/Forums/Initialization/prepare_parser.py +++ b/Forums/Initialization/prepare_parser.py @@ -361,7 +361,7 @@ def new_parse(forum, url, createLog): if createLog: logFile.write( str(nError) + f". There was a problem to locate the file(s) for {listingFile}" - f" in the Description section!\n") + f" in the Description section!\n\n") if not (readDescriptionError or parseDescriptionError or persistDescriptionError or moveDescriptionError or findDescriptionError): diff --git a/Forums/Utilities/utilities.py b/Forums/Utilities/utilities.py index 14b2a1e..741ec1f 100644 --- a/Forums/Utilities/utilities.py +++ b/Forums/Utilities/utilities.py @@ -199,12 +199,10 @@ def organizeTopics(forum, nm, board, author, topic, views, posts, href, addDate, current_time = datetime.now() day = current_time.strftime("%m/%d/%Y") + ahora = current_time.strftime("%I:%M:%S") for n in range(nm): - current_time += timedelta(seconds=2) - ahora = current_time.strftime("%I:%M:%S") - lne = forum # 0 lne += "," lne += board # 1 @@ -224,8 +222,6 @@ def organizeTopics(forum, nm, board, author, topic, views, posts, href, addDate, lne += day + " " + ahora # 8 lne += "," lne += "-1" if len(image_author) == 0 else str(image_author[n]) # 9 image_user - - lne += "," lne += "-1" # 10 name_user lne += ","