Image tracking.

1 year ago · cfa4a1c501
--- a/Forums/Altenens/parser.py
+++ b/Forums/Altenens/parser.py
@ -100,7 +100,7 @@ def altenens_listing_parser(soup):
    href = []           # 6 this variable should receive all cleaned urls (we will use this to do the merge between
                        # Listing and Description pages)
    addDate = []        # 7 when the topic was created (difficult to find)
    image_user = []     # 9 all user avatars used in each topic
    image_user = []     # 8 all user avatars used in each topic

    board = soup.find('h1', {"class": "p-title-value"}).text
    board = cleanString(board.strip())
--- a/Forums/Cardingleaks/parser.py
+++ b/Forums/Cardingleaks/parser.py
@ -25,8 +25,8 @@ def cardingleaks_description_parser(soup: Tag):
    post = []               # 6 all messages of each post
    feedback = []           # 7 all feedbacks of each vendor (this was found in just one Forum and with a number format)
    addDate = []            # 8 all dates of each post
    image = []
    image_user = []
    image_user = []         # 9 all user avatars of each post
    image_post = []         # 10 all first images of each post

    li = soup.find("h1", {"class": "p-title-value"})
    topic = cleanString(li.text.strip())
@ -70,15 +70,18 @@ def cardingleaks_description_parser(soup: Tag):
            img = img.get('src').split('base64,')[-1]
        else:
            img = "-1"
        image.append(img)
        image_post.append(img)

        img = ipost.find('div', {"class": "message-avatar"}).find('img')
        img = img.get('src').split('base64,')[-1]
        if img is not None:
            img = img.get('src').split('base64,')[-1]
        else:
            img = "-1"
        image_user.append(img)
        
    # Populate the final variable (this should be a list with all fields scraped)

    row = (topic, user, status, reputation, interest, sign, post, feedback, addDate)
    row = (topic, user, status, reputation, interest, sign, post, feedback, addDate, image_user, image_post)

    # Sending the results

@ -88,17 +91,18 @@ def cardingleaks_description_parser(soup: Tag):

 def cardingleaks_listing_parser(soup: Tag):

    nm = 0              # *this variable should receive the number of topics
    nm = 0                   # *this variable should receive the number of topics
    forum = "Cardingleaks"   # 0 *forum name
    board = "-1"        # 1 *board name (the previous level of the topic in the Forum categorization tree.
                        # For instance: Security/Malware/Tools to hack Facebook. The board here should be Malware)
    author = []         # 2 *all authors of each topic
    topic = []          # 3 *all topics
    views = []          # 4 number of views of each topic
    posts = []          # 5 number of posts of each topic
    href = []           # 6 this variable should receive all cleaned urls (we will use this to do the merge between
                        # Listing and Description pages)
    addDate = []        # 7 when the topic was created (difficult to find)
    board = "-1"             # 1 *board name (the previous level of the topic in the Forum categorization tree.
                             # For instance: Security/Malware/Tools to hack Facebook. The board here should be Malware)
    author = []              # 2 *all authors of each topic
    topic = []               # 3 *all topics
    views = []               # 4 number of views of each topic
    posts = []               # 5 number of posts of each topic
    href = []                # 6 this variable should receive all cleaned urls (we will use this to do the merge between
                             # Listing and Description pages)
    addDate = []             # 7 when the topic was created (difficult to find)
    image_user = []          # 8 all user avatars used in each topic

    # Finding the board (should be just one)

@ -115,6 +119,11 @@ def cardingleaks_listing_parser(soup: Tag):
        
        thread_topic = thread.find("div", {"class": "structItem-title"}).text
        topic.append(cleanString(thread_topic.strip()))

        author_icon = thread.find("a", {"class": "avatar avatar--s"}).find("img")
        author_icon = author_icon.get('src')
        author_icon = author_icon.split('base64,')[-1]
        image_user.append(author_icon)
        
        thread_view = thread.find("dl", {"class": "pairs pairs--justified structItem-minor"}).find("dd").text
        # Convert text view count (e.g., 8.8K) to numerical (e.g., 8800)
@ -132,7 +141,7 @@ def cardingleaks_listing_parser(soup: Tag):
        datetime_obj = datetime.strptime(thread_date, "%Y-%m-%dT%H:%M:%S%z")
        addDate.append(datetime_obj)

    return organizeTopics(forum, nm, board, author, topic, views, posts, href, addDate)
    return organizeTopics(forum, nm, board, author, topic, views, posts, href, addDate, image_user)


 def cardingleaks_links_parser(soup):
--- a/Forums/CryptBB/parser.py
+++ b/Forums/CryptBB/parser.py
@ -25,8 +25,8 @@ def cryptBB_description_parser(soup):
    post = []               # 6 all messages of each post
    feedback = []           # 7 all feedbacks of each vendor (this was found in just one Forum and with a number format)
    addDate = []            # 8 all dates of each post
    image = []
    image_user = []
    image_user = []         # 9 all user avatars of each post
    image_post = []         # 10 all first images of each post

    # Finding the topic (should be just one coming from the Listing Page)

@ -162,15 +162,18 @@ def cryptBB_description_parser(soup):
            img = img.get('src').split('base64,')[-1]
        else:
            img = "-1"
        image.append(img)
        image_post.append(img)

        img = ipost.find('div', {"class": "author_avatar"}).find('img')
        img = img.get('src').split('base64,')[-1]
        if img is not None:
           img = img.get('src').split('base64,')[-1]
        else:
            img = "-1"
        image_user.append(img)

    # Populate the final variable (this should be a list with all fields scraped)

    row = (topic, user, status, reputation, interest, sign, post, feedback, addDate)
    row = (topic, user, status, reputation, interest, sign, post, feedback, addDate, image_user, image_post)

    # Sending the results

@ -191,6 +194,8 @@ def cryptBB_listing_parser(soup):
    href = []           # 6 this variable should receive all cleaned urls (we will use this to do the merge between
                        # Listing and Description pages)
    addDate = []        # 7 when the topic was created (difficult to find)
    image_user = []     # 8 all user avatars used in each topic


    # Finding the board (should be just one)

@ -214,6 +219,8 @@ def cryptBB_listing_parser(soup):
        topics = re.sub("\[\w*\]", '', topics)
        topic.append(cleanString(topics))

        image_user.append(-1)

        # Counting how many topics we have found so far

        nm = len(topic)
@ -244,7 +251,7 @@ def cryptBB_listing_parser(soup):

        addDate.append("-1")

    return organizeTopics(forum, nm, board, author, topic, views, posts, href, addDate)
    return organizeTopics(forum, nm, board, author, topic, views, posts, href, addDate, image_user)


 def cryptBB_links_parser(soup):
--- a/Forums/HiddenAnswers/parser.py
+++ b/Forums/HiddenAnswers/parser.py
@ -13,17 +13,20 @@ from bs4 import BeautifulSoup, ResultSet, Tag
 # This is the method to parse the Description Pages (one page to each topic in the Listing Pages)

 def HiddenAnswers_description_parser(soup: BeautifulSoup):
    topic: str = "-1"  # topic name
    user: List[str] = []  # all users of each post
    addDate: List[datetime] = []  # all dates of each post
    feedback: List[str] = []  # all feedbacks of each vendor (this was found in just one Forum and with a number format)
    status: List[str] = []  # all user's authority in each post such as (adm, member, dangerous)
    reputation: List[str] = []  # all user's karma in each post (usually found as a number)
    sign: List[str] = []  # all user's signature in each post (usually a standard message after the content of the post)
    post: List[str] = []  # all messages of each post
    interest: List[str] = []  # all user's interest in each post
    image = []
    image_user = []

    # Fields to be parsed

    topic: str = "-1"              # 0 topic name
    user: List[str] = []           # 1 all users of each post
    addDate: List[datetime] = []   # 2 all dates of each post
    feedback: List[str] = []       # 3 all feedbacks of each vendor (this was found in just one Forum and with a number format)
    status: List[str] = []         # 4 all user's authority in each post such as (adm, member, dangerous)
    reputation: List[str] = []     # 5 all user's karma in each post (usually found as a number)
    sign: List[str] = []           # 6 all user's signature in each post (usually a standard message after the content of the post)
    post: List[str] = []           # 7 all messages of each post
    interest: List[str] = []       # 8 all user's interest in each post
    image_user = []                # 9 all user avatars of each post
    image_post = []                # 10 all first images of each post

    # Finding the topic (should be just one coming from the Listing Page)
    li = soup.find("h1").find("span", {"itemprop": "name"})
@ -60,7 +63,7 @@ def HiddenAnswers_description_parser(soup: BeautifulSoup):
        img = img.get('src').split('base64,')[-1]
    else:
        img = "-1"
    image.append(img)
    image_post.append(img)

    img = question.find('span', {"class": "qa-q-view-avatar-meta"}).find('img')
    if img is not None:
@ -103,7 +106,7 @@ def HiddenAnswers_description_parser(soup: BeautifulSoup):
            img = img.get('src').split('base64,')[-1]
        else:
            img = "-1"
        image.append(img)
        image_post.append(img)

        img = replies.find('span', {"class": "qa-a-item-avatar-meta"}).find('img')
        if img is not None:
@ -114,24 +117,27 @@ def HiddenAnswers_description_parser(soup: BeautifulSoup):

    # Populate the final variable (this should be a list with all fields scraped)

    row = (topic, user, status, reputation, interest, sign, post, feedback, addDate)
    row = (topic, user, status, reputation, interest, sign, post, feedback, addDate, image_user, image_post)

    # Sending the results
    return row


 def HiddenAnswers_listing_parser(soup: BeautifulSoup):
    board = "-1"  # board name (the previous level of the topic in the Forum categorization tree.
    # For instance: Security/Malware/Tools to hack Facebook. The board here should be Malware)
    forum: str = "HiddenAnswers"
    nm: int = 0  # this variable should receive the number of topics
    topic: List[str] = []  # all topics
    user: List[str] = []  # all users of each topic
    post: List[int] = []  # number of posts of each topic
    view: List[int] = []  # number of views of each topic
    addDate: List[str] = []  # when the topic was created (difficult to find)
    href: List[str] = []  # this variable should receive all cleaned urls (we will use this to do the merge between
    # Listing and Description pages)

    nm: int = 0                    # this variable should receive the number of topics
    forum: str = "HiddenAnswers"   # 0 *forum name
    board = "-1"                   # 1 board name (the previous level of the topic in the Forum categorization tree.
                                   # For instance: Security/Malware/Tools to hack Facebook. The board here should be Malware)
    user: List[str] = []           # 2 all users of each topic
    topic: List[str] = []          # 3 all topics
    view: List[int] = []           # 4 number of views of each topic
    post: List[int] = []           # 5 number of posts of each topic
    href: List[str] = []           # 6 this variable should receive all cleaned urls (we will use this to do the merge between
                                   # Listing and Description pages)
    addDate: List[str] = []        # 7 when the topic was created (difficult to find)
    image_user = []                # 8 all user avatars used in each topic

    # Finding the board
    literature = soup.find("div", {"class": "qa-main-heading"}).find("h1")
    board = literature.text
@ -141,6 +147,8 @@ def HiddenAnswers_listing_parser(soup: BeautifulSoup):
    for queries in queries_by_user:
        topic_of_query = queries.find("div", {"class": "qa-q-item-title"}).find("a").text
        topic.append(cleanString(topic_of_query.strip()))

        image_user.append("-1")
        
        author = queries.find("span", {"class": "qa-q-item-who-data"}).find("a").text
        user.append(cleanString(author.strip()))
@ -167,7 +175,7 @@ def HiddenAnswers_listing_parser(soup: BeautifulSoup):
        
    nm = len(topic)

    return organizeTopics(forum, nm, board, user, topic, view, post, href, addDate)
    return organizeTopics(forum, nm, board, user, topic, view, post, href, addDate, image_user)

 #need to change this method
 def hiddenanswers_links_parser(soup):