Merge remote-tracking branch 'origin/main'

# Conflicts:
#	Forums/Cardingleaks/parser.py
#	Forums/HiddenAnswers/parser.py
1 year ago · 07e320a1d3
--- a/.idea/DW_Pipeline_Test.iml
+++ b/.idea/DW_Pipeline_Test.iml
@ -30,6 +30,7 @@
        <option value="$MODULE_DIR$/MarketPlaces/DarkBazar" />
        <option value="$MODULE_DIR$/MarketPlaces/AnonMarket" />
        <option value="$MODULE_DIR$/MarketPlaces/Tor2door" />
        <option value="$MODULE_DIR$/MarketPlaces/MetaVerseMarket" />
      </list>
    </option>
  </component>
--- a/Forums/AbyssForum/crawler_selenium.py
+++ b/Forums/AbyssForum/crawler_selenium.py
@ -225,7 +225,7 @@ def crawlForum(driver):
                            has_next_topic_page = False
                    # end of loop
                    for i in range(counter):
                    for j in range(counter):
                        driver.back()
                    # comment out
--- a/Forums/Altenens/crawler_selenium.py
+++ b/Forums/Altenens/crawler_selenium.py
@ -173,28 +173,33 @@ def getNameFromURL(url):
 def getInterestedLinks():
    links = []
    # Hacking Tools
    links.append('https://altenens.is/forums/hacking-tools.469165/')
    # hash cracking
    links.append('https://altenens.is/forums/hash-cracking.469167/')
    # phishing and spamming
    links.append('https://altenens.is/forums/phishing-and-spamming.469223/')
    # pentesting
    links.append('https://altenens.is/forums/pentesting.469169/')
    # cracking tools
    # Hacking
    links.append('https://altenens.is/forums/hacking.469162/')
    # Hacking showoff
    links.append('https://altenens.is/forums/hacking-showoff.469232/')
    # Remote administration
    links.append('https://altenens.is/forums/remote-administration.469161/')
    # Cracking tools
    links.append('https://altenens.is/forums/cracking-tools.469204/')
    # Cracking Tools
    # Cracking tutorials
    links.append('https://altenens.is/forums/cracking-tutorials-other-methods.469205/')
    # Combo lists and configs
    links.append('https://altenens.is/forums/combolists-and-configs.469206/')
    # Programming
    links.append('https://altenens.is/forums/programming.469239/')
    return links
 # newest version of crawling
 def crawlForum(driver):
    print("Crawling the Altenens forum")
    linksToCrawl = getInterestedLinks()
    for link in linksToCrawl:
    i = 0
    while i < len(linksToCrawl):
        link = linksToCrawl[i]
        print('Crawling :', link)
        try:
            has_next_page = True
@ -235,7 +240,7 @@ def crawlForum(driver):
                        except NoSuchElementException:
                            has_next_topic_page = False
                    for i in range(counter):
                    for j in range(counter):
                        driver.back()
                    # comment out
@ -243,7 +248,7 @@ def crawlForum(driver):
                # comment out
                if count == 1:
                   break
                    break
                try:
                    link = driver.find_element(by=By.LINK_TEXT, value='Next').get_attribute('href')
@ -256,6 +261,7 @@ def crawlForum(driver):
        except Exception as e:
            print(link, e)
        i += 1
    print("Crawling the Altenens forum done.")
--- a/Forums/BestCardingWorld/crawler_selenium.py
+++ b/Forums/BestCardingWorld/crawler_selenium.py
@ -235,7 +235,7 @@ def crawlForum(driver):
                            has_next_topic_page = False
                    # end of loop
                    for i in range(counter):
                    for j in range(counter):
                        driver.back()
                    # comment out
--- a/Forums/Cardingleaks/crawler_selenium.py
+++ b/Forums/Cardingleaks/crawler_selenium.py
@ -181,18 +181,18 @@ def getNameFromURL(url):
 def getInterestedLinks():
    links = []
    # # carding methods
    # carding methods
    links.append('https://leaks.ws/forums/carding-methods.82/')
    # # carding schools
    # links.append('https://leaks.ws/forums/help-desk-carding-school.35/')
    # # carding discussion
    # links.append('https://leaks.ws/forums/carding-discussion-desk.58/')
    # # carding tutorials
    #  links.append('https://leaks.ws/forums/carding-tutorials.13/')
    # # carding tools and software
    # links.append('https://leaks.ws/forums/carding-tools-softwares.10/')
    # # exploits and cracking tools
    # links.append('https://leaks.ws/forums/exploits-cracking-tools.22/')
    # carding schools
    links.append('https://leaks.ws/forums/help-desk-carding-school.35/')
    # carding discussion
    links.append('https://leaks.ws/forums/carding-discussion-desk.58/')
    # carding tutorials
    links.append('https://leaks.ws/forums/carding-tutorials.13/')
    # carding tools and software
    links.append('https://leaks.ws/forums/carding-tools-softwares.10/')
    # exploits and cracking tools
    links.append('https://leaks.ws/forums/exploits-cracking-tools.22/')
    return links
@ -245,11 +245,11 @@ def crawlForum(driver):
                        except NoSuchElementException:
                            has_next_topic_page = False
                    for i in range(counter):
                    for j in range(counter):
                        driver.back()
                    # comment out
                    # break
                    break
                # comment out
                if count == 1:
--- a/Forums/Cardingleaks/parser.py
+++ b/Forums/Cardingleaks/parser.py
@ -102,14 +102,15 @@ def cardingleaks_listing_parser(soup: Tag):
    href = []                # 6 this variable should receive all cleaned urls (we will use this to do the marge between
                             # Listing and Description pages)
    addDate = []             # 7 when the topic was created (difficult to find)
    image_author = []        # 8 all author avatars used in each topic
    image_user = []          # 8 all user avatars used in each topic
    # Finding the board (should be just one)
    li = soup.find("h1", {"class": "p-title-value"})
    board = cleanString(li.text.strip())
    thread_list: ResultSet[Tag] = soup.find("div", {"class": "structItemContainer-group js-threadList"}).find_all("div", {"data-author": True})
    thread_list = soup.find('div', {"class": "structItemContainer-group structItemContainer-group--sticky"}).find_all('div', {"data-author": True}) + \
                  soup.find("div", {"class": "structItemContainer-group js-threadList"}).find_all("div", {"data-author": True})
    nm = len(thread_list)
@ -120,10 +121,13 @@ def cardingleaks_listing_parser(soup: Tag):
        thread_topic = thread.find("div", {"class": "structItem-title"}).text
        topic.append(cleanString(thread_topic.strip()))
        author_icon = thread.find("a", {"class": "avatar avatar--s"}).find("img")
        author_icon = author_icon.get('src')
        author_icon = author_icon.split('base64,')[-1]
        image_author.append(author_icon)
        author_icon = thread.find("a", {"class": "avatar avatar--s"})
        if author_icon is not None:
            author_icon = author_icon.find('img').get('src')
            author_icon = author_icon.split('base64,')[-1]
            image_user.append(author_icon)
        else:
            image_user.append('-1')
        thread_view = thread.find("dl", {"class": "pairs pairs--justified structItem-minor"}).find("dd").text
        # Context text view count (i.e., 8.8K) to numerical (i.e., 8800)
@ -141,7 +145,7 @@ def cardingleaks_listing_parser(soup: Tag):
        datetime_obj = datetime.strptime(thread_date, "%Y-%m-%dT%H:%M:%S%z")
        addDate.append(datetime_obj)
    return organizeTopics(forum, nm, board, author, topic, views, posts, href, addDate, image_author)
    return organizeTopics(forum, nm, board, author, topic, views, posts, href, addDate, image_user)
 def cardingleaks_links_parser(soup):
--- a/Forums/Classifier/classify_test.py
+++ b/Forums/Classifier/classify_test.py
--- a/Forums/CryptBB/crawler_selenium.py
+++ b/Forums/CryptBB/crawler_selenium.py
@ -199,28 +199,24 @@ def getNameFromURL(url):
 def getInterestedLinks():
    links = []
    # # Beginner Programming
    # Beginner Programming
    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=86')
    # # Beginner Carding and Fraud
    # links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=91')
    # # Beginner Hacking
    # links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=87')
    # # Newbie
    #  links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=84')
    # # Beginner Hardware
    # links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=89')
    # # Training Challenges
    # links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=96')
    # Beginner Carding and Fraud
    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=91')
    # Beginner Hacking
    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=87')
    # Newbie
    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=84')
    # Beginner Hardware
    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=89')
    # Training Challenges
    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=96')
    # Darknet Discussions
    # links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=88')
    # # Public Leaks and Warez
    # links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=97')
    # # Hacked Accounts and Database Dumps
    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=30')
    # # Android Moded pak
    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=53')
    # # Sell
    # links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=44')
    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=88')
    # Public Leaks and Warez
    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=97')
    # Sell
    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=44')
    return links
@ -260,9 +256,9 @@ def crawlForum(driver):
                            driver.refresh()
                        savePage(driver, driver.page_source, topic + f"page{counter}")  # very important
                        # comment out
                        if counter == 2:
                            break
                        # # comment out
                        # if counter == 2:
                        #     break
                        try:
                            temp = driver.find_element(By.XPATH, '/html/body/div/div[2]/div/div[2]/div')
@ -275,15 +271,15 @@ def crawlForum(driver):
                        except NoSuchElementException:
                            has_next_topic_page = False
                    for i in range(counter):
                    for j in range(counter):
                        driver.back()
                    # comment out
                    # break
                # comment out
                if count == 1:
                    break
                #     # comment out
                #     break
                #
                # # comment out
                # if count == 1:
                #     break
                try:
                    temp = driver.find_element(by=By.XPATH, value='/html/body/div/div[2]/div/div[2]/div')
--- a/Forums/CryptBB/parser.py
+++ b/Forums/CryptBB/parser.py
@ -40,7 +40,6 @@ def cryptBB_description_parser(soup):
    # Finding the repeated tag that corresponds to the listing of posts
    # try:
    posts = soup.find('table', {"class": "tborder tfixed clear"}).find('td', {"id": "posts_container"}).find_all(
        'div', {"class": "post"})
@ -48,6 +47,9 @@ def cryptBB_description_parser(soup):
    for ipost in posts:
        if ipost.find('div', {"class": "deleted_post_author"}):
            continue
        # Finding a first level of the HTML page
        post_wrapper = ipost.find('span', {"class": "largetext"})
@ -61,56 +63,49 @@ def cryptBB_description_parser(soup):
        smalltext = ipost.find('div', {"class": "post_author"})
        '''
        # Testing here two possibilities to find this status and combine them
        if ipost.find('div', {"class": "deleted_post_author"}):
            status.append(-1)
            interest.append(-1)
            reputation.append(-1)
            addDate.append(-1)
            post.append("THIS POST HAS BEEN REMOVED!")
            sign.append(-1)
            feedback.append(-1)
            continue
        '''
        # CryptBB does have membergroup and postgroup
        if smalltext is not None:
        membergroup = smalltext.find('div', {"class": "profile-rank"})
        postgroup = smalltext.find('div', {"class": "postgroup"})
        if membergroup != None:
            membergroup = membergroup.text.strip()
            if postgroup != None:
                postgroup = postgroup.text.strip()
                membergroup = membergroup + " - " + postgroup
        else:
            if postgroup != None:
                membergroup = postgroup.text.strip()
            # CryptBB does have membergroup and postgroup
            membergroup = smalltext.find('div', {"class": "profile-rank"})
            postgroup = smalltext.find('div', {"class": "postgroup"})
            if membergroup != None:
                membergroup = membergroup.text.strip()
                if postgroup != None:
                    postgroup = postgroup.text.strip()
                    membergroup = membergroup + " - " + postgroup
            else:
                membergroup = "-1"
        status.append(cleanString(membergroup))
        # Finding the interest of the author
        # CryptBB does not have blurb
        blurb = smalltext.find('li', {"class": "blurb"})
        if blurb != None:
            blurb = blurb.text.strip()
        else:
            blurb = "-1"
        interest.append(cleanString(blurb))
        # Finding the reputation of the user
        # CryptBB does have reputation
        author_stats = smalltext.find('div', {"class": "author_statistics"})
        karma = author_stats.find('strong')
        if karma != None:
            karma = karma.text
            karma = karma.replace("Community Rating: ", "")
            karma = karma.replace("Karma: ", "")
            karma = karma.strip()
                if postgroup != None:
                    membergroup = postgroup.text.strip()
                else:
                    membergroup = "-1"
            status.append(cleanString(membergroup))
            # Finding the interest of the author
            # CryptBB does not have blurb
            blurb = smalltext.find('li', {"class": "blurb"})
            if blurb != None:
                blurb = blurb.text.strip()
            else:
                blurb = "-1"
            interest.append(cleanString(blurb))
            # Finding the reputation of the user
            # CryptBB does have reputation
            author_stats = smalltext.find('div', {"class": "author_statistics"})
            karma = author_stats.find('strong')
            if karma != None:
                karma = karma.text
                karma = karma.replace("Community Rating: ", "")
                karma = karma.replace("Karma: ", "")
                karma = karma.strip()
            else:
                karma = "-1"
            reputation.append(cleanString(karma))
        else:
            karma = "-1"
        reputation.append(cleanString(karma))
            status.append('-1')
            interest.append('-1')
            reputation.append('-1')
        # Getting here another good tag to find the post date, post content and users' signature
@ -120,25 +115,30 @@ def cryptBB_description_parser(soup):
        # dt = dt.strip().split()
        dt = dt.strip()
        day=date.today()
        if "Yesterday" in dt:
        if "Today" in dt:
            today = day.strftime('%m-%d-%Y')
            stime = dt.replace('Today,','').strip()
            date_time_obj = today + ', '+stime
            date_time_obj = datetime.strptime(date_time_obj,'%m-%d-%Y, %I:%M %p')
        elif "Yesterday" in dt:
            yesterday = day - timedelta(days=1)
            yesterday = yesterday.strftime('%m-%d-%Y')
            stime = dt.replace('Yesterday,','').strip()
            date_time_obj = yesterday+ ', '+stime
            date_time_obj = yesterday + ', '+stime
            date_time_obj = datetime.strptime(date_time_obj,'%m-%d-%Y, %I:%M %p')
        elif "hour ago" in dt or "hours ago" in dt:
            day = day.strftime('%m-%d-%Y')
        elif "ago" in dt:
            date_time_obj = postarea.find('span', {"class": "post_date"}).find('span')['title']
            date_time_obj = datetime.strptime(date_time_obj, '%m-%d-%Y, %I:%M %p')
        else:
            date_time_obj = datetime.strptime(dt, '%m-%d-%Y, %I:%M %p')
            stime = date_time_obj.strftime('%b %d, %Y')
            sdate = date_time_obj.strftime('%I:%M %p')
        addDate.append(date_time_obj)
        # Finding the post
        inner = postarea.find('div', {"class": "post_body scaleimages"})
        quote = inner.find('blockquote')
        if quote is not None:
            quote.decompose()
        inner = inner.text.strip()
        post.append(cleanString(inner))
@ -210,6 +210,10 @@ def cryptBB_listing_parser(soup):
    itopics = soup.find_all('tr',{"class": "inline_row"})
    # Counting how many topics
    nm = len(itopics)
    for itopic in itopics:
        # For each topic found, the structure to get the rest of the information can be of two types. Testing all of them
@ -225,10 +229,6 @@ def cryptBB_listing_parser(soup):
        image_author.append(-1)
        # Counting how many topics we have found so far
        nm = len(topic)
        # Adding the url to the list of urls
        try:
            link = itopic.find('span', {"class": "subject_old"}).find('a').get('href')
@ -237,19 +237,24 @@ def cryptBB_listing_parser(soup):
        href.append(link)
        # Finding the author of the topic
        ps = itopic.find('div', {"class":"author smalltext"}).find('a').text
        ps = itopic.find('div', {"class":"author smalltext"}).text
        user = ps.strip()
        author.append(cleanString(user))
        # Finding the number of replies
        columns = itopic.findChildren('td',recursive=False)
        replies = columns[3].text
        posts.append(cleanString(replies))
        if replies == '-':
            posts.append('-1')
        else:
            posts.append(cleanString(replies))
        # Finding the number of Views
        tview = columns[4].text
        views.append(cleanString(tview))
        if tview == '-':
            views.append('-1')
        else:
            views.append(cleanString(tview))
        # If no information about when the topic was added, just assign "-1" to the variable
--- a/Forums/HiddenAnswers/crawler_selenium.py
+++ b/Forums/HiddenAnswers/crawler_selenium.py
@ -157,16 +157,20 @@ def getNameFromURL(url):
 def getInterestedLinks():
    links = []
    # Hacks
    # links.append('http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/index.php/questions/hacking')
    # links.append('http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/index.php/questions/darknet-and-tor')
    # links.append('http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/index.php/questions/internet')
    # hacking
    links.append('http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/index.php/questions/hacking')
    # darknet and tor
    links.append('http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/index.php/questions/darknet-and-tor')
    # internet
    links.append('http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/index.php/questions/internet')
    # links
    links.append('http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/index.php/questions/links')
    # programming
    links.append('http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/index.php/programming')
    # knowledge and information
    links.append('http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/index.php/knowledge-and-information')
    # other
    links.append('http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/index.php/other')
    return links
@ -206,12 +210,12 @@ def crawlForum(driver: webdriver.Firefox):
                            driver.refresh()
                        savePage(driver, driver.page_source, topic + f"page{counter}")  # very important
                        # comment out
                        if counter == 2:
                            break
                        # # comment out
                        # if counter == 2:
                        #     break
                        try:
                            page = ""  # no next page so far may have some later on
                            page = driver.find_element(by=By.CLASS_NAME, value='qa-page-next').get_attribute('href')
                            if page == "":
                                raise NoSuchElementException
                            counter += 1
@ -219,15 +223,15 @@ def crawlForum(driver: webdriver.Firefox):
                        except NoSuchElementException:
                            has_next_topic_page = False
                    for i in range(counter):
                    for j in range(counter):
                        driver.back()
                    # comment out
                    # break
                # comment out
                if count == 1:
                    break
                #     # comment out
                #     break
                #
                # # comment out
                # if count == 1:
                #     break
                try:
                    link = driver.find_element(by=By.CLASS_NAME, value='qa-page-next').get_attribute('href')
@ -248,14 +252,14 @@ def crawlForum(driver: webdriver.Firefox):
 # Returns 'True' if the link is Topic link
 def isDescriptionLink(url):
    if 'index.php' in url and 'questions' not in url:
    if 'http' not in url:
        return True
    return False
 # Returns True if the link is a listingPage link
 def isListingLink(url):
    if 'questions' in url:
    if 'http' in url:
        return True
    return False
--- a/Forums/HiddenAnswers/parser.py
+++ b/Forums/HiddenAnswers/parser.py
@ -12,7 +12,7 @@ from bs4 import BeautifulSoup, ResultSet, Tag
 # This is the method to parse the Description Pages (one page to each topic in the Listing Pages)
 def hiddenAnswers_description_parser(soup: BeautifulSoup):
 def HiddenAnswers_description_parser(soup: BeautifulSoup):
    # Fields to be parsed
@ -42,14 +42,22 @@ def hiddenAnswers_description_parser(soup: BeautifulSoup):
    datetime_obj = datetime.strptime(datetime_string, "%Y-%m-%dT%H:%M:%S")
    addDate.append(datetime_obj)
    question_user_status = question.find("span", {"class": "qa-q-view-who-title"}).text
    status.append(cleanString(question_user_status.strip()))
    question_user_karma = question.find("span", {"class": "qa-q-view-who-points-data"}).text
    # Convert karma to pure numerical string
    if question_user_karma.find("k") > -1:
        question_user_karma = str(float(question_user_karma.replace("k", "")) * 1000)
    reputation.append(cleanString(question_user_karma.strip()))
    question_user_status = question.find("span", {"class": "qa-q-view-who-title"})
    if question_user_status is not None:
        question_user_status = question_user_status.text
        status.append(cleanString(question_user_status.strip()))
    else:
        status.append('-1')
    question_user_karma = question.find("span", {"class": "qa-q-view-who-points-data"})
    if question_user_karma is not None:
        question_user_karma = question_user_karma.text
        # Convert karma to pure numerical string
        if question_user_karma.find("k") > -1:
            question_user_karma = str(float(question_user_karma.replace("k", "")) * 1000)
        reputation.append(cleanString(question_user_karma.strip()))
    else:
        reputation.append('-1')
    question_content = question.find("div", {"class": "qa-q-view-content qa-post-content"}).text
    post.append(cleanString(question_content.strip()))
@ -88,14 +96,22 @@ def hiddenAnswers_description_parser(soup: BeautifulSoup):
        post_data = replies.find("div", {"class": "qa-a-item-content qa-post-content"}).find("div",{"itemprop":"text"}).text
        post.append(cleanString(post_data.strip()))
        user_reputations = replies.find("span", {"class", "qa-a-item-who-title"}).text
        status.append(cleanString(user_reputations.strip()))
        user_reputations = replies.find("span", {"class", "qa-a-item-who-title"})
        if user_reputations is not None:
            user_reputations = user_reputations.text
            status.append(cleanString(user_reputations.strip()))
        else:
            status.append('-1')
        karma = replies.find("span", {"class": "qa-a-item-who-points-data"}).text
        # Convert karma to pure numerical string
        if karma.find("k") > -1:
            karma = str(float(karma.replace("k", "")) * 1000)
        reputation.append(cleanString(karma.strip()))
        karma = replies.find("span", {"class": "qa-a-item-who-points-data"})
        if karma is not None:
            karma = karma.text
            # Convert karma to pure numerical string
            if karma.find("k") > -1:
                karma = str(float(karma.replace("k", "")) * 1000)
            reputation.append(cleanString(karma.strip()))
        else:
            reputation.append('-1')
        feedback.append("-1")
        sign.append("-1")
@ -123,7 +139,7 @@ def hiddenAnswers_description_parser(soup: BeautifulSoup):
    return row
 def hiddenAnswers_listing_parser(soup: BeautifulSoup):
 def HiddenAnswers_listing_parser(soup: BeautifulSoup):
    nm: int = 0                    # this variable should receive the number of topics
    forum: str = "HiddenAnswers"   # 0 *forum name
@ -136,11 +152,12 @@ def hiddenAnswers_listing_parser(soup: BeautifulSoup):
    href: List[str] = []           # 6 this variable should receive all cleaned urls (we will use this to do the merge between
                                   # Listing and Description pages)
    addDate: List[str] = []        # 7 when the topic was created (difficult to find)
    image_author = []              # 8 all author avatars used in each topic
    image_user = []                # 8 all user avatars used in each topic
    # Finding the board
    literature = soup.find("div", {"class": "qa-main-heading"}).find("h1")
    board = literature.text
    board = soup.find("div", {"class": "qa-main-heading"}).find("h1").text
    board = board.replace('Recent questions in', '')
    board = cleanString(board.strip())
    queries_by_user: ResultSet[Tag] = soup.find("div", {"class": "qa-q-list"}).find_all("div", {"class": "qa-q-list-item"})
@ -148,9 +165,9 @@ def hiddenAnswers_listing_parser(soup: BeautifulSoup):
        topic_of_query = queries.find("div", {"class": "qa-q-item-title"}).find("a").text
        topic.append(cleanString(topic_of_query.strip()))
        image_author.append("-1")
        image_user.append("-1") # qa-q-item-where
        author = queries.find("span", {"class": "qa-q-item-who-data"}).find("a").text
        author = queries.find("span", {"class": "qa-q-item-who-data"}).text
        user.append(cleanString(author.strip()))
        num_answers = queries.find("span", {"class": "qa-a-count-data"}).text
@ -175,7 +192,7 @@ def hiddenAnswers_listing_parser(soup: BeautifulSoup):
    nm = len(topic)
    return organizeTopics(forum, nm, board, user, topic, view, post, href, addDate, image_author)
    return organizeTopics(forum, nm, board, user, topic, view, post, href, addDate, image_user)
 #need to change this method
 def hiddenanswers_links_parser(soup):
--- a/Forums/Initialization/forums_mining.py
+++ b/Forums/Initialization/forums_mining.py
@ -102,7 +102,7 @@ def opentor():
 # main method
 if __name__ == '__main__':
    # opentor()
    opentor()
    # assignment from forumsList.txt
    forumsList = getForums()
--- a/Forums/Initialization/prepare_parser.py
+++ b/Forums/Initialization/prepare_parser.py
@ -212,7 +212,7 @@ def persist_record(url, rec, cur, con, createLog, logFile, listingFile, descript
 def move_file(filePath, createLog, logFile):
    source = filePath
    destination = filePath.replace(os.path.basename(filePath), "") + r'Read/'
    destination = filePath.replace(os.path.basename(filePath), "") + r'Read/' + os.path.basename(filePath)
    try:
        shutil.move(source, destination, shutil.copy2)
@ -238,6 +238,9 @@ def new_parse(forum, url, createLog):
    from Forums.Initialization.forums_mining import config, CURRENT_DATE
    global nError
    nError = 0
    print("Parsing the " + forum + " forum and conduct data classification to store the information in the database.")
    # Connecting to the database
--- a/Forums/Libre/crawler_selenium.py
+++ b/Forums/Libre/crawler_selenium.py
@ -239,7 +239,7 @@ def crawlForum(driver):
                        except NoSuchElementException:
                            has_next_topic_page = False
                    for i in range(counter):
                    for j in range(counter):
                        driver.back()
                    # comment out
--- a/Forums/OnniForums/crawler_selenium.py
+++ b/Forums/OnniForums/crawler_selenium.py
@ -250,7 +250,7 @@ def crawlForum(driver):
                        except NoSuchElementException:
                            has_next_topic_page = False
                    for i in range(counter):
                    for j in range(counter):
                        driver.back()
                    # comment out
--- a/Forums/Procrax/crawler_selenium.py
+++ b/Forums/Procrax/crawler_selenium.py
@ -237,7 +237,7 @@ def crawlForum(driver):
                        except NoSuchElementException:
                            has_next_topic_page = False
                    for i in range(counter):
                    for j in range(counter):
                        driver.back()
                    # comment out
--- a/MarketPlaces/Classifier/classify_test.py
+++ b/MarketPlaces/Classifier/classify_test.py
--- a/MarketPlaces/Initialization/prepare_parser.py
+++ b/MarketPlaces/Initialization/prepare_parser.py
@ -295,6 +295,9 @@ def new_parse(marketPlace, url, createLog):
    from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
    global nError
    nError = 0
    print("Parsing the " + marketPlace + " market and conduct data classification to store the information in the database.")
    # Connecting to the database