From 2dc18e23f4ccc7b1aae62c9e78b0b309379f0d22 Mon Sep 17 00:00:00 2001
From: westernmeadow <wkwan626@gmail.com>
Date: Sun, 17 Sep 2023 19:14:42 -0700
Subject: [PATCH] finish full crawl and parse runs for CryptBB and HiddenAnswers

Enable every interested link and full topic pagination for the CryptBB
and HiddenAnswers crawlers, harden both parsers against deleted posts
and missing author, avatar, and karma fields, rename the classifier
test scripts to classify_test.py, and fix move_file so the destination
path includes the file name.

---
 .idea/DW_Pipeline_Test.iml                    |   1 +
 Forums/AbyssForum/crawler_selenium.py         |   2 +-
 Forums/Altenens/crawler_selenium.py           |  32 +++--
 Forums/BestCardingWorld/crawler_selenium.py   |   2 +-
 Forums/Cardingleaks/crawler_selenium.py       |  26 ++--
 Forums/Cardingleaks/parser.py                 |  14 +-
 .../{test_classify.py => classify_test.py}    |   0
 Forums/CryptBB/crawler_selenium.py            |  56 ++++----
 Forums/CryptBB/parser.py                      | 129 +++++++++---------
 Forums/HiddenAnswers/crawler_selenium.py      |  48 ++++---
 Forums/HiddenAnswers/parser.py                |  55 +++++---
 Forums/Initialization/forums_mining.py        |   2 +-
 Forums/Initialization/prepare_parser.py       |   5 +-
 Forums/Libre/crawler_selenium.py              |   2 +-
 Forums/OnniForums/crawler_selenium.py         |   2 +-
 Forums/Procrax/crawler_selenium.py            |   2 +-
 .../{test_classify.py => classify_test.py}    |   0
 MarketPlaces/Initialization/prepare_parser.py |   3 +
 18 files changed, 210 insertions(+), 171 deletions(-)
 rename Forums/Classifier/{test_classify.py => classify_test.py} (100%)
 rename MarketPlaces/Classifier/{test_classify.py => classify_test.py} (100%)

diff --git a/.idea/DW_Pipeline_Test.iml b/.idea/DW_Pipeline_Test.iml
index cd99e29..5a5ac36 100644
--- a/.idea/DW_Pipeline_Test.iml
+++ b/.idea/DW_Pipeline_Test.iml
@@ -30,6 +30,7 @@
         <option value="$MODULE_DIR$/MarketPlaces/DarkBazar" />
         <option value="$MODULE_DIR$/MarketPlaces/AnonMarket" />
         <option value="$MODULE_DIR$/MarketPlaces/Tor2door" />
+        <option value="$MODULE_DIR$/MarketPlaces/MetaVerseMarket" />
       </list>
     </option>
   </component>
diff --git a/Forums/AbyssForum/crawler_selenium.py b/Forums/AbyssForum/crawler_selenium.py
index f33b521..071abb0 100644
--- a/Forums/AbyssForum/crawler_selenium.py
+++ b/Forums/AbyssForum/crawler_selenium.py
@@ -225,7 +225,7 @@ def crawlForum(driver):
                             has_next_topic_page = False
 
                     # end of loop
-                    for i in range(counter):
+                    for j in range(counter):
                         driver.back()
 
                     # comment out
diff --git a/Forums/Altenens/crawler_selenium.py b/Forums/Altenens/crawler_selenium.py
index ec149ba..d13847a 100644
--- a/Forums/Altenens/crawler_selenium.py
+++ b/Forums/Altenens/crawler_selenium.py
@@ -173,28 +173,33 @@ def getNameFromURL(url):
 def getInterestedLinks():
     links = []
 
-    # Hacking Tools
-    links.append('https://altenens.is/forums/hacking-tools.469165/')
-    # hash cracking
-    links.append('https://altenens.is/forums/hash-cracking.469167/')
-    # phishing and spamming
-    links.append('https://altenens.is/forums/phishing-and-spamming.469223/')
-    # pentesting
-    links.append('https://altenens.is/forums/pentesting.469169/')
-    # cracking tools
+    # Hacking
+    links.append('https://altenens.is/forums/hacking.469162/')
+    # Hacking showoff
+    links.append('https://altenens.is/forums/hacking-showoff.469232/')
+    # Remote administration
+    links.append('https://altenens.is/forums/remote-administration.469161/')
+    # Cracking tools
     links.append('https://altenens.is/forums/cracking-tools.469204/')
-    # Cracking Tools
+    # Cracking tutorials
     links.append('https://altenens.is/forums/cracking-tutorials-other-methods.469205/')
+    # Combo lists and configs
+    links.append('https://altenens.is/forums/combolists-and-configs.469206/')
+    # Programming
+    links.append('https://altenens.is/forums/programming.469239/')
 
     return links
 
+
 # newest version of crawling
 def crawlForum(driver):
     print("Crawling the Altenens forum")
 
     linksToCrawl = getInterestedLinks()
 
-    for link in linksToCrawl:
+    i = 0
+    while i < len(linksToCrawl):
+        link = linksToCrawl[i]
         print('Crawling :', link)
         try:
             has_next_page = True
@@ -235,7 +240,7 @@ def crawlForum(driver):
                         except NoSuchElementException:
                             has_next_topic_page = False
 
-                    for i in range(counter):
+                    for j in range(counter):
                         driver.back()
 
                     # comment out
@@ -243,7 +248,7 @@ def crawlForum(driver):
 
                 # comment out
                 if count == 1:
-                   break
+                    break
 
                 try:
                     link = driver.find_element(by=By.LINK_TEXT, value='Next').get_attribute('href')
@@ -256,6 +261,7 @@ def crawlForum(driver):
 
         except Exception as e:
             print(link, e)
+        i += 1
 
     print("Crawling the Altenens forum done.")
 
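Note on the rename from `i` to `j` in the inner loop: now that the outer
traversal keeps its own index `i`, reusing the name inside `for i in
range(counter)` would rebind the same local and corrupt the traversal. A
minimal self-contained sketch (all names illustrative):

```python
# With the outer while-loop index held in `i`, an inner
# `for i in range(counter)` would leave i == counter - 1 after the
# inner loop finishes, skipping most links. Renaming to `j` avoids it.
links = ['a', 'b', 'c']
i = 0
while i < len(links):
    counter = 5
    for j in range(counter):   # using `i` here would clobber the outer index
        pass
    i += 1                     # safe: `j` never touched `i`
```
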
diff --git a/Forums/BestCardingWorld/crawler_selenium.py b/Forums/BestCardingWorld/crawler_selenium.py
index 487863b..6c3bdc9 100644
--- a/Forums/BestCardingWorld/crawler_selenium.py
+++ b/Forums/BestCardingWorld/crawler_selenium.py
@@ -235,7 +235,7 @@ def crawlForum(driver):
                             has_next_topic_page = False
 
                     # end of loop
-                    for i in range(counter):
+                    for j in range(counter):
                         driver.back()
 
                     # comment out
diff --git a/Forums/Cardingleaks/crawler_selenium.py b/Forums/Cardingleaks/crawler_selenium.py
index caf4a9a..1e89751 100644
--- a/Forums/Cardingleaks/crawler_selenium.py
+++ b/Forums/Cardingleaks/crawler_selenium.py
@@ -181,18 +181,18 @@ def getNameFromURL(url):
 def getInterestedLinks():
     links = []
 
-    # # carding methods
+    # carding methods
     links.append('https://leaks.ws/forums/carding-methods.82/')
-    # # carding schools
-    # links.append('https://leaks.ws/forums/help-desk-carding-school.35/')
-    # # carding discussion
-    # links.append('https://leaks.ws/forums/carding-discussion-desk.58/')
-    # # carding tutorials
-    #  links.append('https://leaks.ws/forums/carding-tutorials.13/')
-    # # carding tools and software
-    # links.append('https://leaks.ws/forums/carding-tools-softwares.10/')
-    # # exploits and cracking tools
-    # links.append('https://leaks.ws/forums/exploits-cracking-tools.22/')
+    # carding schools
+    links.append('https://leaks.ws/forums/help-desk-carding-school.35/')
+    # carding discussion
+    links.append('https://leaks.ws/forums/carding-discussion-desk.58/')
+    # carding tutorials
+    links.append('https://leaks.ws/forums/carding-tutorials.13/')
+    # carding tools and software
+    links.append('https://leaks.ws/forums/carding-tools-softwares.10/')
+    # exploits and cracking tools
+    links.append('https://leaks.ws/forums/exploits-cracking-tools.22/')
 
     return links
 
@@ -245,11 +245,11 @@ def crawlForum(driver):
                         except NoSuchElementException:
                             has_next_topic_page = False
 
-                    for i in range(counter):
+                    for j in range(counter):
                         driver.back()
 
                     # comment out
-                    # break
+                    break
 
                 # comment out
                 if count == 1:
diff --git a/Forums/Cardingleaks/parser.py b/Forums/Cardingleaks/parser.py
index 022fbe1..f913243 100644
--- a/Forums/Cardingleaks/parser.py
+++ b/Forums/Cardingleaks/parser.py
@@ -109,7 +109,8 @@ def cardingleaks_listing_parser(soup: Tag):
     li = soup.find("h1", {"class": "p-title-value"})
     board = cleanString(li.text.strip())
 
-    thread_list: ResultSet[Tag] = soup.find("div", {"class": "structItemContainer-group js-threadList"}).find_all("div", {"data-author": True})
+    thread_list = soup.find('div', {"class": "structItemContainer-group structItemContainer-group--sticky"}).find_all('div', {"data-author": True}) + \
+                  soup.find("div", {"class": "structItemContainer-group js-threadList"}).find_all("div", {"data-author": True})
     
     nm = len(thread_list)
     
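One hedged caveat on the new two-container lookup: on a board with no
pinned threads the sticky group is absent, `soup.find()` returns None,
and `.find_all()` on None raises AttributeError. A defensive variant
under that assumption (class names come from the patch, everything else
is illustrative):

```python
from bs4 import BeautifulSoup

# Collect sticky and regular threads, skipping whichever container is
# missing instead of crashing on boards without pinned threads.
def collect_threads(soup: BeautifulSoup) -> list:
    thread_list = []
    for group_class in ("structItemContainer-group structItemContainer-group--sticky",
                        "structItemContainer-group js-threadList"):
        container = soup.find("div", {"class": group_class})
        if container is not None:
            thread_list += container.find_all("div", {"data-author": True})
    return thread_list
```
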
@@ -120,10 +121,13 @@ def cardingleaks_listing_parser(soup: Tag):
         thread_topic = thread.find("div", {"class": "structItem-title"}).text
         topic.append(cleanString(thread_topic.strip()))
 
-        author_icon = thread.find("a", {"class": "avatar avatar--s"}).find("img")
-        author_icon = author_icon.get('src')
-        author_icon = author_icon.split('base64,')[-1]
-        image_user.append(author_icon)
+        author_icon = thread.find("a", {"class": "avatar avatar--s"})
+        if author_icon is not None:
+            author_icon = author_icon.find('img').get('src')
+            author_icon = author_icon.split('base64,')[-1]
+            image_user.append(author_icon)
+        else:
+            image_user.append('-1')
         
         thread_view = thread.find("dl", {"class": "pairs pairs--justified structItem-minor"}).find("dd").text
         # Context text view count (i.e., 8.8K) to numerical (i.e., 8800)
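For context on the `split('base64,')` the guard preserves: avatars on
this board appear to be inlined as data URIs, and the split keeps only
the payload while leaving plain URLs untouched. A tiny illustration with
an invented sample value:

```python
# "data:image/png;base64,<payload>" -> "<payload>"; a plain URL with no
# 'base64,' substring passes through unchanged via split(...)[-1].
src = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg=="
payload = src.split('base64,')[-1]
assert payload == "iVBORw0KGgoAAAANSUhEUg=="
```
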
diff --git a/Forums/Classifier/test_classify.py b/Forums/Classifier/classify_test.py
similarity index 100%
rename from Forums/Classifier/test_classify.py
rename to Forums/Classifier/classify_test.py
diff --git a/Forums/CryptBB/crawler_selenium.py b/Forums/CryptBB/crawler_selenium.py
index bcef5a8..40255ce 100644
--- a/Forums/CryptBB/crawler_selenium.py
+++ b/Forums/CryptBB/crawler_selenium.py
@@ -199,28 +199,24 @@ def getNameFromURL(url):
 def getInterestedLinks():
     links = []
 
-    # # Beginner Programming
+    # Beginner Programming
     links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=86')
-    # # Beginner Carding and Fraud
-    # links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=91')
-    # # Beginner Hacking
-    # links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=87')
-    # # Newbie
-    #  links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=84')
-    # # Beginner Hardware
-    # links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=89')
-    # # Training Challenges
-    # links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=96')
+    # Beginner Carding and Fraud
+    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=91')
+    # Beginner Hacking
+    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=87')
+    # Newbie
+    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=84')
+    # Beginner Hardware
+    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=89')
+    # Training Challenges
+    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=96')
     # Darknet Discussions
-    # links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=88')
-    # # Public Leaks and Warez
-    # links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=97')
-    # # Hacked Accounts and Database Dumps
-    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=30')
-    # # Android Moded pak
-    # links.append('http://bestteermb42clir6ux7xm76d4jjodh3fpahjqgbddbmfrgp4skg2wqd.onion/viewforum.php?f=53')
-    # # Sell
-    # links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=44')
+    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=88')
+    # Public Leaks and Warez
+    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=97')
+    # Sell
+    links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=44')
 
     return links
 
@@ -260,9 +256,9 @@ def crawlForum(driver):
                             driver.refresh()
                         savePage(driver, driver.page_source, topic + f"page{counter}")  # very important
 
-                        # comment out
-                        if counter == 2:
-                            break
+                        # # comment out
+                        # if counter == 2:
+                        #     break
 
                         try:
                             temp = driver.find_element(By.XPATH, '/html/body/div/div[2]/div/div[2]/div')
@@ -275,15 +271,15 @@ def crawlForum(driver):
                         except NoSuchElementException:
                             has_next_topic_page = False
 
-                    for i in range(counter):
+                    for j in range(counter):
                         driver.back()
 
-                    # comment out
-                    # break
-
-                # comment out
-                if count == 1:
-                    break
+                #     # comment out
+                #     break
+                #
+                # # comment out
+                # if count == 1:
+                #     break
 
                 try:
                     temp = driver.find_element(by=By.XPATH, value='/html/body/div/div[2]/div/div[2]/div')
diff --git a/Forums/CryptBB/parser.py b/Forums/CryptBB/parser.py
index d725a98..60c513b 100644
--- a/Forums/CryptBB/parser.py
+++ b/Forums/CryptBB/parser.py
@@ -40,7 +40,6 @@ def cryptBB_description_parser(soup):
 
     # Finding the repeated tag that corresponds to the listing of posts
 
-    # try:
     posts = soup.find('table', {"class": "tborder tfixed clear"}).find('td', {"id": "posts_container"}).find_all(
         'div', {"class": "post"})
 
@@ -48,6 +47,9 @@ def cryptBB_description_parser(soup):
 
     for ipost in posts:
 
+        if ipost.find('div', {"class": "deleted_post_author"}):
+            continue
+
         # Finding a first level of the HTML page
 
         post_wrapper = ipost.find('span', {"class": "largetext"})
@@ -61,56 +63,49 @@ def cryptBB_description_parser(soup):
 
         smalltext = ipost.find('div', {"class": "post_author"})
 
-        '''
-        # Testing here two possibilities to find this status and combine them
-        if ipost.find('div', {"class": "deleted_post_author"}):
-            status.append(-1)
-            interest.append(-1)
-            reputation.append(-1)
-            addDate.append(-1)
-            post.append("THIS POST HAS BEEN REMOVED!")
-            sign.append(-1)
-            feedback.append(-1)
-            continue
-        '''
-
-        # CryptBB does have membergroup and postgroup
+        if smalltext is not None:
 
-        membergroup = smalltext.find('div', {"class": "profile-rank"})
-        postgroup = smalltext.find('div', {"class": "postgroup"})
-        if membergroup != None:
-            membergroup = membergroup.text.strip()
-            if postgroup != None:
-                postgroup = postgroup.text.strip()
-                membergroup = membergroup + " - " + postgroup
-        else:
-            if postgroup != None:
-                membergroup = postgroup.text.strip()
+            # CryptBB does have membergroup and postgroup
+            membergroup = smalltext.find('div', {"class": "profile-rank"})
+            postgroup = smalltext.find('div', {"class": "postgroup"})
+            if membergroup != None:
+                membergroup = membergroup.text.strip()
+                if postgroup != None:
+                    postgroup = postgroup.text.strip()
+                    membergroup = membergroup + " - " + postgroup
             else:
-                membergroup = "-1"
-        status.append(cleanString(membergroup))
-
-        # Finding the interest of the author
-        # CryptBB does not have blurb
-        blurb = smalltext.find('li', {"class": "blurb"})
-        if blurb != None:
-            blurb = blurb.text.strip()
-        else:
-            blurb = "-1"
-        interest.append(cleanString(blurb))
-
-        # Finding the reputation of the user
-        # CryptBB does have reputation
-        author_stats = smalltext.find('div', {"class": "author_statistics"})
-        karma = author_stats.find('strong')
-        if karma != None:
-            karma = karma.text
-            karma = karma.replace("Community Rating: ", "")
-            karma = karma.replace("Karma: ", "")
-            karma = karma.strip()
+                if postgroup != None:
+                    membergroup = postgroup.text.strip()
+                else:
+                    membergroup = "-1"
+            status.append(cleanString(membergroup))
+
+            # Finding the interest of the author
+            # CryptBB does not have blurb
+            blurb = smalltext.find('li', {"class": "blurb"})
+            if blurb != None:
+                blurb = blurb.text.strip()
+            else:
+                blurb = "-1"
+            interest.append(cleanString(blurb))
+
+            # Finding the reputation of the user
+            # CryptBB does have reputation
+            author_stats = smalltext.find('div', {"class": "author_statistics"})
+            karma = author_stats.find('strong')
+            if karma != None:
+                karma = karma.text
+                karma = karma.replace("Community Rating: ", "")
+                karma = karma.replace("Karma: ", "")
+                karma = karma.strip()
+            else:
+                karma = "-1"
+            reputation.append(cleanString(karma))
+
         else:
-            karma = "-1"
-        reputation.append(cleanString(karma))
+            status.append('-1')
+            interest.append('-1')
+            reputation.append('-1')
 
         # Getting here another good tag to find the post date, post content and users' signature
 
@@ -120,25 +115,30 @@ def cryptBB_description_parser(soup):
         # dt = dt.strip().split()
         dt = dt.strip()
         day=date.today()
-        if "Yesterday" in dt:
+        if "Today" in dt:
+            today = day.strftime('%m-%d-%Y')
+            stime = dt.replace('Today,','').strip()
+            date_time_obj = today + ', '+stime
+            date_time_obj = datetime.strptime(date_time_obj,'%m-%d-%Y, %I:%M %p')
+        elif "Yesterday" in dt:
             yesterday = day - timedelta(days=1)
             yesterday = yesterday.strftime('%m-%d-%Y')
             stime = dt.replace('Yesterday,','').strip()
-            date_time_obj = yesterday+ ', '+stime
+            date_time_obj = yesterday + ', '+stime
             date_time_obj = datetime.strptime(date_time_obj,'%m-%d-%Y, %I:%M %p')
-        elif "hour ago" in dt or "hours ago" in dt:
-            day = day.strftime('%m-%d-%Y')
+        elif "ago" in dt:
             date_time_obj = postarea.find('span', {"class": "post_date"}).find('span')['title']
             date_time_obj = datetime.strptime(date_time_obj, '%m-%d-%Y, %I:%M %p')
         else:
             date_time_obj = datetime.strptime(dt, '%m-%d-%Y, %I:%M %p')
-            stime = date_time_obj.strftime('%b %d, %Y')
-            sdate = date_time_obj.strftime('%I:%M %p')
         addDate.append(date_time_obj)
 
         # Finding the post
 
         inner = postarea.find('div', {"class": "post_body scaleimages"})
+        quote = inner.find('blockquote')
+        if quote is not None:
+            quote.decompose()
         inner = inner.text.strip()
         post.append(cleanString(inner))
 
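The new branches normalize MyBB-style relative timestamps before
parsing. A standalone sketch of the same logic, assuming the
"Today, 10:12 PM" / "Yesterday, ..." / "... hours ago" formats seen in
the diff; `title_attr` stands in for the post_date span's title
attribute, which holds the absolute time for "ago" posts:

```python
from datetime import datetime, date, timedelta

# Normalize a MyBB display date to a datetime, resolving Today/Yesterday
# against the current date and falling back to the title attribute for
# relative "ago" strings.
def parse_mybb_date(dt: str, title_attr: str) -> datetime:
    day = date.today()
    if "Today" in dt:
        stamp = day.strftime('%m-%d-%Y') + ', ' + dt.replace('Today,', '').strip()
    elif "Yesterday" in dt:
        yesterday = (day - timedelta(days=1)).strftime('%m-%d-%Y')
        stamp = yesterday + ', ' + dt.replace('Yesterday,', '').strip()
    elif "ago" in dt:
        stamp = title_attr          # absolute time kept in the title attribute
    else:
        stamp = dt
    return datetime.strptime(stamp, '%m-%d-%Y, %I:%M %p')
```
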
@@ -210,6 +210,10 @@ def cryptBB_listing_parser(soup):
 
     itopics = soup.find_all('tr',{"class": "inline_row"})
 
+    # Counting how many topics
+
+    nm = len(itopics)
+
     for itopic in itopics:
 
         # For each topic found, the structure to get the rest of the information can be of two types. Testing all of them
@@ -225,10 +229,6 @@ def cryptBB_listing_parser(soup):
 
         image_user.append(-1)
 
-        # Counting how many topics we have found so far
-
-        nm = len(topic)
-
         # Adding the url to the list of urls
         try:
             link = itopic.find('span', {"class": "subject_old"}).find('a').get('href')
@@ -237,19 +237,24 @@ def cryptBB_listing_parser(soup):
         href.append(link)
 
         # Finding the author of the topic
-        ps = itopic.find('div', {"class":"author smalltext"}).find('a').text
+        ps = itopic.find('div', {"class":"author smalltext"}).text
         user = ps.strip()
         author.append(cleanString(user))
 
         # Finding the number of replies
         columns = itopic.findChildren('td',recursive=False)
         replies = columns[3].text
-
-        posts.append(cleanString(replies))
+        if replies == '-':
+            posts.append('-1')
+        else:
+            posts.append(cleanString(replies))
 
         # Finding the number of Views
         tview = columns[4].text
-        views.append(cleanString(tview))
+        if tview == '-':
+            views.append('-1')
+        else:
+            views.append(cleanString(tview))
 
         # If no information about when the topic was added, just assign "-1" to the variable
 
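The dash checks above handle MyBB apparently rendering '-' for threads
with no reply or view counts, mapped to the pipeline's '-1' missing-value
convention. A small equivalent normalizer (`normalize_count` is
illustrative, not pipeline code):

```python
# Map MyBB's '-' placeholder to the pipeline's '-1' convention; any
# real count passes through unchanged.
def normalize_count(cell_text: str) -> str:
    text = cell_text.strip()
    return '-1' if text == '-' else text

assert normalize_count('-') == '-1'
assert normalize_count('42') == '42'
```
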
diff --git a/Forums/HiddenAnswers/crawler_selenium.py b/Forums/HiddenAnswers/crawler_selenium.py
index a7f37ea..f369347 100644
--- a/Forums/HiddenAnswers/crawler_selenium.py
+++ b/Forums/HiddenAnswers/crawler_selenium.py
@@ -157,16 +157,20 @@ def getNameFromURL(url):
 def getInterestedLinks():
     links = []
 
-    # Hacks
-    # links.append('http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/index.php/questions/hacking')
-    
-    # links.append('http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/index.php/questions/darknet-and-tor')
-    
-    # links.append('http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/index.php/questions/internet')
-    
+    # hacking
+    links.append('http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/index.php/questions/hacking')
+    # darknet and tor
+    links.append('http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/index.php/questions/darknet-and-tor')
+    # internet
+    links.append('http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/index.php/questions/internet')
+    # links
     links.append('http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/index.php/questions/links')
-    
-    
+    # programming
+    links.append('http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/index.php/programming')
+    # knowledge and information
+    links.append('http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/index.php/knowledge-and-information')
+    # other
+    links.append('http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/index.php/other')
 
     return links
 
@@ -206,12 +210,12 @@ def crawlForum(driver: webdriver.Firefox):
                             driver.refresh()
                         savePage(driver, driver.page_source, topic + f"page{counter}")  # very important
 
-                        # comment out
-                        if counter == 2:
-                            break
+                        # # comment out
+                        # if counter == 2:
+                        #     break
 
                         try:
-                            page = ""  # no next page so far may have some later on
+                            page = driver.find_element(by=By.CLASS_NAME, value='qa-page-next').get_attribute('href')
                             if page == "":
                                 raise NoSuchElementException
                             counter += 1
@@ -219,15 +223,15 @@ def crawlForum(driver: webdriver.Firefox):
                         except NoSuchElementException:
                             has_next_topic_page = False
 
-                    for i in range(counter):
+                    for j in range(counter):
                         driver.back()
 
-                    # comment out
-                    # break
-
-                # comment out
-                if count == 1:
-                    break
+                #     # comment out
+                #     break
+                #
+                # # comment out
+                # if count == 1:
+                #     break
 
                 try:
                     link = driver.find_element(by=By.CLASS_NAME, value='qa-page-next').get_attribute('href')
@@ -248,14 +252,14 @@ def crawlForum(driver: webdriver.Firefox):
 
 # Returns 'True' if the link is Topic link
 def isDescriptionLink(url):
-    if 'index.php' in url and 'questions' not in url:
+    if 'http' not in url:
         return True
     return False
 
 
 # Returns True if the link is a listingPage link
 def isListingLink(url):
-    if 'questions' in url:
+    if 'http' in url:
         return True
     return False
 
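The link classifiers now key on whether a URL is absolute. A hedged
usage sketch, assuming topic hrefs scraped from HiddenAnswers listings
are site-relative while listing links are full .onion URLs (the second
URL below is made up for illustration):

```python
# Restated predicates from the patch, plus example inputs.
def isDescriptionLink(url: str) -> bool:
    return 'http' not in url

def isListingLink(url: str) -> bool:
    return 'http' in url

assert isListingLink('http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/index.php/questions/hacking')
assert isDescriptionLink('./12345/some-question-title')   # assumed relative href
```
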
diff --git a/Forums/HiddenAnswers/parser.py b/Forums/HiddenAnswers/parser.py
index 995a7f0..0f2647f 100644
--- a/Forums/HiddenAnswers/parser.py
+++ b/Forums/HiddenAnswers/parser.py
@@ -42,14 +42,22 @@ def HiddenAnswers_description_parser(soup: BeautifulSoup):
     datetime_obj = datetime.strptime(datetime_string, "%Y-%m-%dT%H:%M:%S")
     addDate.append(datetime_obj)
     
-    question_user_status = question.find("span", {"class": "qa-q-view-who-title"}).text
-    status.append(cleanString(question_user_status.strip()))
-    
-    question_user_karma = question.find("span", {"class": "qa-q-view-who-points-data"}).text
-    # Convert karma to pure numerical string
-    if question_user_karma.find("k") > -1:
-        question_user_karma = str(float(question_user_karma.replace("k", "")) * 1000)
-    reputation.append(cleanString(question_user_karma.strip()))
+    question_user_status = question.find("span", {"class": "qa-q-view-who-title"})
+    if question_user_status is not None:
+        question_user_status = question_user_status.text
+        status.append(cleanString(question_user_status.strip()))
+    else:
+        status.append('-1')
+
+    question_user_karma = question.find("span", {"class": "qa-q-view-who-points-data"})
+    if question_user_karma is not None:
+        question_user_karma = question_user_karma.text
+        # Convert karma to pure numerical string
+        if question_user_karma.find("k") > -1:
+            question_user_karma = str(float(question_user_karma.replace("k", "")) * 1000)
+        reputation.append(cleanString(question_user_karma.strip()))
+    else:
+        reputation.append('-1')
     
     question_content = question.find("div", {"class": "qa-q-view-content qa-post-content"}).text
     post.append(cleanString(question_content.strip()))
@@ -88,14 +96,22 @@ def HiddenAnswers_description_parser(soup: BeautifulSoup):
         post_data = replies.find("div", {"class": "qa-a-item-content qa-post-content"}).find("div",{"itemprop":"text"}).text
         post.append(cleanString(post_data.strip()))
         
-        user_reputations = replies.find("span", {"class", "qa-a-item-who-title"}).text
-        status.append(cleanString(user_reputations.strip()))
+        user_reputations = replies.find("span", {"class", "qa-a-item-who-title"})
+        if user_reputations is not None:
+            user_reputations = user_reputations.text
+            status.append(cleanString(user_reputations.strip()))
+        else:
+            status.append('-1')
         
-        karma = replies.find("span", {"class": "qa-a-item-who-points-data"}).text
-        # Convert karma to pure numerical string
-        if karma.find("k") > -1:
-            karma = str(float(karma.replace("k", "")) * 1000)
-        reputation.append(cleanString(karma.strip()))
+        karma = replies.find("span", {"class": "qa-a-item-who-points-data"})
+        if karma is not None:
+            karma = karma.text
+            # Convert karma to pure numerical string
+            if karma.find("k") > -1:
+                karma = str(float(karma.replace("k", "")) * 1000)
+            reputation.append(cleanString(karma.strip()))
+        else:
+            reputation.append('-1')
         
         feedback.append("-1")
         sign.append("-1")
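The guarded karma handling keeps the existing 'k'-suffix expansion,
which the Q2A-style markup appears to use for abbreviated counts. A
standalone sketch of that conversion (`expand_karma` is illustrative):

```python
# Expand an abbreviated karma string such as "8.8k" to its numeric
# value, returned as a string to match the pipeline's conventions.
def expand_karma(karma: str) -> str:
    karma = karma.strip()
    if 'k' in karma:
        return str(float(karma.replace('k', '')) * 1000)
    return karma

print(expand_karma('8.8k'))   # ~8800
print(expand_karma('127'))    # unchanged
```
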
@@ -139,8 +155,9 @@ def HiddenAnswers_listing_parser(soup: BeautifulSoup):
     image_user = []                # 8 all user avatars used in each topic
 
     # Finding the board
-    literature = soup.find("div", {"class": "qa-main-heading"}).find("h1")
-    board = literature.text
+    board = soup.find("div", {"class": "qa-main-heading"}).find("h1").text
+    board = board.replace('Recent questions in', '')
+    board = cleanString(board.strip())
     
     queries_by_user: ResultSet[Tag] = soup.find("div", {"class": "qa-q-list"}).find_all("div", {"class": "qa-q-list-item"})
     
@@ -148,9 +165,9 @@ def HiddenAnswers_listing_parser(soup: BeautifulSoup):
         topic_of_query = queries.find("div", {"class": "qa-q-item-title"}).find("a").text
         topic.append(cleanString(topic_of_query.strip()))
 
-        image_user.append("-1")
+        image_user.append("-1") # qa-q-item-where
         
-        author = queries.find("span", {"class": "qa-q-item-who-data"}).find("a").text
+        author = queries.find("span", {"class": "qa-q-item-who-data"}).text
         user.append(cleanString(author.strip()))
         
         num_answers = queries.find("span", {"class": "qa-a-count-data"}).text
diff --git a/Forums/Initialization/forums_mining.py b/Forums/Initialization/forums_mining.py
index 311ac6c..4d68840 100644
--- a/Forums/Initialization/forums_mining.py
+++ b/Forums/Initialization/forums_mining.py
@@ -102,7 +102,7 @@ def opentor():
 # main method
 if __name__ == '__main__':
 
-    # opentor()
+    opentor()
 
     # assignment from forumsList.txt
     forumsList = getForums()
diff --git a/Forums/Initialization/prepare_parser.py b/Forums/Initialization/prepare_parser.py
index 267f887..ac1523f 100644
--- a/Forums/Initialization/prepare_parser.py
+++ b/Forums/Initialization/prepare_parser.py
@@ -212,7 +212,7 @@ def persist_record(url, rec, cur, con, createLog, logFile, listingFile, descript
 def move_file(filePath, createLog, logFile):
 
     source = filePath
-    destination = filePath.replace(os.path.basename(filePath), "") + r'Read/'
+    destination = filePath.replace(os.path.basename(filePath), "") + r'Read/' + os.path.basename(filePath)
 
     try:
         shutil.move(source, destination, shutil.copy2)
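The move_file change matters for re-runs: with a bare directory
destination, shutil.move raises shutil.Error when a file of the same
name already sits in Read/, while an explicit full destination path
overwrites it. A hedged sketch of the behavior, assuming the pipeline's
Read/ layout (`move_to_read` is an illustrative helper):

```python
import os
import shutil

# Move a scraped page into the sibling Read/ directory, overwriting any
# previous copy so a re-run of the parser cannot fail on the move.
def move_to_read(file_path: str) -> str:
    read_dir = os.path.join(os.path.dirname(file_path), 'Read')
    os.makedirs(read_dir, exist_ok=True)
    destination = os.path.join(read_dir, os.path.basename(file_path))
    shutil.move(file_path, destination, shutil.copy2)
    return destination
```
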
@@ -238,6 +238,9 @@ def new_parse(forum, url, createLog):
 
     from Forums.Initialization.forums_mining import config, CURRENT_DATE
 
+    global nError
+    nError = 0
+
     print("Parsing the " + forum + " forum and conduct data classification to store the information in the database.")
 
     # Connecting to the database
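The `global nError; nError = 0` pair resets a module-level error counter
that other code in this file increments. A minimal sketch of the
pattern, with the parsing loop replaced by a stand-in, showing why the
reset keeps counts from bleeding across forums in one process:

```python
# Module-level counter, bumped during parsing elsewhere in the module.
nError = 0

def new_parse(forum: str) -> int:
    global nError
    nError = 0                        # fresh count for this run
    for ok in (True, False, True):    # stand-in for per-page parse results
        if not ok:
            nError += 1
    return nError

assert new_parse("CryptBB") == 1      # count reflects this run only
```
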
diff --git a/Forums/Libre/crawler_selenium.py b/Forums/Libre/crawler_selenium.py
index 98b5517..58274ec 100644
--- a/Forums/Libre/crawler_selenium.py
+++ b/Forums/Libre/crawler_selenium.py
@@ -239,7 +239,7 @@ def crawlForum(driver):
                         except NoSuchElementException:
                             has_next_topic_page = False
 
-                    for i in range(counter):
+                    for j in range(counter):
                         driver.back()
 
                     # comment out
diff --git a/Forums/OnniForums/crawler_selenium.py b/Forums/OnniForums/crawler_selenium.py
index 806f869..03f2367 100644
--- a/Forums/OnniForums/crawler_selenium.py
+++ b/Forums/OnniForums/crawler_selenium.py
@@ -250,7 +250,7 @@ def crawlForum(driver):
                         except NoSuchElementException:
                             has_next_topic_page = False
 
-                    for i in range(counter):
+                    for j in range(counter):
                         driver.back()
 
                     # comment out
diff --git a/Forums/Procrax/crawler_selenium.py b/Forums/Procrax/crawler_selenium.py
index 7115d6c..7c15483 100644
--- a/Forums/Procrax/crawler_selenium.py
+++ b/Forums/Procrax/crawler_selenium.py
@@ -241,7 +241,7 @@ def crawlForum(driver):
                         except NoSuchElementException:
                             has_next_topic_page = False
 
-                    for i in range(counter):
+                    for j in range(counter):
                         driver.back()
 
                     # comment out
diff --git a/MarketPlaces/Classifier/test_classify.py b/MarketPlaces/Classifier/classify_test.py
similarity index 100%
rename from MarketPlaces/Classifier/test_classify.py
rename to MarketPlaces/Classifier/classify_test.py
diff --git a/MarketPlaces/Initialization/prepare_parser.py b/MarketPlaces/Initialization/prepare_parser.py
index c5af58b..b94723f 100644
--- a/MarketPlaces/Initialization/prepare_parser.py
+++ b/MarketPlaces/Initialization/prepare_parser.py
@@ -295,6 +295,9 @@ def new_parse(marketPlace, url, createLog):
 
     from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
 
+    global nError
+    nError = 0
+
     print("Parsing the " + marketPlace + " market and conduct data classification to store the information in the database.")
 
     # Connecting to the database