Browse Source

updated all forum crawlers

main
Helium 1 year ago
parent
commit
b3188f4716
12 changed files with 479 additions and 399 deletions
  1. +2
    -2
      Forums/Altenens/crawler_selenium.py
  2. +23
    -49
      Forums/Cardingleaks/crawler_selenium.py
  3. +25
    -45
      Forums/CryptBB/crawler_selenium.py
  4. +24
    -49
      Forums/HiddenAnswers/crawler_selenium.py
  5. +1
    -1
      Forums/Initialization/forumsList.txt
  6. +3
    -0
      Forums/Initialization/forums_mining.py
  7. +247
    -0
      Forums/Initialization/geckodriver.log
  8. +40
    -67
      Forums/Libre/crawler_selenium.py
  9. +28
    -53
      Forums/OnniForums/crawler_selenium.py
  10. +33
    -49
      Forums/Procrax/crawler_selenium.py
  11. +1
    -1
      MarketPlaces/Initialization/marketsList.txt
  12. +52
    -83
      MarketPlaces/M00nkeyMarket/parser.py

+ 2
- 2
Forums/Altenens/crawler_selenium.py View File

@ -199,7 +199,7 @@ def getInterestedLinks():
return links
# newest version of crawling
def crawlForum(driver):
print("Crawling the Altenens forum")
@ -233,7 +233,7 @@ def crawlForum(driver):
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, topic + f"page{counter}")
savePage(driver.page_source, topic + f"page{counter}") # very important
# comment out
if counter == 2:


+ 23
- 49
Forums/Cardingleaks/crawler_selenium.py View File

@ -2,7 +2,7 @@ __author__ = 'DarkWeb'
'''
Cardingleaks Forum Crawler (Selenium)
FIXED
Crawler updated and fixed
'''
from selenium import webdriver
@ -207,67 +207,53 @@ def getInterestedLinks():
def crawlForum(driver):
print("Crawling the Cardingleaks forum")
print("Crawling the Cardingleaks forum")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
i = 0
count = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
list = topicPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
driver.back()
#variable to check if there is a next page for the topic
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
topics = topicPages(html)
for topic in topics:
has_next_topic_page = True
counter = 1
page = topic
# check if there is a next page for the topics
while has_next_topic_page:
# try to access next page of the topic
itemURL = urlparse.urljoin(baseURL, str(item))
itemURL = urlparse.urljoin(baseURL, str(page))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
savePage(driver.page_source, topic + f"page{counter}") # very important
# if there is a next page then go and save....
# Spec
try:
# temp = driver.find_element(By.XPATH, '/html/body/div[2]/div[4]/div/div[5]/div[2]/div/div[1]/div[1]/div/nav/div[1]') # /html/body/div/div[2]/div/div[2]/div/
item = driver.find_element(by=By.LINK_TEXT, value='Next').get_attribute('href') #/html/body/div/div[2]/div/div[2]/div
# comment out
if counter == 2:
break
if item == "":
try:
page = driver.find_element(by=By.LINK_TEXT, value='Next').get_attribute('href')
if page == "":
raise NoSuchElementException
else:
counter += 1
counter += 1
except NoSuchElementException:
has_next_topic_page = False
# end of loop
for i in range(counter):
driver.back()
@ -276,21 +262,12 @@ def crawlForum(driver):
# comment out
if count == 1:
count = 0
break
try:
# temp = driver.find_element(by=By.XPATH, value = '/html/body/div[2]/div[4]/div/div[5]/div[2]/div/div/div[1]/div/nav/div[1]')
link = driver.find_element(by=By.LINK_TEXT, value='Next').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@ -300,10 +277,7 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling Cardingleaks forum done successfully. Press ENTER to continue\n")
input("Crawling Cardingleaks forum done successfully. Press ENTER to continue\n")
# Returns 'True' if the link is Topic link, may need to change for every website


+ 25
- 45
Forums/CryptBB/crawler_selenium.py View File

@ -238,65 +238,55 @@ def getInterestedLinks():
def crawlForum(driver):
print("Crawling the CryptBB forum")
print("Crawling the CryptBB forum")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
i = 0
count = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
list = topicPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
driver.back()
#variable to check if there is a next page for the topic
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
topics = topicPages(html)
for topic in topics:
has_next_topic_page = True
counter = 1
page = topic
# check if there is a next page for the topics
while has_next_topic_page:
# try to access next page of the topic
itemURL = urlparse.urljoin(baseURL, str(item))
itemURL = urlparse.urljoin(baseURL, str(page))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
savePage(driver.page_source, topic + f"page{counter}") # very important
# comment out
if counter == 2:
break
# if there is a next page then go and save....
# next page in the topic?
try:
temp = driver.find_element(By.XPATH, '/html/body/div/div[2]/div/div[2]/div') # /html/body/div/div[2]/div/div[2]/div/
item = temp.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute('href') #/html/body/div/div[2]/div/div[2]/div
temp = driver.find_element(By.XPATH, '/html/body/div/div[2]/div/div[2]/div')
page = temp.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute('href')
if item == "":
if page == "":
raise NoSuchElementException
else:
counter += 1
counter += 1
except NoSuchElementException:
has_next_topic_page = False
# end of loop
for i in range(counter):
driver.back()
@ -305,21 +295,14 @@ def crawlForum(driver):
# comment out
if count == 1:
count = 0
break
try:
temp = driver.find_element(by=By.XPATH, value = '/html/body/div/div[2]/div/div[2]/div')
temp = driver.find_element(by=By.XPATH, value='/html/body/div/div[2]/div/div[2]/div')
link = temp.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@ -329,10 +312,7 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling CryptBB forum done successfully. Press ENTER to continue\n")
input("Crawling CryptBB done successfully. Press ENTER to continue\n")
# Returns 'True' if the link is Topic link, may need to change for every website


+ 24
- 49
Forums/HiddenAnswers/crawler_selenium.py View File

@ -179,86 +179,65 @@ def crawlForum(driver):
print("Crawling the HiddenAnswers forum")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
i = 0
count = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
list = topicPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
driver.back()
'''
#variable to check if there is a next page for the topic
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
topics = topicPages(html)
for topic in topics:
has_next_topic_page = True
counter = 1
page = topic
# check if there is a next page for the topics
while has_next_topic_page:
# try to access next page of the topic
itemURL = urlparse.urljoin(baseURL, str(item))
itemURL = urlparse.urljoin(baseURL, str(page))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
savePage(driver.page_source, topic + f"page{counter}") # very important
# if there is a next page then go and save....
# next page in the topic?
try:
temp = driver.find_element(By.XPATH, '/html/body/div/div[2]/div/div[2]/div') # /html/body/div/div[2]/div/div[2]/div/
item = temp.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute('href') #/html/body/div/div[2]/div/div[2]/div
# comment out
if counter == 2:
break
if item == "":
try:
page = ""  # no next page so far; the site may add pagination later on
if page == "":
raise NoSuchElementException
has_next_topic_page = False
else:
counter += 1
counter += 1
except NoSuchElementException:
has_next_topic_page = False
# end of loop
for i in range(counter):
driver.back()
'''
# comment out
break
# comment out
if count == 1:
count = 0
break
try:
link = driver.find_element(by=By.XPATH, value = '/html/body/div[2]/div[2]/div/div[3]/div[3]/ul/li[7]/a').get_attribute('href')
link = driver.find_element(by=By.XPATH, value='/html/body/div[2]/div[2]/div/div[3]/div[3]/ul/li[7]/a').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@ -268,11 +247,7 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling HiddenAnswers forum done sucessfully. Press ENTER to continue\n")
input("Crawling HiddenAnswers done successfully. Press ENTER to continue\n")
# Returns 'True' if the link is Topic link


+ 1
- 1
Forums/Initialization/forumsList.txt View File

@ -1 +1 @@
Altenens
Procrax

+ 3
- 0
Forums/Initialization/forums_mining.py View File

@ -14,6 +14,7 @@ from Forums.Procrax.crawler_selenium import crawler as crawlerProcraxForum
from Forums.HiddenAnswers.crawler_selenium import crawler as crawlerHiddenAnswers
from Forums.Cardingleaks.crawler_selenium import crawler as crawlerCardingleaks
from Forums.Altenens.crawler_selenium import crawler as crawlerAltenens
from Forums.Libre.crawler_selenium import crawler as crawlerLibre
import configparser
import time
@ -119,6 +120,8 @@ if __name__ == '__main__':
crawlerCardingleaks()
elif forum == 'Altenens':
crawlerAltenens()
elif forum == 'Libre':
crawlerLibre()


+ 247
- 0
Forums/Initialization/geckodriver.log View File

@ -10951,3 +10951,250 @@ unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1689363209615 geckodriver INFO Listening on 127.0.0.1:60532
1689363216981 mozrunner::runner INFO Running command: "C:\\\\Users\\\\Helium\\\\Desktop\\\\Tor Browser\\\\Browser\\\\firefox.exe" "--marionette" "--remote-debugging-port" "60533" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofile278pEs"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
console.log: "TorConnect: Observed profile-after-change"
console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change"
1689363219049 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:60533/devtools/browser/8c990d4b-44eb-425d-b226-b8d4c1cffc2d
1689363224682 Marionette INFO Listening on port 60540
1689363225068 RemoteAgent WARN TLS certificate errors will be ignored for this session
JavaScript error: , line 0: NotFoundError: No such JSWindowActor 'MarionetteEvents'
JavaScript error: , line 0: NotFoundError: No such JSWindowActor 'MarionetteEvents'
1689363820376 Marionette INFO Stopped listening on port 60540
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofile278pEs\thumbnails) because it does not exist
[Parent 5080, IPC I/O Parent] WARNING: file /var/tmp/build/firefox-b6010b1466c9/ipc/chromium/src/base/process_util_win.cc:167
1689363820593 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:60789/devtools/browser/8539d316-2b33-4477-9e35-2f9e6eab09b6
1689363569998 Marionette INFO Listening on port 60796
1689363570244 RemoteAgent WARN TLS certificate errors will be ignored for this session
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/, line 2: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/member.php?action=login, line 2: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/member.php?action=login, line 5: ReferenceError: lang is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/member.php?action=login, line 9: ReferenceError: use_xmlhttprequest is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=86, line 3: ReferenceError: lang is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/inline_edit.js?ver=1808, line 6: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628, line 6: ReferenceError: lang is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/report.js?ver=1804, line 4: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/thread.js?ver=1809, line 4: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628, line 19: ReferenceError: use_xmlhttprequest is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628, line 25: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628&page=2, line 6: ReferenceError: lang is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/report.js?ver=1804, line 4: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/thread.js?ver=1809, line 4: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628&page=2, line 19: ReferenceError: use_xmlhttprequest is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628&page=2, line 25: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628, line 6: ReferenceError: lang is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/report.js?ver=1804, line 4: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/thread.js?ver=1809, line 4: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628, line 19: ReferenceError: use_xmlhttprequest is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=2628, line 25: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=86, line 3: ReferenceError: lang is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/inline_edit.js?ver=1808, line 6: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=86&page=2, line 3: ReferenceError: lang is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/inline_edit.js?ver=1808, line 6: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=16778, line 6: ReferenceError: lang is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/report.js?ver=1804, line 4: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/thread.js?ver=1809, line 4: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=16778, line 19: ReferenceError: use_xmlhttprequest is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/showthread.php?tid=16778, line 25: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=86&page=2, line 3: ReferenceError: lang is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/inline_edit.js?ver=1808, line 6: ReferenceError: $ is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=86, line 3: ReferenceError: lang is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/jeditable/jeditable.min.js, line 38: ReferenceError: jQuery is not defined
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/jscripts/inline_edit.js?ver=1808, line 6: ReferenceError: $ is not defined
1689363752505 Marionette INFO Stopped listening on port 60796
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
JavaScript error: chrome://remote/content/marionette/cert.js, line 55: NS_ERROR_NOT_AVAILABLE: Component returned failure code: 0x80040111 (NS_ERROR_NOT_AVAILABLE) [nsICertOverrideService.setDisableAllSecurityChecksAndLetAttackersInterceptMyData]
!!! error running onStopped callback: TypeError: callback is not a function
JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofilecgBCTA\thumbnails) because it does not exist
###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
Crash Annotation GraphicsCriticalError: |[C0][GFX1-]: Receive IPC close with reason=AbnormalShutdown (t=1346.28)
###!!! [Child][MessageChannel] Error: (msgtype=0x3900E5,name=PContent::Msg_GraphicsError) Channel closing: too late to send/recv, messages will be lost
[GFX1-]: Receive IPC close with reason=AbnormalShutdown
1689363753315 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1689364130030 geckodriver INFO Listening on 127.0.0.1:61129
1689364135033 mozrunner::runner INFO Running command: "C:\\\\Users\\\\Helium\\\\Desktop\\\\Tor Browser\\\\Browser\\\\firefox.exe" "--marionette" "--remote-debugging-port" "61130" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofileZXcPSi"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
console.log: "TorConnect: Observed profile-after-change"
console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change"
1689364136375 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:61130/devtools/browser/d0a00e7f-efab-4092-ba43-3afb5ec55bcc
1689364140122 Marionette INFO Listening on port 61138
1689364140225 RemoteAgent WARN TLS certificate errors will be ignored for this session
1689364164357 Marionette INFO Stopped listening on port 61138
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofileZXcPSi\thumbnails) because it does not exist
[Parent 5336, IPC I/O Parent] WARNING: file /var/tmp/build/firefox-b6010b1466c9/ipc/chromium/src/base/process_util_win.cc:167
[Parent 5336, IPC I/O Parent] WARNING: pipe error: 232: file /var/tmp/build/firefox-b6010b1466c9/ipc/chromium/src/chrome/common/ipc_channel_win.cc:544
1689364165253 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1689364952139 geckodriver INFO Listening on 127.0.0.1:61327
1689364958550 mozrunner::runner INFO Running command: "C:\\\\Users\\\\Helium\\\\Desktop\\\\Tor Browser\\\\Browser\\\\firefox.exe" "--marionette" "--remote-debugging-port" "61328" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofileeX31Bg"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
console.log: "TorConnect: Observed profile-after-change"
console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change"
1689364960322 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:61328/devtools/browser/d98ca77f-1ca8-49c2-b3d0-7c98e39d55e8
1689364964835 Marionette INFO Listening on port 61336
1689364965449 RemoteAgent WARN TLS certificate errors will be ignored for this session
1689365065931 Marionette INFO Stopped listening on port 61336
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofileeX31Bg\thumbnails) because it does not exist
###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
1689365066887 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1689365596202 geckodriver INFO Listening on 127.0.0.1:61665
1689365603047 mozrunner::runner INFO Running command: "C:\\\\Users\\\\Helium\\\\Desktop\\\\Tor Browser\\\\Browser\\\\firefox.exe" "--marionette" "--remote-debugging-port" "61666" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofilegVxGn8"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
console.log: "TorConnect: Observed profile-after-change"
console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change"
1689365604946 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:61666/devtools/browser/3f945d28-11cd-436c-832e-2085f8bb57e1
1689365609901 Marionette INFO Listening on port 61676
1689365610315 RemoteAgent WARN TLS certificate errors will be ignored for this session
1689365827541 Marionette INFO Stopped listening on port 61676
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
JavaScript error: chrome://remote/content/marionette/cert.js, line 55: NS_ERROR_NOT_AVAILABLE: Component returned failure code: 0x80040111 (NS_ERROR_NOT_AVAILABLE) [nsICertOverrideService.setDisableAllSecurityChecksAndLetAttackersInterceptMyData]
!!! error running onStopped callback: TypeError: callback is not a function
JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
JavaScript error: resource://gre/modules/PageThumbs.jsm, line 709: AbortError: IOUtils.profileBeforeChange getter: IOUtils: profileBeforeChange phase has already finished
[Parent 7204, IPC I/O Parent] WARNING: file /var/tmp/build/firefox-b6010b1466c9/ipc/chromium/src/base/process_util_win.cc:167
1689365828066 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1689366358424 geckodriver INFO Listening on 127.0.0.1:62059
1689366363521 mozrunner::runner INFO Running command: "C:\\\\Users\\\\Helium\\\\Desktop\\\\Tor Browser\\\\Browser\\\\firefox.exe" "--marionette" "--remote-debugging-port" "62060" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofileSRNF4S"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
console.log: "TorConnect: Observed profile-after-change"
console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change"
1689366364862 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:62060/devtools/browser/38410e90-6408-4c6e-a78a-4d8c6dabe5f5
1689366368448 Marionette INFO Listening on port 62067
###!!! [Child][MessageChannel] Error: (msgtype=0x390097,name=PContent::Msg_InitBackground) Channel closing: too late to send/recv, messages will be lost
###!!! [Child][MessageChannel] Error: (msgtype=0x390097,name=PContent::Msg_InitBackground) Channel closing: too late to send/recv, messages will be lost
1689366368939 RemoteAgent WARN TLS certificate errors will be ignored for this session
1689366462907 Marionette INFO Stopped listening on port 62067
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
JavaScript error: chrome://remote/content/marionette/cert.js, line 55: NS_ERROR_NOT_AVAILABLE: Component returned failure code: 0x80040111 (NS_ERROR_NOT_AVAILABLE) [nsICertOverrideService.setDisableAllSecurityChecksAndLetAttackersInterceptMyData]
!!! error running onStopped callback: TypeError: callback is not a function
JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
JavaScript error: resource://gre/modules/PageThumbs.jsm, line 709: AbortError: IOUtils.profileBeforeChange getter: IOUtils: profileBeforeChange phase has already finished
1689366464131 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138

+ 40
- 67
Forums/Libre/crawler_selenium.py View File

@ -62,16 +62,14 @@ def login(driver):
input('Press enter when CAPTCHA is completed, and you\'re at the login page')
#entering username and password into input boxes
usernameBox = driver.find_element(by=By.NAME, value='login')
usernameBox = driver.find_element(by=By.NAME, value='username')
#Username here
usernameBox.send_keys('ct1234')#sends string to the username box
passwordBox = driver.find_element(by=By.NAME, value='password')
#Password here
passwordBox.send_keys('r5o0wqmw')# sends string to passwordBox
login = driver.find_element(by=By.CLASS_NAME, value='block-container')
login_link = login.find_element(by=By.TAG_NAME, value='button')
login_link.click()
input("Press the login button and solve the CAPTCHA then press enter\n")
# input('input')
@ -209,87 +207,65 @@ def crawlForum(driver):
print("Crawling the Libre forum")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
i = 0
count = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
list = topicPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
driver.back()
#variable to check if there is a next page for the topic
# has_next_topic_page = True
# counter = 1
# # check if there is a next page for the topics
# while has_next_topic_page:
# # try to access next page of th topic
# itemURL = urlparse.urljoin(baseURL, str(item))
# try:
# driver.get(itemURL)
# except:
# driver.refresh()
# savePage(driver.page_source, item)
#
# # if there is a next page then go and save....
# # Spec
# try:
# # temp = driver.find_element(By.XPATH, '/html/body/div[2]/div[4]/div/div[5]/div[2]/div/div[1]/div[1]/div/nav/div[1]') # /html/body/div/div[2]/div/div[2]/div/
# item = driver.find_element(by=By.LINK_TEXT, value='>').get_attribute('href') #/html/body/div/div[2]/div/div[2]/div
#
# if item == "":
# raise NoSuchElementException
# else:
# counter += 1
#
# except NoSuchElementException:
# has_next_topic_page = False
#
# # end of loop
# for i in range(counter):
# driver.back()
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
topics = topicPages(html)
for topic in topics:
has_next_topic_page = True
counter = 1
page = topic
while has_next_topic_page:
itemURL = urlparse.urljoin(baseURL, str(page))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, topic + f"page{counter}") # very important
# comment out
if counter == 2:
break
try:
page = "" # no next page so far may have some later on
if page == "":
raise NoSuchElementException
counter += 1
except NoSuchElementException:
has_next_topic_page = False
for i in range(counter):
driver.back()
# comment out
break
# comment out
if count == 1:
count = 0
break
try:
# temp = driver.find_element(by=By.XPATH, value = '/html/body/div[2]/div[4]/div/div[5]/div[2]/div/div/div[1]/div/nav/div[1]')
link = driver.find_element(by=By.LINK_TEXT, value='>').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@ -299,10 +275,7 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling Libre forum done successfully. Press ENTER to continue\n")
input("Crawling Libre done successfully. Press ENTER to continue\n")
# Returns 'True' if the link is Topic link, may need to change for every website


+ 28
- 53
Forums/OnniForums/crawler_selenium.py View File

@ -214,92 +214,71 @@ def getInterestedLinks():
def crawlForum(driver):
print("Crawling the OnniForums forum")
print("Crawling the OnniForums")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
i = 0
count = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
list = topicPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
#next page for topic
# variable to check if there is a next page for the topic
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
topics = topicPages(html)
for topic in topics:
has_next_topic_page = True
counter = 1
page = topic
# check if there is a next page for the topics
while has_next_topic_page:
# try to access next page of th topic
itemURL = urlparse.urljoin(baseURL, str(item))
itemURL = urlparse.urljoin(baseURL, str(page))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
savePage(driver.page_source, topic + f"page{counter}") # very important
# comment out
if counter == 2:
break
# if there is a next page then go and save....
# next page in the topic?
try:
temp = driver.find_element(By.XPATH,
'/html/body/div/div[2]/div/div[3]/div') # /html/body/div/div[2]/div/div[2]/div/
item = temp.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute(
'href') # /html/body/div/div[2]/div/div[2]/div
temp = driver.find_element(By.XPATH,'/html/body/div/div[2]/div/div[3]/div') # /html/body/div/div[2]/div/div[2]/div/
page = temp.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute('href') # /html/body/div/div[2]/div/div[2]/div
if item == "":
if page == "":
raise NoSuchElementException
has_next_topic_page = False
else:
counter += 1
counter += 1
except NoSuchElementException:
has_next_topic_page = False
# end of loop
for i in range(counter):
driver.back()
# comment out, one topic per page
# comment out
break
# comment out, go through all pages
# comment out
if count == 1:
count = 0
break
try:
temp = driver.find_element(by=By.XPATH, value=
'/html/body/div/div[2]/div/div[3]/div') # /html/body/div/div[2]/div/div[3]/div
temp = driver.find_element(by=By.XPATH, value='/html/body/div/div[2]/div/div[3]/div') # /html/body/div/div[2]/div/div[3]/div
link = temp.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@ -309,11 +288,7 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling OnniForums forum done sucessfully. Press ENTER to continue\n")
input("Crawling OnniForums done successfully. Press ENTER to continue\n")
# Returns 'True' if the link is Topic link


+ 33
- 49
Forums/Procrax/crawler_selenium.py View File

@ -202,83 +202,70 @@ def getInterestedLinks():
def crawlForum(driver):
print("Crawling the Procrax forum")
print("Crawling the Procrax")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
i = 0
count = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)# open
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
#loop through the topics
while has_next_page:
list = topicPages(html)# for multiple pages
for item in list:
#variable to check if there is a next page for the topic
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
topics = topicPages(html)
for topic in topics:
has_next_topic_page = True
counter = 1
page = topic
# check if there is a next page for the topics
while has_next_topic_page:
# try to access next page of th topic
itemURL = urlparse.urljoin(baseURL, str(item))
itemURL = urlparse.urljoin(baseURL, str(page))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
savePage(driver.page_source, topic + f"page{counter}") # very important
# comment out
if counter == 2:
break
# if there is a next page then go and save....
# specific
try:
# temp = driver.find_element(By.XPATH, value='/html/body/div[1]/div[3]/div[2]/div[3]/div/div')
item = driver.find_element(By.LINK_TEXT, value='Next').get_attribute('href')
page = driver.find_element(By.LINK_TEXT, value='Next').get_attribute('href')
if item == "":
if page == "":
raise NoSuchElementException
has_next_topic_page = False
else:
counter += 1
counter += 1
except NoSuchElementException:
has_next_topic_page = False
#end of loop
for i in range(counter):
driver.back()
# # comment out
# break
#
# # comment out
# if count == 1:
# count = 0
# break
try:# change depending on web page, #general
# /html/body/div[1]/div[3]/div[2]/div[3]/div/div/div/div[1]/div/nav/div[1]
# temp = driver.find_element(By.XPATH, value='/html/body/div[1]/div[3]/div[2]/div[3]/div/div/div/div[1]/div/nav/div[1]')
# comment out
break
# comment out
if count == 1:
break
try:
link = driver.find_element(by=By.LINK_TEXT, value='Next').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@ -288,10 +275,7 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling Procrax forum done successfully. Press ENTER to continue\n")
input("Crawling Procrax done successfully. Press ENTER to continue\n")
# Returns 'True' if the link is Topic link, may need to change for every website


+ 1
- 1
MarketPlaces/Initialization/marketsList.txt View File

@ -1 +1 @@
AnonymousMarketplace
M00nkeyMarket

+ 52
- 83
MarketPlaces/M00nkeyMarket/parser.py View File

@ -35,50 +35,33 @@ def m00nkey_description_parser(soup):
shipTo = "-1" # 18 Product_ShippedTo
#vendor name
try:
temp = soup.find('div', {'class': 'box rounded mb-0'}).find('a').text
vendor = (cleanString(temp.strip()))
except:
print("Error in vendor")
temp = soup.find('div', {'class': 'box rounded mb-0'}).find('a').text
vendor = (cleanString(temp.strip()))
#successful transaction
try:
temp = soup.findAll('div', {'class','text-center text-truncate column-flex ml-1 mr-1'}) #card sidebar-menu mb-4 card sidebar-menu mb-4
temp2 = temp[1].findAll('span', {'class', 'float-right font-weight-bold'})
temp = temp2[1].text
success = (cleanString(temp.strip()))
except:
print("Error in successful")
sucess = "-1"
temp = soup.findAll('div', {'class','text-center text-truncate column-flex ml-1 mr-1'}) #card sidebar-menu mb-4 card sidebar-menu mb-4
temp2 = temp[1].findAll('span', {'class', 'float-right font-weight-bold'})
temp = temp2[1].text
success = (cleanString(temp.strip()))
#vendor rating 5
try:
temp = soup.findAll('div', {'class', 'text-center text-truncate column-flex ml-1 mr-1'}) # card sidebar-menu mb-4 card sidebar-menu mb-4
temp2 = temp[1].findAll('span', {'class', 'float-right font-weight-bold'})
temp = temp2[5].text
rating_vendor = (cleanString(temp.strip()))
except:
print("Error in vendor rating")
rating_vendor = "-1"
temp = soup.findAll('div', {'class', 'text-center text-truncate column-flex ml-1 mr-1'}) # card sidebar-menu mb-4 card sidebar-menu mb-4
temp2 = temp[1].findAll('span', {'class', 'float-right font-weight-bold'})
temp = temp2[5].text
rating_vendor = (cleanString(temp.strip()))
# product name
try:
temp = soup.find('h3', {'class', 'h3 rounded card-title'}).find('span').text
name = (cleanString(temp.strip()))
except:
print("Error in product name")
name = "-1"
temp = soup.find('h3', {'class', 'h3 rounded card-title'}).find('span').text
name = (cleanString(temp.strip()))
# product description
try:
describe = soup.find('div', {'class': "box rounded flex-fill"}).find('pre').text
if "\n" in describe:
describe = describe.replace("\n", " ")
describe = describe.replace("\r", " ")
describe = cleanString(describe.strip())
except:
print("Product description")
describe = "-1"
describe = soup.find('div', {'class': "box rounded flex-fill"}).find('pre').text
if "\n" in describe:
describe = describe.replace("\n", " ")
describe = describe.replace("\r", " ")
describe = cleanString(describe.strip())
CVE = "-1" # 5 Product_CVE_Classification (Common Vulnerabilities and Exposures) dont worry about that much
MS = "-1" # 6 Product_MS_Classification (Microsoft Security) dont worry about that much
@ -90,14 +73,10 @@ def m00nkey_description_parser(soup):
temp = temp2[1].text
category = cleanString(temp.strip())
except:
try:
temp = soup.find('table', {'class', 'table table-hover'})
temp2 = temp.find('tbody').find('tr').findAll('td')
temp = temp2[1].text
category = cleanString(temp.strip())
except:
print('Product category')
category = "-1"
temp = soup.find('table', {'class', 'table table-hover'})
temp2 = temp.find('tbody').find('tr').findAll('td')
temp = temp2[1].text
category = cleanString(temp.strip())
# product number of view
try:
@ -107,47 +86,35 @@ def m00nkey_description_parser(soup):
views = cleanString((temp.strip()))
except:
print('Product number of view')
views = "-1"
# views = "-1"
reviews = "-1" # 9 Product_Number_Of_Reviews
rating_item = "-1" # 10 Product_Rating
addDate = "-1" # 11 Product_AddedDate
#BTC selling price box box-rounded mt-2
try:
temp = soup.find('div', {'class', 'box box-rounded mt-2'})
temp2 = temp.findAll('i', {'class', 'float-right color-prices'})
temp = temp2[1].text
BTC = cleanString((temp.strip()))
except:
print('Product BTC')
BTC = "-1"
temp = soup.find('div', {'class', 'box box-rounded mt-2'})
temp2 = temp.findAll('i', {'class', 'float-right color-prices'})
temp = temp2[1].text
BTC = cleanString((temp.strip()))
# USD selling price
try:
temp = soup.find('div', {'class', 'box box-rounded mt-2'})
temp2 = temp.findAll('center')
temp = temp2[1].find('i').text
if "$" in temp:
temp = temp.replace("$", "")
USD = cleanString((temp.strip()))
except:
print('Product USD')
USD = "-1"
temp = soup.find('div', {'class', 'box box-rounded mt-2'})
temp2 = temp.findAll('center')
temp = temp2[1].find('i').text
if "$" in temp:
temp = temp.replace("$", "")
USD = cleanString((temp.strip()))
EURO = "-1" # 14 Product_EURO_SellingPrice
# product sold
try:
temp = soup.find('div', {'class', 'box rounded mb-0'}) # card sidebar-menu mb-4 card sidebar-menu mb-4
temp2 = temp.find('i')
temp = temp2.text
sold = (cleanString(temp.strip()))
except:
print("Error in successful")
sold = "-1"
temp = soup.find('div', {'class', 'box rounded mb-0'}) # card sidebar-menu mb-4 card sidebar-menu mb-4
temp2 = temp.find('i')
temp = temp2.text
sold = (cleanString(temp.strip()))
# sold = "-1"
# product quantatiy left ###ERRROR
try:
@ -157,15 +124,12 @@ def m00nkey_description_parser(soup):
temp = temp3[1].text
left = cleanString(temp.strip())
except:
try:
temp = soup.find('table', {'class', 'table table-hover'})
temp2 = temp.findAll('tr')
temp3 = temp2[1].findAll('td')
temp = temp3[1].text
left = cleanString(temp.strip())
except:
print('Product quantity')
left = "-1"
temp = soup.find('table', {'class', 'table table-hover'})
temp2 = temp.findAll('tr')
temp3 = temp2[1].findAll('td')
temp = temp3[1].text
left = cleanString(temp.strip())
shipFrom = "-1" # 17 Product_ShippedFrom
shipTo = "-1" # 18 Product_ShippedTo
@ -229,20 +193,25 @@ def m00nkey_listing_parser(soup):
temp = a.find('col-5 justify-content-between mx-auto').find('div').text
success.append(cleanString(temp.strip()))
except:
print('vendor')
print('successful transactions')
# product name
try:
temp = a.find('card-title rounded text-truncate').find('a').text
name.append(cleanString(temp.strip()))
except:
print('vendor')
print('product name')
CVE.append('-1')
MS.append('-1')
rating_vendor.append("-1")
try:
temp = a.findAll('btn btn-block btn-primary')
except:
print("Error in product category")
category = [] # 7 Product_Category y
describe = [] # 8 Product_Description
views = [] # 9 Product_Number_Of_Views


Loading…
Cancel
Save