Completed and tested all parsers for Procrax

1 year ago · a6bdb89850
--- a/.gitignore
+++ b/.gitignore
@ -2,6 +2,7 @@
 /shelf/
 .idea/workspace.xml
 selenium/geckodriver.exe
 __pycache__
 setup.ini
 *.html
 *.log
--- a/Forums/AbyssForum/pycache/crawler_selenium.cpython-310.pyc
+++ b/Forums/AbyssForum/pycache/crawler_selenium.cpython-310.pyc
--- a/Forums/AbyssForum/pycache/crawler_selenium.cpython-311.pyc
+++ b/Forums/AbyssForum/pycache/crawler_selenium.cpython-311.pyc
--- a/Forums/AbyssForum/pycache/parser.cpython-310.pyc
+++ b/Forums/AbyssForum/pycache/parser.cpython-310.pyc
--- a/Forums/AbyssForum/pycache/parser.cpython-311.pyc
+++ b/Forums/AbyssForum/pycache/parser.cpython-311.pyc
--- a/Forums/Altenens/pycache/crawler_selenium.cpython-310.pyc
+++ b/Forums/Altenens/pycache/crawler_selenium.cpython-310.pyc
--- a/Forums/Altenens/pycache/crawler_selenium.cpython-311.pyc
+++ b/Forums/Altenens/pycache/crawler_selenium.cpython-311.pyc
--- a/Forums/Altenens/pycache/parser.cpython-310.pyc
+++ b/Forums/Altenens/pycache/parser.cpython-310.pyc
--- a/Forums/Altenens/pycache/parser.cpython-311.pyc
+++ b/Forums/Altenens/pycache/parser.cpython-311.pyc
--- a/Forums/CryptBB/pycache/init.cpython-311.pyc
+++ b/Forums/CryptBB/pycache/init.cpython-311.pyc
--- a/Forums/CryptBB/pycache/crawler_selenium.cpython-310.pyc
+++ b/Forums/CryptBB/pycache/crawler_selenium.cpython-310.pyc
--- a/Forums/CryptBB/pycache/crawler_selenium.cpython-311.pyc
+++ b/Forums/CryptBB/pycache/crawler_selenium.cpython-311.pyc
--- a/Forums/CryptBB/pycache/parser.cpython-310.pyc
+++ b/Forums/CryptBB/pycache/parser.cpython-310.pyc
--- a/Forums/CryptBB/pycache/parser.cpython-311.pyc
+++ b/Forums/CryptBB/pycache/parser.cpython-311.pyc
--- a/Forums/HiddenAnswers/pycache/crawler_selenium.cpython-310.pyc
+++ b/Forums/HiddenAnswers/pycache/crawler_selenium.cpython-310.pyc
--- a/Forums/HiddenAnswers/pycache/crawler_selenium.cpython-311.pyc
+++ b/Forums/HiddenAnswers/pycache/crawler_selenium.cpython-311.pyc
--- a/Forums/HiddenAnswers/pycache/parser.cpython-310.pyc
+++ b/Forums/HiddenAnswers/pycache/parser.cpython-310.pyc
--- a/Forums/HiddenAnswers/pycache/parser.cpython-311.pyc
+++ b/Forums/HiddenAnswers/pycache/parser.cpython-311.pyc
--- a/Forums/Initialization/pycache/init.cpython-310.pyc
+++ b/Forums/Initialization/pycache/init.cpython-310.pyc
--- a/Forums/Initialization/pycache/init.cpython-311.pyc
+++ b/Forums/Initialization/pycache/init.cpython-311.pyc
--- a/Forums/Initialization/pycache/forums_mining.cpython-310.pyc
+++ b/Forums/Initialization/pycache/forums_mining.cpython-310.pyc
--- a/Forums/Initialization/pycache/forums_mining.cpython-311.pyc
+++ b/Forums/Initialization/pycache/forums_mining.cpython-311.pyc
--- a/Forums/Initialization/pycache/prepare_parser.cpython-310.pyc
+++ b/Forums/Initialization/pycache/prepare_parser.cpython-310.pyc
--- a/Forums/Initialization/pycache/prepare_parser.cpython-311.pyc
+++ b/Forums/Initialization/pycache/prepare_parser.cpython-311.pyc
--- a/Forums/Initialization/forums_mining.py
+++ b/Forums/Initialization/forums_mining.py
@ -99,9 +99,9 @@ if __name__ == '__main__':
        forum = forum.replace('\n','')

        print("Creating listing and description directories ... for " + forum)
        createDirectory(forum)
        time.sleep(5)  # wait for directories to be created
        input("Directories created successfully. Press ENTER to continue\n")
        # createDirectory(forum)
        # time.sleep(5)  # wait for directories to be created
        # input("Directories created successfully. Press ENTER to continue\n")
        

        if forum == "BestCardingWorld":
--- a/Forums/Initialization/geckodriver.log
+++ b/Forums/Initialization/geckodriver.log
@ -11198,3 +11198,80 @@ unwatchForTargets()@TargetList.jsm:37
 destructor()@TargetList.jsm:109
 stop()@CDP.jsm:104
 close()@RemoteAgent.jsm:138
 1689622469580	geckodriver	INFO	Listening on 127.0.0.1:58866
 1689622474728	mozrunner::runner	INFO	Running command: "C:\\Users\\minhkhoitran\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "58867" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\MINHKH~1\\AppData\\Local\\Temp\\rust_mozprofile5gOLDP"
 console.log: "TorSettings: loadFromPrefs()"
 console.log: "TorConnect: init()"
 console.log: "TorConnect: Entering Initial state"
 console.log: "TorConnect: Observed profile-after-change"
 console.log: "TorConnect: Observing topic 'TorProcessExited'"
 console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
 console.log: "TorConnect: Observing topic 'torsettings:ready'"
 console.log: "TorSettings: Observed profile-after-change"
 1689622475417	Marionette	INFO	Marionette enabled
 console.log: "TorConnect: Will load after bootstrap => [about:blank]"
 console.error: "Could not load engine [email protected]: Error: Extension is invalid"
 JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
 JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
 JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
 JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
 JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
 DevTools listening on ws://localhost:58867/devtools/browser/9a3a8de2-439e-425e-b415-f975abd86b65
 1689622476941	Marionette	INFO	Listening on port 58873
 1689622477054	RemoteAgent	WARN	TLS certificate errors will be ignored for this session
 JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\minhkhoitran\AppData\Local\Temp\rust_mozprofile5gOLDP\thumbnails) because it does not exist
 JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
 JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
 JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
 JavaScript error: undefined, line 0: Error: Missing host permission for the tab
 JavaScript error: undefined, line 0: Error: Missing host permission for the tab
 1689624030995	Marionette	INFO	Stopped listening on port 58873
 JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
 !!! error running onStopped callback: TypeError: callback is not a function
 JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
 JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\minhkhoitran\AppData\Local\Temp\rust_mozprofile5gOLDP\thumbnails) because it does not exist

 ###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost

 1689624031467	RemoteAgent	ERROR	unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]"  nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)"  location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64"  data: no] Stack trace: stop()@TargetObserver.jsm:64
 unwatchForTabs()@TargetList.jsm:70
 unwatchForTargets()@TargetList.jsm:37
 destructor()@TargetList.jsm:109
 stop()@CDP.jsm:104
 close()@RemoteAgent.jsm:138
 1689624276336	geckodriver	INFO	Listening on 127.0.0.1:59792
 1689624280979	mozrunner::runner	INFO	Running command: "C:\\Users\\minhkhoitran\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "59793" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\MINHKH~1\\AppData\\Local\\Temp\\rust_mozprofileSTe5EC"
 console.log: "TorSettings: loadFromPrefs()"
 console.log: "TorConnect: init()"
 console.log: "TorConnect: Entering Initial state"
 console.log: "TorConnect: Observed profile-after-change"
 console.log: "TorConnect: Observing topic 'TorProcessExited'"
 console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
 console.log: "TorConnect: Observing topic 'torsettings:ready'"
 console.log: "TorSettings: Observed profile-after-change"
 1689624281509	Marionette	INFO	Marionette enabled
 console.log: "TorConnect: Will load after bootstrap => [about:blank]"
 console.error: "Could not load engine [email protected]: Error: Extension is invalid"
 JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
 JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
 JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
 JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
 JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
 DevTools listening on ws://localhost:59793/devtools/browser/222a61fa-a958-4978-8048-bb632f658131
 1689624283001	Marionette	INFO	Listening on port 59799
 1689624283405	RemoteAgent	WARN	TLS certificate errors will be ignored for this session
 1689624692072	Marionette	INFO	Stopped listening on port 59799
 JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
 JavaScript error: chrome://remote/content/marionette/cert.js, line 55: NS_ERROR_NOT_AVAILABLE: Component returned failure code: 0x80040111 (NS_ERROR_NOT_AVAILABLE) [nsICertOverrideService.setDisableAllSecurityChecksAndLetAttackersInterceptMyData]
 !!! error running onStopped callback: TypeError: callback is not a function
 JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
 JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\minhkhoitran\AppData\Local\Temp\rust_mozprofileSTe5EC\thumbnails) because it does not exist

 ###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost

 1689624692916	RemoteAgent	ERROR	unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]"  nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)"  location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64"  data: no] Stack trace: stop()@TargetObserver.jsm:64
 unwatchForTabs()@TargetList.jsm:70
 unwatchForTargets()@TargetList.jsm:37
 destructor()@TargetList.jsm:109
 stop()@CDP.jsm:104
 close()@RemoteAgent.jsm:138
--- a/Forums/Initialization/prepare_parser.py
+++ b/Forums/Initialization/prepare_parser.py
@ -9,6 +9,7 @@ from Forums.BestCardingWorld.parser import *
 from Forums.CryptBB.parser import *
 from Forums.OnniForums.parser import *
 from Forums.Altenens.parser import *
 from Forums.Procrax.parser import *

 from Forums.Classifier.classify_product import predict
 # from DarkWebMining_Sample.Forums.Classifier.classify_product import predict_semi
@ -154,6 +155,8 @@ def new_parse(forum, url, createLog):
                rmm = onniForums_description_parser(soup)
            elif forum == "Altenens":
                rmm = altenens_description_parser(soup)
            elif forum == "Procrax":
                rmm = procrax_description_parser(soup)

            # key = u"Top:" + rmm[0].upper().strip() + u" User:" + rmm[2][0].upper().strip()
            key = u"Url:" + os.path.basename(line2).replace(".html", "")
@ -233,6 +236,8 @@ def new_parse(forum, url, createLog):
                    rw = onniForums_listing_parser(soup)
                elif forum == "Altenens":
                    rw = altenens_listing_parser(soup)
                elif forum == "Procrax":
                    rw = procrax_listing_parser(soup)

            except:

--- a/Forums/OnniForums/pycache/crawler_selenium.cpython-310.pyc
+++ b/Forums/OnniForums/pycache/crawler_selenium.cpython-310.pyc
--- a/Forums/OnniForums/pycache/crawler_selenium.cpython-311.pyc
+++ b/Forums/OnniForums/pycache/crawler_selenium.cpython-311.pyc
--- a/Forums/OnniForums/pycache/parser.cpython-310.pyc
+++ b/Forums/OnniForums/pycache/parser.cpython-310.pyc
--- a/Forums/OnniForums/pycache/parser.cpython-311.pyc
+++ b/Forums/OnniForums/pycache/parser.cpython-311.pyc
--- a/Forums/OnniForums/pycache/parser_script.cpython-311.pyc
+++ b/Forums/OnniForums/pycache/parser_script.cpython-311.pyc
--- a/Forums/Procrax/crawler_selenium.py
+++ b/Forums/Procrax/crawler_selenium.py
@ -26,24 +26,28 @@ from Forums.Procrax.parser import procrax_links_parser
 from Forums.Utilities.utilities import cleanHTML

 counter = 1
 baseURL = 'https://procrax.cx/'
 BASE_URL = 'https://procrax.cx/'
 FORUM_NAME = 'Procrax'


 # Opens Tor Browser, crawls the website
 def startCrawling():
    opentor()
    # forumName = getForumName()
    driver = getAccess()
    # opentor()
    # driver = getAccess()

    if driver != 'down':
        try:
            login(driver)
            crawlForum(driver)
        except Exception as e:
            print(driver.current_url, e)
        closetor(driver)
    # if driver != 'down':
    #     try:
    #         login(driver)
    #         crawlForum(driver)
    #     except Exception as e:
    #         print(driver.current_url, e)
    #     closetor(driver)

    # new_parse(forumName, False)
    new_parse(
        forum=FORUM_NAME,
        url=BASE_URL, 
        createLog=False
    )


 # Opens Tor Browser
@ -139,10 +143,9 @@ def createFFDriver():
    return driver

 def getAccess():
    url = getFixedURL()
    driver = createFFDriver()
    try:
        driver.get(url)# open url in browser
        driver.get(BASE_URL)# open url in browser
        return driver
    except:
        driver.close()# close tab
@ -162,7 +165,7 @@ def savePage(page, url):
 def getFullPathName(url):
    from Forums.Initialization.forums_mining import config, CURRENT_DATE

    mainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + getForumName() + "/HTML_Pages")
    mainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + FORUM_NAME + "/HTML_Pages")
    fileName = getNameFromURL(url)
    if isDescriptionLink(url):
        fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
@ -185,17 +188,17 @@ def getInterestedLinks():
    links = []

    # # general hacking
    # links.append('https://procrax.cx/forums/general-hacking.24/')
    links.append('https://procrax.cx/forums/general-hacking.24/')
    # # hacking security tools
    # links.append('https://procrax.cx/forums/hacking-security-tools.20/')
    links.append('https://procrax.cx/forums/hacking-security-tools.20/')
    # # hacktube
    # links.append('https://procrax.cx/forums/hacktube.22/')
    links.append('https://procrax.cx/forums/hacktube.22/')
    # # cardable
    #  links.append('https://procrax.cx/forums/cardable-websites.28/')
    # #  tools
    # links.append('https://procrax.cx/forums/tools-bots-validators.73/')
    # general forum
    links.append('https://procrax.cx/forums/forum-discussions-updates.7/')
    # links.append('https://procrax.cx/forums/forum-discussions-updates.7/')


    return links
@ -229,7 +232,7 @@ def crawlForum(driver):
                    page = topic

                    while has_next_topic_page:
                        itemURL = urlparse.urljoin(baseURL, str(page))
                        itemURL = urlparse.urljoin(BASE_URL, str(page))
                        try:
                            driver.get(itemURL)
                        except:
@ -237,8 +240,8 @@ def crawlForum(driver):
                        savePage(driver.page_source, topic + f"page{counter}")  # very important

                        # comment out
                        if counter == 2:
                            break
                        # if counter == 2:
                        #     break

                        try:
                            page = driver.find_element(By.LINK_TEXT, value='Next').get_attribute('href')
@ -254,10 +257,10 @@ def crawlForum(driver):
                        driver.back()

                    # comment out
                    break
                    # break

                # comment out
                if count == 1:
                if count == 20:
                    break

                try:
--- a/Forums/Procrax/parser.py
+++ b/Forums/Procrax/parser.py
@ -7,11 +7,12 @@ from datetime import timedelta
 import re

 # Here, we are importing BeautifulSoup to search through the HTML tree
 from bs4 import BeautifulSoup
 from bs4 import BeautifulSoup, ResultSet, Tag

 # This is the method to parse the Description Pages (one page for each topic in the Listing Pages)

 def cryptBB_description_parser(soup):

 def procrax_description_parser(soup: Tag):

    # Fields to be parsed

@ -27,146 +28,36 @@ def cryptBB_description_parser(soup):

    # Finding the topic (should be just one coming from the Listing Page)

    li = soup.find("td", {"class": "thead"}).find('strong')
    li = soup.find("h1", {"class": "p-title-value"})
    topic = li.text
    topic = re.sub("\[\w*\]", '', topic)

    topic = topic.replace(",","")
    topic = topic.replace("\n","")
    topic = cleanString(topic.strip())

    # Finding the repeated tag that corresponds to the listing of posts

    # try:
    posts = soup.find('table', {"class": "tborder tfixed clear"}).find('td', {"id": "posts_container"}).find_all(
        'div', {"class": "post"})

    # For each message (post), get all the fields we are interested to:

    for ipost in posts:

        # Finding a first level of the HTML page

        post_wrapper = ipost.find('span', {"class": "largetext"})

        # Finding the author (user) of the post

        author = post_wrapper.text.strip()
        user.append(cleanString(author))  # Remember to clean the problematic characters

        # Finding the status of the author

        smalltext = ipost.find('div', {"class": "post_author"})

        '''
        # Testing here two possibilities to find this status and combine them
        if ipost.find('div', {"class": "deleted_post_author"}):
            status.append(-1)
            interest.append(-1)
            reputation.append(-1)
            addDate.append(-1)
            post.append("THIS POST HAS BEEN REMOVED!")
            sign.append(-1)
            feedback.append(-1)
            continue
        '''

        # CryptBB does have membergroup and postgroup

        membergroup = smalltext.find('div', {"class": "profile-rank"})
        postgroup = smalltext.find('div', {"class": "postgroup"})
        if membergroup != None:
            membergroup = membergroup.text.strip()
            if postgroup != None:
                postgroup = postgroup.text.strip()
                membergroup = membergroup + " - " + postgroup
        else:
            if postgroup != None:
                membergroup = postgroup.text.strip()
            else:
                membergroup = "-1"
        status.append(cleanString(membergroup))

        # Finding the interest of the author
        # CryptBB does not have blurb
        blurb = smalltext.find('li', {"class": "blurb"})
        if blurb != None:
            blurb = blurb.text.strip()
        else:
            blurb = "-1"
        interest.append(cleanString(blurb))

        # Finding the reputation of the user
        # CryptBB does have reputation
        author_stats = smalltext.find('div', {"class": "author_statistics"})
        karma = author_stats.find('strong')
        if karma != None:
            karma = karma.text
            karma = karma.replace("Community Rating: ", "")
            karma = karma.replace("Karma: ", "")
            karma = karma.strip()
        else:
            karma = "-1"
        reputation.append(cleanString(karma))

        # Get another useful tag from which to find the post date, post content and user's signature

        postarea = ipost.find('div', {"class": "post_content"})

        dt = postarea.find('span', {"class": "post_date"}).text
        # dt = dt.strip().split()
        dt = dt.strip()
        day=date.today()
        if "Yesterday" in dt:
            yesterday = day - timedelta(days=1)
            yesterday = yesterday.strftime('%m-%d-%Y')
            stime = dt.replace('Yesterday,','').strip()
            date_time_obj = yesterday+ ', '+stime
            date_time_obj = datetime.strptime(date_time_obj,'%m-%d-%Y, %I:%M %p')
        elif "hours ago" in dt:
            day = day.strftime('%m-%d-%Y')
            date_time_obj = postarea.find('span', {"class": "post_date"}).find('span')['title']
            date_time_obj = datetime.strptime(date_time_obj, '%m-%d-%Y, %I:%M %p')
        else:
            date_time_obj = datetime.strptime(dt, '%m-%d-%Y, %I:%M %p')
            stime = date_time_obj.strftime('%b %d, %Y')
            sdate = date_time_obj.strftime('%I:%M %p')
        addDate.append(date_time_obj)

        # Finding the post

        inner = postarea.find('div', {"class": "post_body scaleimages"})
        inner = inner.text.strip()
        post.append(cleanString(inner))

        # Finding the user's signature

        # signature = ipost.find('div', {"class": "post_wrapper"}).find('div', {"class": "moderatorbar"}).find('div', {"class": "signature"})
        signature = ipost.find('div', {"class": "signature scaleimages"})
        if signature != None:
            signature = signature.text.strip()
            # print(signature)
        else:
            signature = "-1"
        sign.append(cleanString(signature))

        # As no information about user's feedback was found, just assign "-1" to the variable

    
    thread: ResultSet[Tag] = soup.find("div", {"class": "block-body js-replyNewMessageContainer"}).find_all("article", {"data-author": True})

    for ipost in thread:
        username = ipost.find("h4", {"class": "message-name"}).text
        user.append(cleanString(username.strip()))
        
        date_posted = ipost.find("ul", {"class": "message-attribution-main listInline"}).find("time").get("datetime")
        datetime_obj = datetime.strptime(date_posted, "%Y-%m-%dT%H:%M:%S%z")
        addDate.append(datetime_obj)
        
        
        feedback.append("-1")

    '''
    except:
        if soup.find('td', {"class": "trow1"}).text == " You do not have permission to access this page. ":
            user.append("-1")
            status.append(-1)
            interest.append(-1)
            reputation.append(-1)
            addDate.append(-1)
            post.append("NO ACCESS TO THIS PAGE!")
            sign.append(-1)
            feedback.append(-1)
    '''

        
        user_status = ipost.find("h5", {"class": "userTitle message-userTitle"}).text
        status.append(cleanString(user_status.strip()))
        
        user_lvl = ipost.find("div", {"class": "afAwardLevel"}).text
        reputation.append(cleanString(user_lvl.strip()))
        
        sign.append("-1")
        
        user_post = ipost.find("article", {"class": "message-body js-selectToQuote"}).text
        post.append(cleanString(user_post.strip()))
        
        interest.append("-1")
        
        

    # Populate the final variable (this should be a list with all fields scraped)

@ -178,7 +69,7 @@ def cryptBB_description_parser(soup):

 # This is the method to parse the Listing Pages (one page with many posts)

 def cryptBB_listing_parser(soup):
 def procrax_listing_parser(soup: Tag):

    board = "-1"       # board name (the previous level of the topic in the Forum categorization tree.
                       # For instance: Security/Malware/Tools to hack Facebook. The board here should be Malware)
@ -193,59 +84,47 @@ def cryptBB_listing_parser(soup):
                       # Listing and Description pages)

    # Finding the board (should be just one)

    board = soup.find('span', {"class": "active"}).text
    board = cleanString(board.strip())

    # Finding the repeated tag that corresponds to the listing of topics

    itopics = soup.find_all('tr',{"class": "inline_row"})

    for itopic in itopics:

        # For each topic found, the structure holding the rest of the information can be of two types. Test both of them
        # so that no topic is missed

        # Adding the topic to the topic list
        try:
            topics = itopic.find('span', {"class": "subject_old"}).find('a').text
        except:
            topics = itopic.find('span', {"class": "subject_new"}).find('a').text
        topics = re.sub("\[\w*\]", '', topics)
        topic.append(cleanString(topics))

        # Counting how many topics we have found so far

        nm = len(topic)

        # Adding the url to the list of urls
        try:
            link = itopic.find('span', {"class": "subject_old"}).find('a').get('href')
        except:
            link = itopic.find('span',{"class": "subject_new"}).find('a').get('href')
        link = cleanLink(link)
        href.append(link)

        # Finding the author of the topic
        ps = itopic.find('div', {"class":"author smalltext"}).find('a').text
        user = ps.strip()
        author.append(cleanString(user))

        # Finding the number of replies
        columns = itopic.findChildren('td',recursive=False)
        replies = columns[3].text

        posts.append(cleanString(replies))

        # Finding the number of Views
        tview = columns[4].text
        views.append(cleanString(tview))

        # If there is no information about when the topic was added, just assign "-1" to the variable

        addDate.append("-1")

    return organizeTopics("CryptBB", nm, topic, board, author, views, posts, href, addDate)
    li = soup.find("h1", {"class": "p-title-value"})
    board = cleanString(li.text.strip())

    threads_list: ResultSet[Tag] = soup.find("div", {"class": "structItemContainer-group js-threadList"}).find_all("div", {"data-author": True})
    
    nm = len(threads_list)
    
    for thread in threads_list:
        thread_title = thread.find("div", {"class": "structItem-title"}).text
        topic.append(cleanString(thread_title.strip()))
        
        thread_author = thread.get("data-author")
        author.append(cleanString(thread_author))
        
        thread_views = thread.find("dl", {"class": "pairs pairs--justified structItem-minor"}).find('dd').text
        views.append(cleanString(thread_views.strip()))
        
        thread_replies = thread.find("dl", {"class": "pairs pairs--justified"}).find('dd').text
        # All threads contain one topic post and reply posts
        thread_total_posts = str(1 + int(thread_replies))
        posts.append(thread_total_posts)
        
        thread_date = thread.find("li", {"class": "structItem-startDate"}).find("time").get("datetime")
        datetime_obj = datetime.strptime(thread_date, "%Y-%m-%dT%H:%M:%S%z")
        addDate.append(datetime_obj)
        
        thread_link = thread.find("div", {"class": "structItem-title"}).find('a').get('href')
        href.append(thread_link)
    
    
    return organizeTopics(
        forum="Procrax",
        nm=nm,
        board=board,
        author=author,
        topic=topic,
        views=views,
        posts=posts,
        addDate=addDate,
        href=href
    )


 def procrax_links_parser(soup):
--- a/MarketPlaces/AnonymousMarketplace/pycache/crawler_selenium.cpython-310.pyc
+++ b/MarketPlaces/AnonymousMarketplace/pycache/crawler_selenium.cpython-310.pyc
--- a/MarketPlaces/AnonymousMarketplace/pycache/crawler_selenium.cpython-311.pyc
+++ b/MarketPlaces/AnonymousMarketplace/pycache/crawler_selenium.cpython-311.pyc
--- a/MarketPlaces/AnonymousMarketplace/pycache/parser.cpython-310.pyc
+++ b/MarketPlaces/AnonymousMarketplace/pycache/parser.cpython-310.pyc
--- a/MarketPlaces/AnonymousMarketplace/pycache/parser.cpython-311.pyc
+++ b/MarketPlaces/AnonymousMarketplace/pycache/parser.cpython-311.pyc
--- a/MarketPlaces/DB_Connection/pycache/db_connection.cpython-311.pyc
+++ b/MarketPlaces/DB_Connection/pycache/db_connection.cpython-311.pyc
--- a/MarketPlaces/Initialization/pycache/init.cpython-310.pyc
+++ b/MarketPlaces/Initialization/pycache/init.cpython-310.pyc
--- a/MarketPlaces/Initialization/pycache/init.cpython-311.pyc
+++ b/MarketPlaces/Initialization/pycache/init.cpython-311.pyc
--- a/MarketPlaces/Initialization/pycache/markets_mining.cpython-310.pyc
+++ b/MarketPlaces/Initialization/pycache/markets_mining.cpython-310.pyc
--- a/MarketPlaces/Initialization/pycache/markets_mining.cpython-311.pyc
+++ b/MarketPlaces/Initialization/pycache/markets_mining.cpython-311.pyc
--- a/MarketPlaces/Initialization/pycache/prepare_parser.cpython-310.pyc
+++ b/MarketPlaces/Initialization/pycache/prepare_parser.cpython-310.pyc
--- a/MarketPlaces/Initialization/pycache/prepare_parser.cpython-311.pyc
+++ b/MarketPlaces/Initialization/pycache/prepare_parser.cpython-311.pyc
--- a/MarketPlaces/Initialization/geckodriver.log
+++ b/MarketPlaces/Initialization/geckodriver.log
@ -15617,3 +15617,73 @@ unwatchForTargets()@TargetList.jsm:37
 destructor()@TargetList.jsm:109
 stop()@CDP.jsm:104
 close()@RemoteAgent.jsm:138
 1689619116242	geckodriver	INFO	Listening on 127.0.0.1:57366
 1689619118954	mozrunner::runner	INFO	Running command: "C:\\Users\\minhkhoitran\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "57367" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\MINHKH~1\\AppData\\Local\\Temp\\rust_mozprofile0Dg5aD"
 console.log: "TorSettings: loadFromPrefs()"
 console.log: "TorConnect: init()"
 console.log: "TorConnect: Entering Initial state"
 console.log: "TorConnect: Observed profile-after-change"
 console.log: "TorConnect: Observing topic 'TorProcessExited'"
 console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
 console.log: "TorConnect: Observing topic 'torsettings:ready'"
 console.log: "TorSettings: Observed profile-after-change"
 1689619119382	Marionette	INFO	Marionette enabled
 console.log: "TorConnect: Will load after bootstrap => [about:blank]"
 console.error: "Could not load engine [email protected]: Error: Extension is invalid"
 JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
 JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
 JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
 JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
 JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
 DevTools listening on ws://localhost:57367/devtools/browser/26c42825-1d86-4c6a-ad3b-817e084e0b36
 1689619120284	Marionette	INFO	Listening on port 57373
 1689619120428	RemoteAgent	WARN	TLS certificate errors will be ignored for this session
 JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\minhkhoitran\AppData\Local\Temp\rust_mozprofile0Dg5aD\thumbnails) because it does not exist
 1689619308722	Marionette	INFO	Stopped listening on port 57373
 JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
 JavaScript error: chrome://remote/content/marionette/cert.js, line 55: NS_ERROR_NOT_AVAILABLE: Component returned failure code: 0x80040111 (NS_ERROR_NOT_AVAILABLE) [nsICertOverrideService.setDisableAllSecurityChecksAndLetAttackersInterceptMyData]
 !!! error running onStopped callback: TypeError: callback is not a function
 JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
 JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\minhkhoitran\AppData\Local\Temp\rust_mozprofile0Dg5aD\thumbnails) because it does not exist
 Crash Annotation GraphicsCriticalError: |[C0][GFX1-]: Receive IPC close with reason=AbnormalShutdown (t=1960.99) 
 ###!!! [Child][MessageChannel] Error: (msgtype=0x3900E5,name=PContent::Msg_GraphicsError) Channel closing: too late to send/recv, messages will be lost

 [GFX1-]: Receive IPC close with reason=AbnormalShutdown
 1689619309292	RemoteAgent	ERROR	unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]"  nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)"  location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64"  data: no] Stack trace: stop()@TargetObserver.jsm:64
 unwatchForTabs()@TargetList.jsm:70
 unwatchForTargets()@TargetList.jsm:37
 destructor()@TargetList.jsm:109
 stop()@CDP.jsm:104
 close()@RemoteAgent.jsm:138
 1689619356214	geckodriver	INFO	Listening on 127.0.0.1:57526
 1689619360407	mozrunner::runner	INFO	Running command: "C:\\Users\\minhkhoitran\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "57527" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\MINHKH~1\\AppData\\Local\\Temp\\rust_mozprofileUEfwdk"
 console.log: "TorSettings: loadFromPrefs()"
 console.log: "TorConnect: init()"
 console.log: "TorConnect: Entering Initial state"
 console.log: "TorConnect: Observed profile-after-change"
 console.log: "TorConnect: Observing topic 'TorProcessExited'"
 console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
 console.log: "TorConnect: Observing topic 'torsettings:ready'"
 console.log: "TorSettings: Observed profile-after-change"
 1689619360903	Marionette	INFO	Marionette enabled
 console.log: "TorConnect: Will load after bootstrap => [about:blank]"
 console.error: "Could not load engine [email protected]: Error: Extension is invalid"
 JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
 JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
 JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
 JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
 JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
 DevTools listening on ws://localhost:57527/devtools/browser/85530b1c-e7e2-4313-8c36-704d0f5ce7da
 1689619362005	Marionette	INFO	Listening on port 57534
 1689619362321	RemoteAgent	WARN	TLS certificate errors will be ignored for this session
 1689619608554	Marionette	INFO	Stopped listening on port 57534
 JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
 !!! error running onStopped callback: TypeError: callback is not a function
 JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
 JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\minhkhoitran\AppData\Local\Temp\rust_mozprofileUEfwdk\thumbnails) because it does not exist
 1689619609120	RemoteAgent	ERROR	unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]"  nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)"  location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64"  data: no] Stack trace: stop()@TargetObserver.jsm:64
 unwatchForTabs()@TargetList.jsm:70
 unwatchForTargets()@TargetList.jsm:37
 destructor()@TargetList.jsm:109
 stop()@CDP.jsm:104
 close()@RemoteAgent.jsm:138
--- a/MarketPlaces/Initialization/marketsList.txt
+++ b/MarketPlaces/Initialization/marketsList.txt
@ -1 +1 @@
 AnonymousMarketplace
 M00nkeyMarket
--- a/MarketPlaces/M00nkeyMarket/pycache/crawler_selenium.cpython-310.pyc
+++ b/MarketPlaces/M00nkeyMarket/pycache/crawler_selenium.cpython-310.pyc
--- a/MarketPlaces/M00nkeyMarket/pycache/crawler_selenium.cpython-311.pyc
+++ b/MarketPlaces/M00nkeyMarket/pycache/crawler_selenium.cpython-311.pyc
--- a/MarketPlaces/M00nkeyMarket/pycache/parser.cpython-310.pyc
+++ b/MarketPlaces/M00nkeyMarket/pycache/parser.cpython-310.pyc
--- a/MarketPlaces/M00nkeyMarket/pycache/parser.cpython-311.pyc
+++ b/MarketPlaces/M00nkeyMarket/pycache/parser.cpython-311.pyc
--- a/MarketPlaces/M00nkeyMarket/crawler_selenium.py
+++ b/MarketPlaces/M00nkeyMarket/crawler_selenium.py
@ -27,25 +27,24 @@ from MarketPlaces.M00nkeyMarket.parser import m00nkey_links_parser
 from MarketPlaces.Utilities.utilities import cleanHTML

 counter = 1
 baseURL = 'http://moonkey4f2mkcp6hpackeea356puiry27h3dz3hzbt3adbmsk4gs7wyd.onion/'

 BASE_URL = 'http://moonkey4f2mkcp6hpackeea356puiry27h3dz3hzbt3adbmsk4gs7wyd.onion/'
 MARKET_NAME = 'M00nkeyMarket'

 # Opens Tor Browser, crawls the website, closes Tor, then parses.
 # Acts as the main entry point for the crawler; another function at the end of this file calls it.
 def startCrawling():
    """Run one crawl-and-parse cycle for this marketplace.

    Opens Tor, obtains a driver from getAccess(), and, unless getAccess()
    returned the string 'down', logs in and crawls. Any Exception raised by
    login/crawl is printed together with the driver's current URL instead of
    being propagated, after which closetor(driver) is called. new_parse is
    invoked at the end whether or not the crawl ran.

    NOTE(review): this span appears to contain both the pre-change and the
    post-change lines of a diff hunk (a commented-out and a live copy of the
    crawl logic, and two new_parse calls with different name/URL variables).
    Confirm which side is intended before relying on it.
    """
    # opentor()
    mktName = getMKTName()
    # driver = getAccess()
    #
    # if driver != 'down':
    #     try:
    #         login(driver)
    #         crawlForum(driver)
    #     except Exception as e:
    #         print(driver.current_url, e)
    #     closetor(driver)
    opentor()
    driver = getAccess()
    
    # getAccess() signals an unreachable site with the sentinel string 'down'.
    if driver != 'down':
        try:
            login(driver)
            crawlForum(driver)
        except Exception as e:
            # Best-effort crawl: report where it failed and carry on to cleanup.
            print(driver.current_url, e)
        closetor(driver)

    # NOTE(review): two parse calls; the first uses mktName/baseURL, the
    # second uses the MARKET_NAME/BASE_URL constants. Presumably only one is meant.
    new_parse(mktName, baseURL, False)
    new_parse(MARKET_NAME, BASE_URL, False)


 # Opens Tor Browser
@ -64,16 +63,16 @@ def opentor():

 # Returns the name of the website
 #return: name of site in string type
 def getMKTName():
    """Return the marketplace name as a string."""
    return 'M00nkeyMarket'
 # def getMKTName():
 #     name = 'M00nkeyMarket'
 #     return name


 # Return the base link of the website
 #return: url of base site in string type
 def getFixedURL():
    """Return the marketplace base URL as a string."""
    return 'http://moonkey4f2mkcp6hpackeea356puiry27h3dz3hzbt3adbmsk4gs7wyd.onion/'
 # def getFixedURL():
 #     url = 'http://moonkey4f2mkcp6hpackeea356puiry27h3dz3hzbt3adbmsk4gs7wyd.onion/'
 #     return url


 # Closes Tor Browser
@ -127,10 +126,9 @@ def createFFDriver():
 # The driver requests the URL to reach the site; if the site cannot be accessed, 'down' is returned.
 # return: the selenium driver, or the string 'down'
 def getAccess():
    url = getFixedURL()
    driver = createFFDriver()
    try:
        driver.get(url)
        driver.get(BASE_URL)
        return driver
    except:
        driver.close()
@ -175,7 +173,7 @@ def savePage(page, url):
 def getFullPathName(url):
    from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE

    mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
    mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + MARKET_NAME + "/HTML_Pages")
    fileName = getNameFromURL(url)
    if isDescriptionLink(url):
        fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
@ -237,7 +235,7 @@ def crawlForum(driver):
            while has_next_page:
                list = productPages(html)
                for item in list:
                    itemURL = urlparse.urljoin(baseURL, str(item))
                    itemURL = urlparse.urljoin(BASE_URL, str(item))
                    try:
                        driver.get(itemURL)
                    except:
--- a/MarketPlaces/Tor2door/pycache/crawler_selenium.cpython-310.pyc
+++ b/MarketPlaces/Tor2door/pycache/crawler_selenium.cpython-310.pyc
--- a/MarketPlaces/Tor2door/pycache/crawler_selenium.cpython-311.pyc
+++ b/MarketPlaces/Tor2door/pycache/crawler_selenium.cpython-311.pyc
--- a/MarketPlaces/Tor2door/pycache/parser.cpython-310.pyc
+++ b/MarketPlaces/Tor2door/pycache/parser.cpython-310.pyc
--- a/MarketPlaces/Tor2door/pycache/parser.cpython-311.pyc
+++ b/MarketPlaces/Tor2door/pycache/parser.cpython-311.pyc
--- a/MarketPlaces/TorBay/pycache/crawler_selenium.cpython-310.pyc
+++ b/MarketPlaces/TorBay/pycache/crawler_selenium.cpython-310.pyc
--- a/MarketPlaces/TorBay/pycache/crawler_selenium.cpython-311.pyc
+++ b/MarketPlaces/TorBay/pycache/crawler_selenium.cpython-311.pyc
--- a/MarketPlaces/TorBay/pycache/parser.cpython-310.pyc
+++ b/MarketPlaces/TorBay/pycache/parser.cpython-310.pyc
--- a/MarketPlaces/TorBay/pycache/parser.cpython-311.pyc
+++ b/MarketPlaces/TorBay/pycache/parser.cpython-311.pyc