
added new prepare_parser.py and cleaned output for marketplaces

main | westernmeadow | 1 year ago | commit 1a1e887d35
33 changed files with 653 additions and 589 deletions
 1. +0   -0    Forums/BestCardingWorld/__init__.py
 2. +0   -0    Forums/CryptBB/__init__.py
 3. BIN        Forums/CryptBB/captcha.png
 4. +1   -1    Forums/Initialization/forums_mining.py
 5. +11  -7    Forums/Initialization/prepare_parser.py
 6. +11  -9    MarketPlaces/AnonymousMarketplace/crawler_selenium.py
 7. +5   -5    MarketPlaces/AnonymousMarketplace/parser.py
 8. +15  -13   MarketPlaces/Apocalypse/crawler_selenium.py
 9. +16  -9    MarketPlaces/BlackPyramid/crawler_selenium.py
10. +16  -14   MarketPlaces/CityMarket/crawler_selenium.py
11. +6   -4    MarketPlaces/CypherMarketplace/crawler_selenium.py
12. BIN        MarketPlaces/DarkFox/captcha.png
13. +10  -6    MarketPlaces/DarkFox/crawler_selenium.py
14. +18  -18   MarketPlaces/DarkMatter/crawler_selenium.py
15. +10  -8    MarketPlaces/DarkTor/crawler_selenium.py
16. +16  -15   MarketPlaces/DigitalThriftShop/crawler_selenium.py
17. +26  -24   MarketPlaces/HiddenMarket/crawler_selenium.py
18. +8   -1    MarketPlaces/Initialization/marketsList.txt
19. +25  -8    MarketPlaces/Initialization/markets_mining.py
20. +262 -230  MarketPlaces/Initialization/prepare_parser.py
21. +19  -17   MarketPlaces/LionMarketplace/crawler_selenium.py
22. +17  -15   MarketPlaces/M00nkeyMarket/crawler_selenium.py
23. +17  -43   MarketPlaces/MikesGrandStore/crawler_selenium.py
24. +26  -17   MarketPlaces/Nexus/crawler_selenium.py
25. +3   -3    MarketPlaces/Nexus/parser.py
26. +33  -40   MarketPlaces/RobinhoodMarket/crawler_selenium.py
27. +7   -5    MarketPlaces/ThiefWorld/crawler_selenium.py
28. BIN        MarketPlaces/Tor2door/captcha.png
29. +6   -4    MarketPlaces/Tor2door/crawler_selenium.py
30. +15  -13   MarketPlaces/TorBay/crawler_selenium.py
31. +20  -18   MarketPlaces/TorMarket/crawler_selenium.py
32. +33  -41   MarketPlaces/ViceCity/crawler_selenium.py
33. +1   -1    setup.ini

+0 -0  Forums/BestCardingWorld/__init__.py


+0 -0  Forums/CryptBB/__init__.py


BIN  Forums/CryptBB/captcha.png (Width: 200 | Height: 60 | Size: 16 KiB)

+1 -1  Forums/Initialization/forums_mining.py

@@ -135,7 +135,7 @@ if __name__ == '__main__':
     elif forum == 'Libre':
         crawlerLibre()
 
-    print("Scraping process completed!")
+    print("\nScraping process completed!")


+11 -7  Forums/Initialization/prepare_parser.py

@@ -112,8 +112,6 @@ def parse_listing(forum, listingFile, soup, createLog, logFile):
     try:
-        rw = []
-
         if forum == "BestCardingWorld":
             rw = bestcardingworld_listing_parser(soup)
         elif forum == "Cardingleaks":
@@ -128,16 +126,19 @@ def parse_listing(forum, listingFile, soup, createLog, logFile):
             rw = procrax_listing_parser(soup)
         elif forum == "Libre":
             rw = libre_listing_parser(soup)
+        else:
+            print("MISSING CALL TO LISTING PARSER IN PREPARE_PARSER.PY!")
+            raise Exception
 
         return rw
     except:
         incrementError()
-        print("There was a problem to read the file " + listingFile + " in the listing section!")
+        print("There was a problem to parse the file " + listingFile + " in the listing section!")
         traceback.print_exc()
         if createLog:
             logFile.write(
-                str(nError) + ". There was a problem to read the file " + listingFile + " in the Listing section.\n")
+                str(nError) + ". There was a problem to parse the file " + listingFile + " in the Listing section.\n")
         return None
@@ -145,8 +146,6 @@ def parse_description(forum, descriptionFile, soup, createLog, logFile):
     try:
-        rmm = []
-
         if forum == "BestCardingWorld":
             rmm = bestcardingworld_description_parser(soup)
         elif forum == "Cardingleaks":
@@ -161,6 +160,9 @@ def parse_description(forum, descriptionFile, soup, createLog, logFile):
             rmm = procrax_description_parser(soup)
         elif forum == "Libre":
             rmm = libre_description_parser(soup)
+        else:
+            print("MISSING CALL TO DESCRIPTION PARSER IN PREPARE_PARSER.PY!")
+            raise Exception
 
         return rmm
     except:
@@ -239,7 +241,9 @@ def new_parse(forum, url, createLog):
             logFile = open(mainDir + f"/{CURRENT_DATE}/" + forum + "_" + CURRENT_DATE + ".log", "w")
         except:
             print("Could not open log file!")
-            raise SystemExit
+            createLog = False
+            logFile = None
+            # raise SystemExit
     else:
         logFile = None
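Note on the pattern: both parsers now fail loudly on an unmapped forum instead of silently returning the empty rw/rmm that the deleted initializers provided. A dictionary dispatch is an equivalent, more compact way to get the same guarantee; the sketch below is illustrative, with stub parsers standing in for the real Forums.*.parser imports.

    # Illustrative sketch only: stubs stand in for the real
    # bestcardingworld_listing_parser / libre_listing_parser imports.
    def bestcardingworld_listing_parser(soup):
        return []

    def libre_listing_parser(soup):
        return []

    LISTING_PARSERS = {
        "BestCardingWorld": bestcardingworld_listing_parser,
        "Libre": libre_listing_parser,
    }

    def parse_listing(forum, soup):
        if forum not in LISTING_PARSERS:
            # same effect as the new else-branch: abort loudly instead of
            # returning an empty list that hides the missing parser
            raise Exception("MISSING CALL TO LISTING PARSER IN PREPARE_PARSER.PY!")
        return LISTING_PARSERS[forum](soup)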


+11 -9  MarketPlaces/AnonymousMarketplace/crawler_selenium.py

@@ -32,19 +32,19 @@ baseURL = 'http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion
 # Opens Tor Browser, crawls the website, then parses, then closes tor
 #acts like the main method for the crawler, another function at the end of this code calls this function later
 def startCrawling():
-    opentor()
+    # opentor()
     mktName = getMKTName()
     driver = getAccess()
 
     if driver != 'down':
         try:
-            login(driver)
+            # login(driver)
             crawlForum(driver)
         except Exception as e:
             print(driver.current_url, e)
         closetor(driver)
 
-    # new_parse(mktName, baseURL, False)
+    new_parse(mktName, baseURL, True)
 
 
 # Opens Tor Browser
@@ -104,7 +104,7 @@ def createFFDriver():
     ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
     ff_prof.set_preference("network.dns.disablePrefetch", True)
     ff_prof.set_preference("network.http.sendRefererHeader", 0)
-    ff_prof.set_preference("permissions.default.image", 2)
+    ff_prof.set_preference("permissions.default.image", 1)
     ff_prof.set_preference("browser.download.folderList", 2)
     ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
     ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
@@ -120,6 +120,8 @@ def createFFDriver():
     driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
 
+    driver.maximize_window()
+
     return driver
@@ -185,11 +187,11 @@ def getNameFromURL(url):
 def getInterestedLinks():
     links = []
 
-    # carding
-    links.append('http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion/product-category/carding/')
+    # # carding
+    # links.append('http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion/product-category/carding/')
     # # hacked paypal
-    links.append('http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion/product-category/hacked-paypal-accounts/')
-    # # hacking services
+    # links.append('http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion/product-category/hacked-paypal-accounts/')
+    # hacking services
     links.append('http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion/product-category/hacking-services/')
 
     return links
@@ -250,7 +252,7 @@ def crawlForum(driver):
             print(link, e)
             i += 1
 
-    input("Crawling AnonymousMarketplace forum done sucessfully. Press ENTER to continue\n")
+    print("Crawling the AnonymousMarketplace market done.")
 
 
 # Returns 'True' if the link is a description link
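The permissions.default.image flip from 2 to 1 is the functional change here: in Firefox, 2 blocks all images and 1 loads them, so product thumbnails and image captchas render again. A condensed sketch of the driver setup these crawlers share, written against Selenium 4's Options API; the SOCKS port 9150 and the binary path are assumptions, not values from this repo's setup.ini.

    from selenium import webdriver
    from selenium.webdriver.firefox.options import Options

    def create_tor_driver(binary_path="/usr/bin/firefox"):
        opts = Options()
        opts.binary_location = binary_path                       # assumed path
        opts.set_preference("network.proxy.type", 1)             # manual proxy settings
        opts.set_preference("network.proxy.socks", "127.0.0.1")  # Tor SOCKS host (assumed)
        opts.set_preference("network.proxy.socks_port", 9150)    # Tor Browser default (assumed)
        opts.set_preference("permissions.default.image", 1)      # 1 = load images; 2 would block them
        driver = webdriver.Firefox(options=opts)
        driver.maximize_window()  # added across crawlers in this commit
        return driver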


+5 -5  MarketPlaces/AnonymousMarketplace/parser.py

@@ -88,14 +88,14 @@ def anonymousMarketplace_listing_parser(soup: Tag):
     href = []  # 20 Product_Links
 
-    product_list: ResultSet[Tag] = soup.find("ul", {"class": "products columns-4"}).find_all("li")
+    product_list: ResultSet[Tag] = soup.find("ul", {"class": "product_list_widget"}).find_all("li")
     for item in product_list:
         item_href = item.find("a").get("href")
         href.append(item_href)
 
-        item_name = item.find("h2", {"class": "woocommerce-loop-product__title"}).text
-        name.append(cleanString(item_name.strip()))
+        item_name = item.find("span", {"class": "product-title"}).text
+        name.append(cleanString(item_name.strip()))
 
         item_rating = item.find("div", {"class": "star-rating"}).find("strong", {"class": "rating"}).text
         rating_item.append(cleanString(item_rating.strip()))
@@ -167,10 +167,10 @@ def anonymous_links_parser(soup):
     # Returning all links that should be visited by the Crawler
     href = []
-    listing = soup.find('ul', {"class": "products columns-4"}).findAll('li')
+    listing = soup.find('ul', {"class": "product_list_widget"}).findAll('li')
 
     for a in listing:
-        bae = a.find('a', {"class": "woocommerce-LoopProduct-link woocommerce-loop-product__link"}, href=True)
+        bae = a.find('a', href=True)
         link = bae['href']
         href.append(link)
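The selector change tracks the market's move from WooCommerce's products columns-4 grid to its product_list_widget sidebar markup, with product names in a span instead of an h2. A toy check of the updated selectors against fabricated HTML:

    # The HTML snippet is fabricated for the test; selectors match the diff.
    from bs4 import BeautifulSoup

    html = """
    <ul class="product_list_widget">
      <li><a href="http://example.onion/p/1"><span class="product-title">Item one</span></a></li>
      <li><a href="http://example.onion/p/2"><span class="product-title">Item two</span></a></li>
    </ul>
    """
    soup = BeautifulSoup(html, "html.parser")
    for item in soup.find("ul", {"class": "product_list_widget"}).find_all("li"):
        link = item.find("a", href=True)["href"]
        name = item.find("span", {"class": "product-title"}).text.strip()
        print(name, "->", link)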


+15 -13  MarketPlaces/Apocalypse/crawler_selenium.py

@@ -34,17 +34,17 @@ baseURL = 'http://apocam5hnoqskkmhr325nivjuh5phbmmggadxgcjabzzirap5iklkxad.onion
 def startCrawling():
     # opentor()
     mktName = getMKTName()
-    # driver = getAccess()
+    driver = getAccess()
 
-    # if driver != 'down':
-    #     try:
-    #         login(driver)
-    #         crawlForum(driver)
-    #     except Exception as e:
-    #         print(driver.current_url, e)
-    #     closetor(driver)
+    if driver != 'down':
+        try:
+            login(driver)
+            crawlForum(driver)
+        except Exception as e:
+            print(driver.current_url, e)
+        closetor(driver)
 
-    new_parse(mktName, baseURL, False)
+    new_parse(mktName, baseURL, True)
 
 
 # Opens Tor Browser
@@ -120,6 +120,8 @@ def createFFDriver():
     driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
 
+    driver.maximize_window()
+
     return driver
@@ -201,8 +203,8 @@ def getNameFromURL(url):
 def getInterestedLinks():
     links = []
 
-    # Hacking Services
-    links.append('http://apocam5hnoqskkmhr325nivjuh5phbmmggadxgcjabzzirap5iklkxad.onion/subcategory/19')
+    # # Hacking Services
+    # links.append('http://apocam5hnoqskkmhr325nivjuh5phbmmggadxgcjabzzirap5iklkxad.onion/subcategory/19')
     # software and malware
     links.append('http://apocam5hnoqskkmhr325nivjuh5phbmmggadxgcjabzzirap5iklkxad.onion/subcategory/30')
@@ -244,7 +246,7 @@ def crawlForum(driver):
                         driver.back()
 
                 # comment out
-                break
+                # break
 
                 # comment out
                 if count == 1:
@@ -264,7 +266,7 @@ def crawlForum(driver):
             print(link, e)
             i += 1
 
-    input("Crawling Apocalypse forum done sucessfully. Press ENTER to continue\n")
+    print("Crawling the Apocalypse market done.")
 
 
 # Returns 'True' if the link is a description link


+16 -9  MarketPlaces/BlackPyramid/crawler_selenium.py

@@ -26,8 +26,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
 from MarketPlaces.BlackPyramid.parser import blackpyramid_links_parser
 from MarketPlaces.Utilities.utilities import cleanHTML
 
-config = configparser.ConfigParser()
-config.read('../../setup.ini')
-
 counter = 1
 baseURL = 'http://blackpyoc3gbnrlvxqvvytd3kxqj7pd226i2gvfyhysj24ne2snkmnyd.onion/login/'
@@ -35,8 +33,8 @@ baseURL = 'http://blackpyoc3gbnrlvxqvvytd3kxqj7pd226i2gvfyhysj24ne2snkmnyd.onion
 # Opens Tor Browser, crawls the website, then parses, then closes tor
 #acts like the main method for the crawler, another function at the end of this code calls this function later
 def startCrawling():
-    opentor()
-    # mktName = getMKTName()
+    # opentor()
+    mktName = getMKTName()
     driver = getAccess()
 
     if driver != 'down':
@@ -47,12 +45,14 @@ def startCrawling():
             print(driver.current_url, e)
         closetor(driver)
 
-    # new_parse(forumName, baseURL, False)
+    new_parse(mktName, baseURL, True)
 
 
 # Opens Tor Browser
 #prompts for ENTER input to continue
 def opentor():
+    from MarketPlaces.Initialization.markets_mining import config
+
     global pid
     print("Connecting Tor...")
     pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@@ -91,6 +91,8 @@ def closetor(driver):
 # Creates FireFox 'driver' and configure its 'Profile'
 # to use Tor proxy and socket
 def createFFDriver():
+    from MarketPlaces.Initialization.markets_mining import config
+
     ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
     ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@@ -119,6 +121,8 @@ def createFFDriver():
     driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
 
+    driver.maximize_window()
+
     return driver
@@ -171,12 +175,14 @@ def savePage(driver, page, url):
 # Gets the full path of the page to be saved along with its appropriate file name
 #@param: raw url as crawler crawls through every site
 def getFullPathName(url):
-    from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
+    from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
+
+    mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
     fileName = getNameFromURL(url)
     if isDescriptionLink(url):
-        fullPath = r'..\BlackPyramid\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
+        fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
     else:
-        fullPath = r'..\BlackPyramid\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
+        fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
     return fullPath
@@ -210,6 +216,7 @@ def getInterestedLinks():
     # links.append('http://blackpyoc3gbnrlvxqvvytd3kxqj7pd226i2gvfyhysj24ne2snkmnyd.onion/search/results/')
     # # Services
     # links.append('http://blackpyoc3gbnrlvxqvvytd3kxqj7pd226i2gvfyhysj24ne2snkmnyd.onion/search/results/')
+
     return links
@@ -270,7 +277,7 @@ def crawlForum(driver):
             print(link, e)
             i += 1
 
-    input("Crawling BlackPyramid forum done sucessfully. Press ENTER to continue\n")
+    print("Crawling the BlackPyramid market done.")
 
 
 # Returns 'True' if the link is a description link
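BlackPyramid drops its module-level ConfigParser, which re-read ../../setup.ini relative to whatever the working directory happened to be, and instead imports the shared config from markets_mining inside each function that needs it; deferring the import to call time also avoids a circular import with the initialization package. A self-contained toy of that pattern, with a fake module standing in for MarketPlaces.Initialization.markets_mining:

    import configparser
    import sys
    import types

    # stand-in for MarketPlaces/Initialization/markets_mining.py
    mining = types.ModuleType("markets_mining")
    mining.config = configparser.ConfigParser()
    mining.config.read_string("[TOR]\nfirefox_binary_path = /usr/bin/firefox\n")
    sys.modules["markets_mining"] = mining

    # stand-in for a crawler helper that now imports config lazily
    def firefox_binary_path():
        from markets_mining import config  # resolved at call time, not at module import
        return config.get("TOR", "firefox_binary_path")

    print(firefox_binary_path())  # /usr/bin/firefox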


+16 -14  MarketPlaces/CityMarket/crawler_selenium.py

@@ -33,8 +33,8 @@ baseURL = 'http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion
 # Opens Tor Browser, crawls the website, then parses, then closes tor
 #acts like the main method for the crawler, another function at the end of this code calls this function later
 def startCrawling():
-    opentor()
-    # mktName = getMKTName()
+    # opentor()
+    mktName = getMKTName()
     driver = getAccess()
 
     if driver != 'down':
@@ -45,7 +45,7 @@ def startCrawling():
             print(driver.current_url, e)
         closetor(driver)
 
-    # new_parse(forumName, baseURL, False)
+    new_parse(mktName, baseURL, True)
 
 
 # Opens Tor Browser
@@ -121,6 +121,8 @@ def createFFDriver():
     driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
 
+    driver.maximize_window()
+
     return driver
@@ -198,18 +200,18 @@ def getNameFromURL(url):
 def getInterestedLinks():
     links = []
 
-    # Hiring hacker
-    links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=3')
+    # # Hiring hacker
+    # links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=3')
     # virus and malware
     links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=15')
-    # ddos
-    links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=16')
-    # software
-    links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=17')
-    # botnets
-    links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=18')
-    # hacking service
-    links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=31')
+    # # ddos
+    # links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=16')
+    # # software
+    # links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=17')
+    # # botnets
+    # links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=18')
+    # # hacking service
+    # links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=31')
 
     return links
@@ -269,7 +271,7 @@ def crawlForum(driver):
             print(link, e)
             i += 1
 
-    input("Crawling CityMarket forum done sucessfully. Press ENTER to continue\n")
+    print("Crawling the CityMarket market done.")
 
 
 # Returns 'True' if the link is a description link


+6 -4  MarketPlaces/CypherMarketplace/crawler_selenium.py

@@ -32,8 +32,8 @@ baseURL = 'http://6c5qa2ke2esh6ake6u6yoxjungz2czbbl7hqxl75v5k37frtzhxuk7ad.onion
 # Opens Tor Browser, crawls the website, then parses, then closes tor
 #acts like the main method for the crawler, another function at the end of this code calls this function later
 def startCrawling():
-    opentor()
-    # mktName = getMKTName()
+    # opentor()
+    mktName = getMKTName()
     driver = getAccess()
 
     if driver != 'down':
@@ -44,7 +44,7 @@ def startCrawling():
             print(driver.current_url, e)
         closetor(driver)
 
-    # new_parse(forumName, baseURL, False)
+    new_parse(mktName, baseURL, True)
 
 
 # Opens Tor Browser
@@ -120,6 +120,8 @@ def createFFDriver():
     driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
 
+    driver.maximize_window()
+
     return driver
@@ -263,7 +265,7 @@ def crawlForum(driver):
             print(link, e)
             i += 1
 
-    input("Crawling CypherMarketplace forum done sucessfully. Press ENTER to continue\n")
+    print("Crawling the CypherMarketplace market done.")
 
 
 # Returns 'True' if the link is a description link


BIN  MarketPlaces/DarkFox/captcha.png (Width: 150 | Height: 150 | Size: 55 KiB)

+10 -6  MarketPlaces/DarkFox/crawler_selenium.py

@@ -30,7 +30,7 @@ baseURL = 'http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion
 # Opens Tor Browser, crawls the website, then parses, then closes tor
 #acts like the main method for the crawler, another function at the end of this code calls this function later
 def startCrawling():
-    opentor()
+    # opentor()
     mktName = getMKTName()
     driver = getAccess()
@@ -42,7 +42,7 @@ def startCrawling():
             print(driver.current_url, e)
         closetor(driver)
 
-    new_parse(mktName, baseURL, False)
+    new_parse(mktName, baseURL, True)
 
 
 # Opens Tor Browser
@@ -124,6 +124,7 @@ def createFFDriver():
     driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
+    driver.maximize_window()
 
     return driver
@@ -145,6 +146,7 @@ def getAccess():
 # then allows for manual solving of captcha in the terminal
 #@param: current selenium web driver
 def captcha(driver):
+    '''
     # wait for captcha page show up
     WebDriverWait(driver, 100).until(EC.visibility_of_element_located((By.XPATH, "/html/body/div/div/form/button[1]")))
@@ -168,6 +170,9 @@ def captcha(driver):
     # click the verify(submit) button
     driver.find_element(by=By.XPATH, value="/html/body/div/div/form/button[1]").click()
+    '''
+
+    input("Press ENTER when CAPTCHA is completed\n")
 
     # wait for listing page show up (This Xpath may need to change based on different seed url)
     WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
@@ -220,8 +225,7 @@ def getInterestedLinks():
     # # Digital Products
     # links.append('http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion/category/0e384d5f-26ef-4561-b5a3-ff76a88ab781')
     # Software and Malware
-    # links.append('http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion/category/6b71210f-f1f9-4aa3-8f89-bd9ee28f7afc')
-    links.append('http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion/category/6b71210f-f1f9-4aa3-8f89-bd9ee28f7afc?page=15')
+    links.append('http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion/category/6b71210f-f1f9-4aa3-8f89-bd9ee28f7afc')
     # # Services
     # links.append('http://57d5j6bbwlpxbxe5tsjjy3vziktv3fo2o5j3nheo4gpg6lzpsimzqzid.onion/category/b9dc5846-5024-421e-92e6-09ba96a03280')
     # # Miscellaneous
@@ -270,7 +274,7 @@ def crawlForum(driver):
                     break
 
                 # comment out
-                if count == 0:
+                if count == 1:
                     break
 
             try:
@@ -287,7 +291,7 @@ def crawlForum(driver):
             print(link, e)
             i += 1
 
-    input("Crawling BestCardingWorld forum done sucessfully. Press ENTER to continue\n")
+    print("Crawling the DarkFox market done.")
 
 
 # Returns 'True' if the link is a description link
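DarkFox's scripted captcha branch (screenshotting the puzzle and submitting the answer) is fenced off inside a docstring, and the crawler now just blocks until a human solves the captcha in the browser. The same pattern in minimal form; the post-captcha XPath here is illustrative:

    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC

    def solve_captcha_manually(driver, ready_xpath="//div[@id='main']"):
        # a human completes the challenge in the visible browser window
        input("Press ENTER when CAPTCHA is completed\n")
        # then block until the page behind the captcha has actually rendered
        WebDriverWait(driver, 100).until(
            EC.visibility_of_element_located((By.XPATH, ready_xpath)))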


+18 -18  MarketPlaces/DarkMatter/crawler_selenium.py

@@ -32,7 +32,7 @@ baseURL = 'http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion
 # Opens Tor Browser, crawls the website, then parses, then closes tor
 #acts like the main method for the crawler, another function at the end of this code calls this function later
 def startCrawling():
-    opentor()
+    # opentor()
     mktName = getMKTName()
     driver = getAccess()
@@ -44,7 +44,7 @@ def startCrawling():
             print(driver.current_url, e)
         closetor(driver)
 
-    new_parse(mktName, baseURL, False)
+    new_parse(mktName, baseURL, True)
 
 
 # Opens Tor Browser
@@ -121,6 +121,8 @@ def createFFDriver():
     driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
 
+    driver.maximize_window()
+
     return driver
@@ -185,15 +187,15 @@ def getNameFromURL(url):
 def getInterestedLinks():
     links = []
 
-    # digital fraud software
-    links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=76')
-    # legit
-    links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=78')
+    # # digital fraud software
+    # links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=76')
+    # # legit
+    # links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=78')
     # # hack guides
-    links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=94')
+    # links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=94')
     # # services
-    links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=117')
-    # # software/malware
+    # links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=117')
+    # software/malware
     links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=121')
 
     return links
@@ -236,16 +238,14 @@ def crawlForum(driver):
                         driver.back()
                         # to keep from detecting click speed
 
-                # # comment out
-                # break
-                #
-                # # comment out
-                # if count == 1:
-                #     break
+                # comment out
+                break
+
+                # comment out
+                if count == 1:
+                    break
 
             try:
-                # nav = driver.find_element(by=By.XPATH, value='/html/body/table[1]/tbody/tr/td/form/div/div[2]/table[2]')
-                # a = nav.find_element(by=By.LINK_TEXT, value=">")
                 link = driver.find_element(by=By.LINK_TEXT, value=">").get_attribute('href')
                 if link == "":
                     raise NoSuchElementException
@@ -258,7 +258,7 @@ def crawlForum(driver):
             print(link, e)
             i += 1
 
-    input("Crawling DarkMatter forum done sucessfully. Press ENTER to continue\n")
+    print("Crawling the DarkMatter market done.")
 
 
 # Returns 'True' if the link is a description link


+10 -8  MarketPlaces/DarkTor/crawler_selenium.py

@@ -31,8 +31,8 @@ baseURL = 'http://zuauw53dukqdmll5p3fld26ns2gepcyfmbofobjczdni6ecmkoitnfid.onion
 # Opens Tor Browser, crawls the website, then parses, then closes tor
 #acts like the main method for the crawler, another function at the end of this code calls this function later
 def startCrawling():
-    opentor()
-    # mktName = getMKTName()
+    # opentor()
+    mktName = getMKTName()
     driver = getAccess()
 
     if driver != 'down':
@@ -43,7 +43,7 @@ def startCrawling():
             print(driver.current_url, e)
         closetor(driver)
 
-    # new_parse(forumName, baseURL, False)
+    new_parse(mktName, baseURL, True)
 
 
 # Opens Tor Browser
@@ -119,6 +119,8 @@ def createFFDriver():
     driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
 
+    driver.maximize_window()
+
     return driver
@@ -186,10 +188,10 @@ def getInterestedLinks():
     # Hacking
     links.append('http://zuauw53dukqdmll5p3fld26ns2gepcyfmbofobjczdni6ecmkoitnfid.onion/product-category/hacking-services/')
-    # Carding
-    links.append('http://zuauw53dukqdmll5p3fld26ns2gepcyfmbofobjczdni6ecmkoitnfid.onion/product-category/carding/')
-    # hacked paypals
-    links.append('http://zuauw53dukqdmll5p3fld26ns2gepcyfmbofobjczdni6ecmkoitnfid.onion/product-category/hacked-paypal-accounts/')
+    # # Carding
+    # links.append('http://zuauw53dukqdmll5p3fld26ns2gepcyfmbofobjczdni6ecmkoitnfid.onion/product-category/carding/')
+    # # hacked paypals
+    # links.append('http://zuauw53dukqdmll5p3fld26ns2gepcyfmbofobjczdni6ecmkoitnfid.onion/product-category/hacked-paypal-accounts/')
 
     return links
@@ -248,7 +250,7 @@ def crawlForum(driver):
             print(link, e)
             i += 1
 
-    input("Crawling DarkTor forum done sucessfully. Press ENTER to continue\n")
+    print("Crawling the DarkTor market done.")
 
 
 # Returns 'True' if the link is a description link


+16 -15  MarketPlaces/DigitalThriftShop/crawler_selenium.py

@@ -34,17 +34,17 @@ baseURL = 'http://kw4zlnfhxje7top26u57iosg55i7dzuljjcyswo2clgc3mdliviswwyd.onion
 def startCrawling():
     # opentor()
     mktName = getMKTName()
-    # driver = getAccess()
+    driver = getAccess()
 
-    # if driver != 'down':
-    #     try:
-    #         login(driver)
-    #         crawlForum(driver)
-    #     except Exception as e:
-    #         print(driver.current_url, e)
-    #     closetor(driver)
+    if driver != 'down':
+        try:
+            login(driver)
+            crawlForum(driver)
+        except Exception as e:
+            print(driver.current_url, e)
+        closetor(driver)
 
-    new_parse(mktName, baseURL, False)
+    new_parse(mktName, baseURL, True)
 
 
 # Opens Tor Browser
@@ -91,7 +91,6 @@ def closetor(driver):
 def createFFDriver():
     from MarketPlaces.Initialization.markets_mining import config
-
     ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
 
     ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@@ -120,6 +119,8 @@ def createFFDriver():
     driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
 
+    driver.maximize_window()
+
     return driver
@@ -189,8 +190,8 @@ def getInterestedLinks():
     links.append('http://kw4zlnfhxje7top26u57iosg55i7dzuljjcyswo2clgc3mdliviswwyd.onion/product-category/botnets/')
     # # data leak
     # links.append('http://kw4zlnfhxje7top26u57iosg55i7dzuljjcyswo2clgc3mdliviswwyd.onion/product-category/dataleak/')
-    # databases
-    links.append('http://kw4zlnfhxje7top26u57iosg55i7dzuljjcyswo2clgc3mdliviswwyd.onion/product-category/databases/')
+    # # databases
+    # links.append('http://kw4zlnfhxje7top26u57iosg55i7dzuljjcyswo2clgc3mdliviswwyd.onion/product-category/databases/')
     # # ransomware
     # links.append('http://kw4zlnfhxje7top26u57iosg55i7dzuljjcyswo2clgc3mdliviswwyd.onion/product-category/ransomware/')
     # # rats
@@ -234,10 +235,10 @@ def crawlForum(driver):
                         driver.back()
 
                 # comment out
-                # break
+                break
 
                 # comment out
-                if count == 10:
+                if count == 1:
                     break
 
             try:
@@ -254,7 +255,7 @@ def crawlForum(driver):
             print(link, e)
             i += 1
 
-    input("Crawling DigitalThriftShop forum done sucessfully. Press ENTER to continue\n")
+    print("Crawling the DigitalThriftShop market done.")
 
 
 # Returns 'True' if the link is a description link


+26 -24  MarketPlaces/HiddenMarket/crawler_selenium.py

@@ -29,7 +29,7 @@ baseURL = 'http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion
 # Opens Tor Browser, crawls the website
 def startCrawling():
-    opentor()
+    # opentor()
     marketName = getMKTName()
     driver = getAccess()
@@ -41,7 +41,7 @@ def startCrawling():
             print(driver.current_url, e)
         closetor(driver)
 
-    new_parse(marketName, baseURL, False)
+    new_parse(marketName, baseURL, True)
 
 
 # Opens Tor Browser
@@ -161,6 +161,8 @@ def createFFDriver():
     driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
 
+    driver.maximize_window()
+
     return driver
@@ -211,27 +213,27 @@ def getInterestedLinks():
     links = []
 
     # # Civil Software
-    links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/civil_softwares')
+    # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/civil_softwares')
     # # Tutorials - Carding
-    links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/carding')
-    # # Digital - Hacks
+    # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/carding')
+    # Digital - Hacks
     links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/hacks')
-    # Digital - Exploit Kit
-    links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/exploit_kit')
+    # # Digital - Exploit Kit
+    # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/exploit_kit')
     # # 0Day
-    links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/0day')
-    # Digital Forensics
-    links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/digital_forensics')
-    # Tutorials - Mining
-    links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/mining')
-    # Tutorials - Worms
-    links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/worms')
-    # Tutorials - Viruses
-    links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/viruses')
-    # Tutorials - Trojans
-    links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/trojans')
-    # Tutorials - Botnets
-    links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/botnets')
+    # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/0day')
+    # # Digital Forensics
+    # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/digital_forensics')
+    # # Tutorials - Mining
+    # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/mining')
+    # # Tutorials - Worms
+    # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/worms')
+    # # Tutorials - Viruses
+    # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/viruses')
+    # # Tutorials - Trojans
+    # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/trojans')
+    # # Tutorials - Botnets
+    # links.append('http://mipx6eedtsvfgfcmbm3utjedgjez2w4dzjdrbhtd2mt3cicwhhzspxqd.onion/category/botnets')
 
     return links
@@ -275,11 +277,11 @@ def crawlForum(driver):
                         driver.back()
 
                 # comment out
-                # break
+                break
 
                 # comment out
-                # if count == 2:
-                #     break
+                if count == 1:
+                    break
 
             try:
                 pageCount += 1
@@ -296,7 +298,7 @@ def crawlForum(driver):
             print(link, e)
             i += 1
 
-    input("Crawling HiddenMarket market done sucessfully. Press ENTER to continue\n")
+    print("Crawling the HiddenMarket market done.")
 
 
 # Returns 'True' if the link is Topic link


+8 -1  MarketPlaces/Initialization/marketsList.txt

@@ -1 +1,8 @@
-HiddenMarket
+Apocalypse
+DarkMatter
+DigitalThriftShop
+HiddenMarket
+Nexus
+Robinhood
+TorBay
+ViceCity

+25 -8  MarketPlaces/Initialization/markets_mining.py

@@ -4,7 +4,6 @@ __author__ = 'DarkWeb'
 Starting point of the Darkweb Markets Mining
 '''
 
-import os
 from datetime import *
 from MarketPlaces.DarkFox.crawler_selenium import crawler as crawlerDarkFox
 from MarketPlaces.Tor2door.crawler_selenium import crawler as crawlerTor2door
@@ -24,9 +23,11 @@ from MarketPlaces.ViceCity.crawler_selenium import crawler as crawlerViceCity
 from MarketPlaces.HiddenMarket.crawler_selenium import crawler as crawlerHiddenMarket
 from MarketPlaces.RobinhoodMarket.crawler_selenium import crawler as crawlerRobinhoodMarket
 from MarketPlaces.Nexus.crawler_selenium import crawler as crawlerNexus
+from MarketPlaces.CypherMarketplace.crawler_selenium import crawler as crawlerCypher
 
 import configparser
 import time
+import os
+import subprocess
 
 config = configparser.ConfigParser()
 config.read('../../setup.ini')
@@ -71,18 +72,34 @@ def createDirectory(mkt):
         os.mkdir(descReadDir)
 
 
+# Opens Tor Browser
+def opentor():
+    global pid
+    print("Connecting Tor...")
+    pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
+    pid = pro.pid
+    # time.sleep(7.5)
+    input('Press ENTER when Tor is connected to continue')
+    return
+
+
 if __name__ == '__main__':
+    # opentor()
+
     mktsList = getMarkets()
 
     for mkt in mktsList:
         mkt = mkt.replace('\n','')
 
-        print(f"Creating listing and description directories of {mkt} ...")
+        print("\nCreating listing and description directories ... for " + mkt)
         createDirectory(mkt)
-        time.sleep(5)
-        input("Directories created successfully. Press ENTER to continue\n")
+        print("Directories created.")
 
         if mkt == "DarkFox":
-            # for base in json["DarkFox"]["base"]:
-            #     if crawlerDarkFox(base["url"], base["categories"]):
-            #         break
             crawlerDarkFox()
         elif mkt == 'Tor2door':
             crawlerTor2door()
@@ -118,7 +135,7 @@ if __name__ == '__main__':
             crawlerRobinhoodMarket()
         elif mkt == "Nexus":
             crawlerNexus()
+        elif mkt == "CypherMarketplace":
+            crawlerCypher()
 
-    print("Scraping process completed successfully!")
+    print("\nScraping process completed!")

+262 -230  MarketPlaces/Initialization/prepare_parser.py

@@ -20,9 +20,12 @@ from MarketPlaces.TorMarket.parser import *
 from MarketPlaces.HiddenMarket.parser import *
 from MarketPlaces.RobinhoodMarket.parser import *
 from MarketPlaces.Nexus.parser import *
+from MarketPlaces.MikesGrandStore.parser import *
 from MarketPlaces.Classifier.classify_product import predict
 
+nError = 0
+
 
 def mergePages(rmm, rec):
@@ -82,13 +85,182 @@ def persist_data(url, row, cur):
     create_items(cur, row, marketPlace, vendor)
 
 
+def incrementError():
+    global nError
+    nError += 1
+
+
+def read_file(filePath, createLog, logFile):
+    try:
+        html = codecs.open(filePath.strip('\n'), encoding='utf8')
+        soup = BeautifulSoup(html, "html.parser")
+        html.close()
+        return soup
+    except:
+        try:
+            html = open(filePath.strip('\n'))
+            soup = BeautifulSoup(html, "html.parser")
+            html.close()
+            return soup
+        except:
+            incrementError()
+            print("There was a problem to read the file " + filePath)
+            if createLog:
+                logFile.write(
+                    str(nError) + ". There was a problem to read the file " + filePath + "\n")
+            return None
+
+
+def parse_listing(marketPlace, listingFile, soup, createLog, logFile):
+    try:
+        if marketPlace == "DarkFox":
+            rw = darkfox_listing_parser(soup)
+        elif marketPlace == "Tor2door":
+            rw = tor2door_listing_parser(soup)
+        elif marketPlace == "Apocalypse":
+            rw = apocalypse_listing_parser(soup)
+        elif marketPlace == "ThiefWorld":
+            rw = thiefWorld_listing_parser(soup)
+        elif marketPlace == "AnonymousMarketplace":
+            rw = anonymousMarketplace_listing_parser(soup)
+        elif marketPlace == "ViceCity":
+            rw = vicecity_listing_parser(soup)
+        elif marketPlace == "TorBay":
+            rw = torbay_listing_parser(soup)
+        elif marketPlace == "M00nkeyMarket":
+            rw = m00nkey_listing_parser(soup)
+        elif marketPlace == "HiddenMarket":
+            rw = hiddenmarket_listing_parser(soup)
+        elif marketPlace == "DarkMatter":
+            rw = darkmatter_listing_parser(soup)
+        elif marketPlace == "DigitalThriftShop":
+            rw = digitalThriftShop_listing_parser(soup)
+        elif marketPlace == "LionMarketplace":
+            rw = lionmarketplace_listing_parser(soup)
+        elif marketPlace == "TorMarket":
+            rw = tormarket_listing_parser(soup)
+        elif marketPlace == "RobinhoodMarket":
+            rw = Robinhood_listing_parser(soup)
+        elif marketPlace == "Nexus":
+            rw = nexus_listing_parser(soup)
+        elif marketPlace == "MikesGrandStore":
+            rw = mikesGrandStore_listing_parser(soup)
+        else:
+            print("MISSING CALL TO LISTING PARSER IN PREPARE_PARSER.PY!")
+            raise Exception
+        return rw
+    except:
+        incrementError()
+        print("There was a problem to parse the file " + listingFile + " in the listing section!")
+        traceback.print_exc()
+        if createLog:
+            logFile.write(
+                str(nError) + ". There was a problem to parse the file " + listingFile + " in the Listing section.\n")
+        return None
+
+
+def parse_description(marketPlace, descriptionFile, soup, createLog, logFile):
+    try:
+        if marketPlace == "DarkFox":
+            rmm = darkfox_description_parser(soup)
+        elif marketPlace == "Tor2door":
+            rmm = tor2door_description_parser(soup)
+        elif marketPlace == "Apocalypse":
+            rmm = apocalypse_description_parser(soup)
+        elif marketPlace == "ThiefWorld":
+            rmm = thiefWorld_description_parser(soup)
+        elif marketPlace == "AnonymousMarketplace":
+            rmm = anonymousMarketplace_description_parser(soup)
+        elif marketPlace == "ViceCity":
+            rmm = vicecity_description_parser(soup)
+        elif marketPlace == "TorBay":
+            rmm = torbay_description_parser(soup)
+        elif marketPlace == "M00nkeyMarket":
+            rmm = m00nkey_description_parser(soup)
+        elif marketPlace == "HiddenMarket":
+            rmm = hiddenmarket_description_parser(soup)
+        elif marketPlace == "DarkMatter":
+            rmm = darkmatter_description_parser(soup)
+        elif marketPlace == "DigitalThriftShop":
+            rmm = digitalThriftShop_description_parser(soup)
+        elif marketPlace == "LionMarketplace":
+            rmm = lionmarketplace_description_parser(soup)
+        elif marketPlace == "TorMarket":
+            rmm = tormarket_description_parser(soup)
+        elif marketPlace == "RobinhoodMarket":
+            rmm = Robinhood_description_parser(soup)
+        elif marketPlace == "Nexus":
+            rmm = nexus_description_parser(soup)
+        elif marketPlace == "MikesGrandStore":
+            rmm = mikesGrandStore_description_parser(soup)
+        else:
+            print("MISSING CALL TO DESCRIPTION PARSER IN PREPARE_PARSER.PY!")
+            raise Exception
+        return rmm
+    except:
+        incrementError()
+        print("There was a problem to parse the file " + descriptionFile + " in the Description section!")
+        traceback.print_exc()
+        if createLog:
+            logFile.write(
+                str(nError) + ". There was a problem to parse the file " + descriptionFile + " in the Description section.\n")
+        return None
+
+
+def persist_record(url, rec, cur, con, createLog, logFile, listingFile, descriptionFile):
+    try:
+        persist_data(url, tuple(rec), cur)
+        con.commit()
+        return True
+    except:
+        con.rollback()
+
+        trace = traceback.format_exc()
+
+        if trace.find("already exists") == -1:
+            incrementError()
+            print(f"There was a problem to persist the files ({listingFile} + {descriptionFile}) in the database!")
+            traceback.print_exc()
+            if createLog:
+                logFile.write(
+                    str(nError) + f". There was a problem to persist the files ({listingFile} + {descriptionFile}) in the database!\n")
+            return False
+        else:
+            return True
+
+
+def move_file(filePath, createLog, logFile):
+    # source = line2.replace(os.path.basename(line2), "") + filename
+    source = filePath
+    destination = filePath.replace(os.path.basename(filePath), "") + r'Read/'
+
+    try:
+        shutil.move(source, destination)
+        return True
+    except:
+        print("There was a problem to move the file " + filePath)
+        incrementError()
+        if createLog:
+            logFile.write(
+                str(nError) + ". There was a problem to move the file " + filePath + "\n")
+        return False
+
+
 def new_parse(marketPlace, url, createLog):
 
     from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
 
-    print("Parsing the " + marketPlace + " marketplace and conduct data classification to store the information in the database.")
-
-    # ini = time.time()
+    print("Parsing the " + marketPlace + " market and conduct data classification to store the information in the database.")
 
     # Connecting to the database
     con = connectDataBase()
@@ -97,271 +269,131 @@ def new_parse(marketPlace, url, createLog):
     # Creating the tables (The database should be created manually)
     create_database(cur, con)
 
-    nError = 0
-
-    lines = []  # listing pages
-    lns = []  # description pages
-    detPage = {}
-
-    #Creating the log file for each Market Place
-    if createLog:
-        if not os.path.exists("./" + marketPlace + "/Logs/" + marketPlace + "_" + CURRENT_DATE + ".log"):
-            logFile = open("./" + marketPlace + "/Logs/" + marketPlace + "_" + CURRENT_DATE + ".log", "w")
-        else:
-            print("Files of the date " + CURRENT_DATE + " from the Market Place " + marketPlace +
-                  " were already read. Delete the referent information in the Data Base and also delete the log file"
-                  " in the _Logs folder to read files from this Market Place of this date again.")
-            raise SystemExit
-
-    mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + marketPlace + "/HTML_Pages")
+    mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + marketPlace + "/HTML_Pages")
+
+    # Creating the log file for each Forum
+    if createLog:
+        try:
+            logFile = open(mainDir + f"/{CURRENT_DATE}/" + marketPlace + "_" + CURRENT_DATE + ".log", "w")
+        except:
+            print("Could not open log file!")
+            createLog = False
+            logFile = None
+            # raise SystemExit
+    else:
+        logFile = None
 
     # Reading the Listing Html Pages
-    for fileListing in glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Listing", '*.html')):
-        lines.append(fileListing)
-
-    # Reading the Description Html Pages
-    for fileDescription in glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Description", '*.html')):
-        lns.append(fileDescription)
-
-    # Parsing the Description Pages and put the tag's content into a dictionary (Hash table)
-    for index, line2 in enumerate(lns):
-
-        print("Reading description folder of '" + marketPlace + "', file '" + os.path.basename(line2) + "', index= " + str(index + 1) + " ... " + str(len(lns)))
-
-        try:
-            html = codecs.open(line2.strip('\n'), encoding='utf8')
-            soup = BeautifulSoup(html, "html.parser")
-            html.close()
-        except:
-            try:
-                html = open(line2.strip('\n'))
-                soup = BeautifulSoup(html, "html.parser")
-                html.close()
-            except:
-                nError += 1
-                print("There was a problem to read the file " + line2 + " in the Description section!")
-                if createLog:
-                    logFile.write(str(nError) + ". There was a problem to read the file " + line2 + " in the Description section.\n")
-                continue
-
-        try:
-            if marketPlace == "DarkFox":
-                rmm = darkfox_description_parser(soup)
-            elif marketPlace == "Tor2door":
-                rmm = tor2door_description_parser(soup)
-            elif marketPlace == "Apocalypse":
-                rmm = apocalypse_description_parser(soup)
-            elif marketPlace == "ThiefWorld":
-                rmm = thiefWorld_description_parser(soup)
-            elif marketPlace == "AnonymousMarketplace":
-                rmm = anonymousMarketplace_description_parser(soup)
-            elif marketPlace == "ViceCity":
-                rmm = vicecity_description_parser(soup)
-            elif marketPlace == "TorBay":
-                rmm = torbay_description_parser(soup)
-            elif marketPlace == "M00nkeyMarket":
-                rmm = m00nkey_description_parser(soup)
-            elif marketPlace == "HiddenMarket":
-                rmm = hiddenmarket_description_parser(soup)
-            elif marketPlace == "DarkMatter":
-                rmm = darkmatter_description_parser(soup)
-            elif marketPlace == "DigitalThriftShop":
-                rmm = digitalThriftShop_description_parser(soup)
-            elif marketPlace == "LionMarketplace":
-                rmm = lionmarketplace_description_parser(soup)
-            elif marketPlace == "TorMarket":
-                rmm = tormarket_description_parser(soup)
-            elif marketPlace == "RobinhoodMarket":
-                rmm = Robinhood_description_parser(soup)
-            elif marketPlace == "Nexus":
-                rmm = nexus_description_parser(soup)
-
-            # key = u"Pr:" + rmm[0].upper()[:desc_lim1] + u" Vendor:" + rmm[13].upper()[:desc_lim2]
-            key = u"Url:" + os.path.basename(line2).replace(".html", "")
-
-            # save file address with description record in memory
-            detPage[key] = {'rmm': rmm, 'filename': os.path.basename(line2)}
-        except Exception as e:
-            raise e
-            nError += 1
-            print("There was a problem to parse the file " + line2 + " in the Description section!")
-            if createLog:
-                logFile.write(str(nError) + ". There was a problem to parse the file " + line2 + " in the Description section.\n")
-
-    # Parsing the Listing Pages and put the tag's content into a list
-    for index, line1 in enumerate(lines):
-
-        print("Reading listing folder of '" + marketPlace + "', file '" + os.path.basename(line1) + "', index= " + str(index + 1) + " ... " + str(len(lines)))
-
-        readError = False
-        try:
-            html = codecs.open(line1.strip('\n'), encoding='utf8')
-            soup = BeautifulSoup(html, "html.parser")
-            html.close()
-        except:
-            try:
-                html = open(line1.strip('\n'))
-                soup = BeautifulSoup(html, "html.parser")
-                html.close()
-            except Exception as e:
-                raise e
-                nError += 1
-                print("There was a problem to read the file " + line1 + " in the Listing section!")
-                if createLog:
-                    logFile.write(str(nError) + ". There was a problem to read the file " + line1 + " in the Listing section.\n")
-                readError = True
-
-        if not readError:
-            parseError = False
-            try:
-                if marketPlace == "DarkFox":
-                    rw = darkfox_listing_parser(soup)
-                elif marketPlace == "Tor2door":
-                    rw = tor2door_listing_parser(soup)
-                elif marketPlace == "Apocalypse":
-                    rw = apocalypse_listing_parser(soup)
-                elif marketPlace == "ThiefWorld":
-                    rw = thiefWorld_listing_parser(soup)
-                elif marketPlace == "AnonymousMarketplace":
-                    rw = anonymousMarketplace_listing_parser(soup)
-                elif marketPlace == "ViceCity":
-                    rw = vicecity_listing_parser(soup)
-                elif marketPlace == "TorBay":
-                    rw = torbay_listing_parser(soup)
-                elif marketPlace == "M00nkeyMarket":
-                    rw = m00nkey_listing_parser(soup)
-                elif marketPlace == "HiddenMarket":
-                    rw = hiddenmarket_listing_parser(soup)
-                elif marketPlace == "DarkMatter":
-                    rw = darkmatter_listing_parser(soup)
-                elif marketPlace == "DigitalThriftShop":
-                    rw = digitalThriftShop_listing_parser(soup)
-                elif marketPlace == "LionMarketplace":
-                    rw = lionmarketplace_listing_parser(soup)
-                elif marketPlace == "TorMarket":
-                    rw = tormarket_listing_parser(soup)
-                elif marketPlace == "RobinhoodMarket":
-                    rw = Robinhood_listing_parser(soup)
-                elif marketPlace == "Nexus":
-                    rw = nexus_listing_parser(soup)
-                else:
-                    parseError = True
-            except Exception as e:
-                nError += 1
-                print("There was a problem to parse the file " + line1 + " in the listing section!")
-                if createLog:
-                    logFile.write(
-                        str(nError) + ". There was a problem to parse the file " + line1 + " in the Listing section.\n")
-                parseError = True
-
-            if not parseError:
-                persistError = False
-                moveError = False
-                num_in_db = 0
-                num_persisted_moved = 0
-
-                for rec in rw:
-                    rec = rec.split(',')
-
-                    # if len(detPage) > 0: #It was created here just because Zeroday Market does not have Description Pages
-                    # key = rec[23]
-
-                    # key = u"Pr:" + rec[1].upper()[:list_lim1] + u" Vendor:" + rec[18].upper()[:list_lim2]
-                    key = u"Url:" + cleanLink(rec[20])
-
-                    # if the associated description page is parsed
-                    if key in detPage:
-
-                        # rec = mergePages(detPage, rec)
-
-                        # Combining the information from Listing and Description Pages
-                        rmm = detPage[key]['rmm']
-                        rec = mergePages(rmm, rec)
-
-                        # Append to the list the classification of the product
-                        # rec.append(str(predict(rec[1], rec[5], language='markets')))
-                        rec.append(str(predict(rec[4], rec[5], language='sup_english')))
-
-                        # Persisting the information in the database
-                        try:
-                            persist_data(url, tuple(rec), cur)
-                            con.commit()
-                        except Exception as e:
-                            trace = traceback.format_exc()
-                            if trace.find("already exists") == -1:
-                                nError += 1
-                                print("There was a problem to persist the file " + detPage[key]['filename'] + " in the database!")
-                                if createLog:
-                                    logFile.write(
-                                        str(nError) + ". There was a problem to persist the file " + detPage[key]['filename'] + " in the database.\n")
-                                persistError = True
-                            con.rollback()
-
-                        if not persistError:
-                            # move description files of completed folder
-                            source = line2.replace(os.path.basename(line2), "") + detPage[key]['filename']
-                            destination = line2.replace(os.path.basename(line2), "") + r'Read/'
-                            try:
-                                shutil.move(source, destination)
-                                num_persisted_moved += 1
-                            except:
-                                print("There was a problem to move the file " + detPage[key]['filename'] + " in the Description section!")
-                                nError += 1
-                                if createLog:
-                                    logFile.write(
-                                        str(nError) + ". There was a problem to move the file " + detPage[key]['filename'] + " in the Description section!.\n")
-                                moveError = True
-
-                    # if the associated description page is not read or not parsed
-                    else:
-                        # query database
-                        # if the product already exists:
-                        #     num_in_db += 1
-                        pass
-
-                # if number of products on listing page is equal to
-                # the number of merged, persisted, and moved products plus
-                # the number of products already in the database
-                if not persistError and not moveError and len(rw) == (num_persisted_moved + num_in_db):
-                    # move listing file to completed folder
-                    source = line1
-                    destination = line1.replace(os.path.basename(line1), "") + r'Read/'
-                    try:
-                        shutil.move(source, destination)
-                    except:
-                        nError += 1
-                        print("There was a problem to move the file " + line1 + " in the Listing section!")
-                        if createLog:
-                            logFile.write(str(nError) + ". There was a problem to move the file " + line1 + " in the Listing section!.\n")
-
-    # g.close ()
-
-    if createLog:
-        logFile.close()
+    listings = glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Listing", '*.html'))
+
+    for listingIndex, listingFile in enumerate(listings):
+
+        print("Reading listing folder of '" + marketPlace + "', file '" + os.path.basename(listingFile) + "', index= " + str(
+            listingIndex + 1) + " ... " + str(len(listings)))
+
+        listingSoup = read_file(listingFile, createLog, logFile)
+
+        # listing flags
+        doParseListing = listingSoup is not None
+        doDescription = False
+
+        readDescriptionError = False
+        parseDescriptionError = False
+        persistDescriptionError = False
+        moveDescriptionError = False
+        findDescriptionError = False
+
+        rw = []
+
+        if doParseListing:
+
+            rw = parse_listing(marketPlace, listingFile, listingSoup, createLog, logFile)
+
+            doDescription = rw is not None
+
+        if doDescription:
+
+            nFound = 0
+
+            for rec in rw:
+
+                rec = rec.split(',')
+
+                descriptionPattern = cleanLink(rec[20]) + ".html"
+
+                # Reading the associated description Html Pages
+                descriptions = glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Description", descriptionPattern))
+
+                nFound += len(descriptions)
+
+                for descriptionIndex, descriptionFile in enumerate(descriptions):
+
+                    print("Reading description folder of '" + marketPlace + "', file '" + os.path.basename(
+                        descriptionFile) + "', index= " + str(descriptionIndex + 1) + " ... " + str(len(descriptions)))
+
+                    descriptionSoup = read_file(descriptionFile, createLog, logFile)
+
+                    # description flags
+                    doParseDescription = descriptionSoup is not None
+                    doPersistRecord = False
+                    doMoveDescription = False
+
+                    rmm = []
+
+                    if doParseDescription:
+
+                        rmm = parse_description(marketPlace, descriptionFile, descriptionSoup, createLog, logFile)
+
+                        doPersistRecord = rmm is not None
+
+                    else:
+                        readDescriptionError = True
+                        parseDescriptionError = True
+
+                    if doPersistRecord:
+
+                        # Combining the information from Listing and Description Pages
+                        rec = mergePages(rmm, rec)
+
+                        # Append to the list the classification of the topic
+                        rec.append(str(predict(rec[4], rec[5], language='sup_english')))
+
+                        # Persisting the information in the database
+                        persistSuccess = persist_record(url, rec, cur, con, createLog, logFile, listingFile,
+                                                        descriptionFile)
+
+                        doMoveDescription = persistSuccess
+
+                    else:
+                        parseDescriptionError = True
+
+                    if doMoveDescription:
+
+                        # move description files of completed folder
+                        moveSuccess = move_file(descriptionFile, createLog, logFile)
+
+                        if not moveSuccess:
+                            moveDescriptionError = True
+
+                    else:
+                        moveDescriptionError = True
+
+            if not (nFound > 0):
+                findDescriptionError = True
+
+                incrementError()
+                print(f"There was a problem to locate the file(s) for {listingFile} in the Description section!")
+                if createLog:
+                    logFile.write(
+                        str(nError) + f". There was a problem to locate the file(s) for {listingFile}"
+                                      f" in the Description section!\n")
# end = time.time()
if not (readDescriptionError or parseDescriptionError or persistDescriptionError
or moveDescriptionError or findDescriptionError):
# move listing files of completed folder
move_file(listingFile, createLog, logFile)
# finalTime = float(end-ini)
if createLog:
logFile.close()
# print (marketPlace + " Parsing Perfomed Succesfully in %.2f" %finalTime + "!")
input("Parsing the " + marketPlace + " marketplace and data classification done successfully. Press ENTER to continue\n")
print("Parsing the " + marketPlace + " market and data classification done.")

+ 19
- 17
MarketPlaces/LionMarketplace/crawler_selenium.py View File

@ -31,19 +31,19 @@ baseURL = 'http://lionznqc2hg2wsp5vgruqait4cpknihwlje6hkjyi52lcl5ivyf7bcad.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor # Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later #acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling(): def startCrawling():
opentor()
# opentor()
mktName = getMKTName() mktName = getMKTName()
driver = getAccess() driver = getAccess()
if driver != 'down': if driver != 'down':
try: try:
login(driver)
# login(driver)
crawlForum(driver) crawlForum(driver)
except Exception as e: except Exception as e:
print(driver.current_url, e) print(driver.current_url, e)
closetor(driver) closetor(driver)
new_parse(mktName, baseURL, False)
new_parse(mktName, baseURL, True)
# Opens Tor Browser # Opens Tor Browser
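Across every crawler in this commit the third argument to new_parse flips from False to True. Judging by prepare_parser.py, where a createLog flag gates the logFile writes, this turns parse-error logging on for real runs; that reading is inferred from the code, the signature is not documented here.

    # inferred signature: new_parse(marketPlace, url, createLog)
    new_parse(mktName, baseURL, True)             # parse and write a per-run error log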
@ -103,7 +103,7 @@ def createFFDriver():
ff_prof.set_preference("network.cookie.lifetimePolicy", 2) ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
ff_prof.set_preference("network.dns.disablePrefetch", True) ff_prof.set_preference("network.dns.disablePrefetch", True)
ff_prof.set_preference("network.http.sendRefererHeader", 0) ff_prof.set_preference("network.http.sendRefererHeader", 0)
ff_prof.set_preference("permissions.default.image", 2)
ff_prof.set_preference("permissions.default.image", 1)
ff_prof.set_preference("browser.download.folderList", 2) ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False) ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
@ -119,6 +119,8 @@ def createFFDriver():
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service) driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
driver.maximize_window()
return driver return driver
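driver.maximize_window() is also added to createFFDriver in nearly every crawler here. It is a standard Selenium call that maximizes the browser window; for scraping, a consistent full-size viewport can keep responsive layouts, and therefore the hard-coded XPaths these crawlers rely on, stable. That rationale is a guess, the diff gives none.

    driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
    driver.maximize_window()                      # consistent, full-size viewport
    return driver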
@ -187,12 +189,12 @@ def getInterestedLinks():
# Software/Malware # Software/Malware
links.append('http://lionznqc2hg2wsp5vgruqait4cpknihwlje6hkjyi52lcl5ivyf7bcad.onion/category/16') links.append('http://lionznqc2hg2wsp5vgruqait4cpknihwlje6hkjyi52lcl5ivyf7bcad.onion/category/16')
# Carding
links.append('http://lionznqc2hg2wsp5vgruqait4cpknihwlje6hkjyi52lcl5ivyf7bcad.onion/category/20')
# Hacking
links.append('http://lionznqc2hg2wsp5vgruqait4cpknihwlje6hkjyi52lcl5ivyf7bcad.onion/category/ba142ac0-c7e7-11ec-9bd1-fdd89c3d3f91')
# tutorial
links.append('http://lionznqc2hg2wsp5vgruqait4cpknihwlje6hkjyi52lcl5ivyf7bcad.onion/category/19')
# # Carding
# links.append('http://lionznqc2hg2wsp5vgruqait4cpknihwlje6hkjyi52lcl5ivyf7bcad.onion/category/20')
# # Hacking
# links.append('http://lionznqc2hg2wsp5vgruqait4cpknihwlje6hkjyi52lcl5ivyf7bcad.onion/category/ba142ac0-c7e7-11ec-9bd1-fdd89c3d3f91')
# # tutorial
# links.append('http://lionznqc2hg2wsp5vgruqait4cpknihwlje6hkjyi52lcl5ivyf7bcad.onion/category/19')
return links return links
@ -231,12 +233,12 @@ def crawlForum(driver):
savePage(driver, driver.page_source, item) savePage(driver, driver.page_source, item)
driver.back() driver.back()
# # comment out
# break
#
# # comment out
# if count == 1:
# break
# comment out
break
# comment out
if count == 1:
break
try: try:
link = driver.find_element(by=By.XPATH, value= link = driver.find_element(by=By.XPATH, value=
@ -252,7 +254,7 @@ def crawlForum(driver):
print(link, e) print(link, e)
i += 1 i += 1
input("Crawling LionMarketplace forum done sucessfully. Press ENTER to continue\n")
print("Crawling the LionMarketplace market done.")
# Returns 'True' if the link is a description link # Returns 'True' if the link is a description link


+ 17
- 15
MarketPlaces/M00nkeyMarket/crawler_selenium.py View File

@ -34,17 +34,17 @@ MARKET_NAME = 'M00nkeyMarket'
#acts like the main method for the crawler, another function at the end of this code calls this function later #acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling(): def startCrawling():
# opentor() # opentor()
# driver = getAccess()
#
# if driver != 'down':
# try:
# login(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
driver = getAccess()
new_parse(MARKET_NAME, BASE_URL, False)
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
new_parse(MARKET_NAME, BASE_URL, True)
# Opens Tor Browser # Opens Tor Browser
@ -120,6 +120,8 @@ def createFFDriver():
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service) driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
driver.maximize_window()
return driver return driver
@ -203,7 +205,7 @@ def getInterestedLinks():
# software # software
links.append('http://moonkey4f2mkcp6hpackeea356puiry27h3dz3hzbt3adbmsk4gs7wyd.onion/search/subcategories?subcategory=30') links.append('http://moonkey4f2mkcp6hpackeea356puiry27h3dz3hzbt3adbmsk4gs7wyd.onion/search/subcategories?subcategory=30')
# # guides # # guides
links.append('http://moonkey4f2mkcp6hpackeea356puiry27h3dz3hzbt3adbmsk4gs7wyd.onion/search/subcategories?subcategory=17')
# links.append('http://moonkey4f2mkcp6hpackeea356puiry27h3dz3hzbt3adbmsk4gs7wyd.onion/search/subcategories?subcategory=17')
return links return links
@ -243,11 +245,11 @@ def crawlForum(driver):
driver.back() driver.back()
# comment out # comment out
# break
break
# comment out # comment out
# if count == 1:
# break
if count == 1:
break
try: try:
link = driver.find_element(by=By.LINK_TEXT, value='Next ›').get_attribute('href') link = driver.find_element(by=By.LINK_TEXT, value='Next ›').get_attribute('href')
@ -262,7 +264,7 @@ def crawlForum(driver):
print(link, e) print(link, e)
i += 1 i += 1
input("Crawling M00nkeyMarket done sucessfully. Press ENTER to continue\n")
print("Crawling the M00nkeyMarket done.")
# Returns 'True' if the link is a description link # Returns 'True' if the link is a description link
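The uncommented break statements above cap the crawl at one product and one listing page, which reads like a test limit left switched on. A hypothetical equivalent inside crawlForum's paging loop with an explicit flag (TEST_MODE is not in the project, and the item loop follows the pattern shared by these crawlers) keeps that intent visible without editing comments:

    TEST_MODE = True                              # hypothetical flag, not in the project
    for item in list:
        itemURL = urlparse.urljoin(BASE_URL, str(item))
        try:
            driver.get(itemURL)
        except:
            driver.refresh()
        savePage(driver, driver.page_source, item)
        driver.back()
        if TEST_MODE:
            break                                 # stop after the first product
    if TEST_MODE and count == 1:
        break                                     # stop after the first listing page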


+ 17
- 43
MarketPlaces/MikesGrandStore/crawler_selenium.py View File

@ -31,47 +31,19 @@ baseURL = 'http://4yx2akutmkhwfgzlpdxiah7cknurw6vlddlq24fxa3r3ebophwgpvhyd.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor # Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later #acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling(): def startCrawling():
print("Welcome to the darkweb pipeline. Do you want to run:")
print("[A] Entire pipeline\t[B] Crawler only\t[C] Parser only")
choice = input()
while choice not in {'A', 'B', 'C'}:
print("Choose the options below only!")
print("[A] Entire pipeline\t[B] Crawler only\t[C] Parser only")
choice = input()
if choice == 'A':
opentor()
mktName = getMKTName()
driver = getAccess()
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
new_parse(mktName, baseURL, False)
if choice == 'B':
opentor()
driver = getAccess()
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
if choice == 'C':
mktName = getMKTName()
new_parse(mktName, baseURL, False)
# opentor()
mktName = getMKTName()
driver = getAccess()
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
new_parse(mktName, baseURL, True)
# Opens Tor Browser # Opens Tor Browser
@ -131,7 +103,7 @@ def createFFDriver():
ff_prof.set_preference("network.cookie.lifetimePolicy", 2) ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
ff_prof.set_preference("network.dns.disablePrefetch", True) ff_prof.set_preference("network.dns.disablePrefetch", True)
ff_prof.set_preference("network.http.sendRefererHeader", 0) ff_prof.set_preference("network.http.sendRefererHeader", 0)
ff_prof.set_preference("permissions.default.image", 2)
ff_prof.set_preference("permissions.default.image", 1)
ff_prof.set_preference("browser.download.folderList", 2) ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False) ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
@ -147,6 +119,8 @@ def createFFDriver():
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service) driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
driver.maximize_window()
return driver return driver
@ -275,7 +249,7 @@ def crawlForum(driver):
print(link, e) print(link, e)
i += 1 i += 1
input("Crawling MikesGrandStore forum done sucessfully. Press ENTER to continue\n")
print("Crawling the MikesGrandStore market done.")
# Returns 'True' if the link is a description link # Returns 'True' if the link is a description link


+ 26
- 17
MarketPlaces/Nexus/crawler_selenium.py View File

@ -31,7 +31,7 @@ baseURL = 'http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor # Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later #acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling(): def startCrawling():
opentor()
# opentor()
mktName = getMKTName() mktName = getMKTName()
driver = getAccess() driver = getAccess()
@ -40,9 +40,9 @@ def startCrawling():
crawlForum(driver) crawlForum(driver)
except Exception as e: except Exception as e:
print(driver.current_url, e) print(driver.current_url, e)
closetor(driver)
closetor(driver)
new_parse(mktName, baseURL, False)
new_parse(mktName, baseURL, True)
# Opens Tor Browser # Opens Tor Browser
#prompts for ENTER input to continue #prompts for ENTER input to continue
@ -116,6 +116,8 @@ def createFFDriver():
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service) driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
driver.maximize_window()
return driver return driver
@ -131,8 +133,8 @@ def getAccess():
driver.close() driver.close()
return 'down' return 'down'
def savePage(page, url):
cleanPage = cleanHTML(page)
def savePage(driver, page, url):
cleanPage = cleanHTML(driver, page)
filePath = getFullPathName(url) filePath = getFullPathName(url)
os.makedirs(os.path.dirname(filePath), exist_ok=True) os.makedirs(os.path.dirname(filePath), exist_ok=True)
open(filePath, 'wb').write(cleanPage.encode('utf-8')) open(filePath, 'wb').write(cleanPage.encode('utf-8'))
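savePage and cleanHTML both gain a driver parameter here, and every call site in crawlForum is updated to match. The diff does not say why cleanHTML needs the driver; a plausible guess is access to live page context such as driver.current_url, but that is only an inference.

    # call sites after this commit (taken from the hunks below):
    savePage(driver, html, link)
    savePage(driver, driver.page_source, item)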
@ -173,14 +175,14 @@ def getInterestedLinks():
# Bot nets # Bot nets
links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/malware/botnets/') links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/malware/botnets/')
# Rats
links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/malware/rats/')
# Ransomware
links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/malware/ransomware/')
# Other Malware
links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/malware/outros-malware/')
# Hacking Tools & Scripting
links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/hacking-spam/ferramentas-de-hacking-scripts/')
# # Rats
# links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/malware/rats/')
# # Ransomware
# links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/malware/ransomware/')
# # Other Malware
# links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/malware/outros-malware/')
# # Hacking Tools & Scripting
# links.append('http://nexus2bmba34euohk3xo7og2zelkgbtc2p7rjsbxrjjknlecja2tdvyd.onion/categoria-produto/hacking-spam/ferramentas-de-hacking-scripts/')
return links return links
@ -207,7 +209,7 @@ def crawlForum(driver):
except: except:
driver.refresh() driver.refresh()
html = driver.page_source html = driver.page_source
savePage(html, link)
savePage(driver, html, link)
list = productPages(html) list = productPages(html)
for item in list: for item in list:
@ -216,9 +218,16 @@ def crawlForum(driver):
driver.get(itemURL) driver.get(itemURL)
except: except:
driver.refresh() driver.refresh()
savePage(driver.page_source, item)
savePage(driver, driver.page_source, item)
driver.back() driver.back()
# comment out
break
# comment out
if count == 1:
break
try: try:
link = driver.find_element(by=By.XPATH, value= link = driver.find_element(by=By.XPATH, value=
'/html/body/div[1]/div[2]/div/div/main/nav/ul/li[3]/a').get_attribute('href') '/html/body/div[1]/div[2]/div/div/main/nav/ul/li[3]/a').get_attribute('href')
@ -233,7 +242,7 @@ def crawlForum(driver):
print(link, e) print(link, e)
i += 1 i += 1
input("Crawling Nexus done sucessfully. Press ENTER to continue\n")
print("Crawling the Nexus market done.")
# Returns 'True' if the link is a description link # Returns 'True' if the link is a description link
@ -263,5 +272,5 @@ def productPages(html):
def crawler(): def crawler():
startCrawling() startCrawling()
print("Crawling and Parsing Nexus .... DONE!")
# print("Crawling and Parsing Nexus .... DONE!")

+ 3
- 3
MarketPlaces/Nexus/parser.py View File

@ -107,7 +107,7 @@ def nexus_listing_parser(soup):
# Finding the name of the product # Finding the name of the product
name_of_product = product.find("h2", {"class": "woocommerce-loop-product__title"}).find("a").text name_of_product = product.find("h2", {"class": "woocommerce-loop-product__title"}).find("a").text
name_of_product_cleaned = cleanString(name_of_product.strip()) name_of_product_cleaned = cleanString(name_of_product.strip())
print(name_of_product_cleaned)
# print(name_of_product_cleaned)
name.append(name_of_product_cleaned) name.append(name_of_product_cleaned)
#finding the URL #finding the URL
try: try:
@ -135,8 +135,8 @@ def nexus_listing_parser(soup):
qLeft.append("-1") qLeft.append("-1")
shipFrom.append("-1") shipFrom.append("-1")
shipTo.append("-1") shipTo.append("-1")
print("Done! moving onto the next product!")
print(len(shipTo))
# print("Done! moving onto the next product!")
# print(len(shipTo))
nm += 1 nm += 1
except AttributeError as e: except AttributeError as e:
print("I'm somewhere I don't belong. I'm going to leave") print("I'm somewhere I don't belong. I'm going to leave")


+ 33
- 40
MarketPlaces/RobinhoodMarket/crawler_selenium.py View File

@ -1,7 +1,7 @@
__author__ = 'chris' __author__ = 'chris'
''' '''
WeTheNorth Market Crawler (Selenium)
RobinhoodMarket Market Crawler (Selenium)
''' '''
from selenium import webdriver from selenium import webdriver
@ -23,8 +23,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.RobinhoodMarket.parser import Robinhood_links_parser from MarketPlaces.RobinhoodMarket.parser import Robinhood_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1 counter = 1
baseURL = 'http://ilr3qzubfnx33vbhal7l5coo4ftqlkv2tboph4ujog5crz6m5ua2b2ad.onion/' baseURL = 'http://ilr3qzubfnx33vbhal7l5coo4ftqlkv2tboph4ujog5crz6m5ua2b2ad.onion/'
@ -34,15 +32,14 @@ def startCrawling():
# Opening tor beforehand gives "Tor exited during startup error" # Opening tor beforehand gives "Tor exited during startup error"
# opentor() # opentor()
marketName = getMarketName()
marketName = getMKTName()
driver = getAccess() driver = getAccess()
# Captcha
input("Press ENTER when website has loaded")
if driver != 'down': if driver != 'down':
try: try:
# Captcha
input("Press ENTER when website has loaded")
# Robinhood doesn't need login # Robinhood doesn't need login
# login(driver) # login(driver)
crawlForum(driver) crawlForum(driver)
@ -50,11 +47,13 @@ def startCrawling():
print(driver.current_url, e) print(driver.current_url, e)
closetor(driver) closetor(driver)
new_parse(marketName, baseURL, False)
new_parse(marketName, baseURL, True)
# Opens Tor Browser # Opens Tor Browser
def opentor(): def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid global pid
print("Connecting Tor...") print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -70,7 +69,7 @@ def login(driver):
# Returns the name of the website # Returns the name of the website
def getMarketName():
def getMKTName():
name = 'RobinhoodMarket' name = 'RobinhoodMarket'
return name return name
@ -96,6 +95,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile' # Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket # to use Tor proxy and socket
def createFFDriver(): def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -124,13 +125,14 @@ def createFFDriver():
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service) driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
driver.maximize_window()
return driver return driver
def getAccess(): def getAccess():
url = getFixedURL() url = getFixedURL()
driver = createFFDriver() driver = createFFDriver()
input('Tor Connected. Press ENTER to continue\n')
try: try:
driver.get(url) driver.get(url)
return driver return driver
@ -150,12 +152,14 @@ def savePage(driver, page, url):
# Gets the full path of the page to be saved along with its appropriate file name # Gets the full path of the page to be saved along with its appropriate file name
def getFullPathName(url): def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url) fileName = getNameFromURL(url)
if isDescriptionLink(url): if isDescriptionLink(url):
fullPath = r'..\RobinhoodMarket\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else: else:
fullPath = r'..\RobinhoodMarket\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath return fullPath
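One nit on the new paths: they still splice Windows separators into os.path.join (CURRENT_DATE + r'\\Description\\' + fileName), so the result is Windows-only. A fully portable equivalent, offered as an alternative rather than what the project does, lets join supply every separator:

    fullPath = os.path.join(mainDir, CURRENT_DATE, 'Description', fileName + '.html')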
@ -174,8 +178,8 @@ def getInterestedLinks():
# Hacking # Hacking
links.append('http://ilr3qzubfnx33vbhal7l5coo4ftqlkv2tboph4ujog5crz6m5ua2b2ad.onion/product-category/hacking/') links.append('http://ilr3qzubfnx33vbhal7l5coo4ftqlkv2tboph4ujog5crz6m5ua2b2ad.onion/product-category/hacking/')
# Other Software
links.append('http://ilr3qzubfnx33vbhal7l5coo4ftqlkv2tboph4ujog5crz6m5ua2b2ad.onion/product-category/other-software/')
# # Other Software
# links.append('http://ilr3qzubfnx33vbhal7l5coo4ftqlkv2tboph4ujog5crz6m5ua2b2ad.onion/product-category/other-software/')
return links return links
@ -184,25 +188,24 @@ def crawlForum(driver):
print("Crawling the Robinhood market") print("Crawling the Robinhood market")
linksToCrawl = getInterestedLinks() linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
i = 0 i = 0
count = 0
while i < len(linksToCrawl): while i < len(linksToCrawl):
link = linksToCrawl[i] link = linksToCrawl[i]
print('Crawling :', link) print('Crawling :', link)
try: try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(driver, html, link)
has_next_page = True has_next_page = True
count = 0
while has_next_page: while has_next_page:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(driver, html, link)
list = productPages(html) list = productPages(html)
for item in list: for item in list:
@ -213,27 +216,20 @@ def crawlForum(driver):
driver.refresh() driver.refresh()
savePage(driver, driver.page_source, item) savePage(driver, driver.page_source, item)
driver.back() driver.back()
# comment out # comment out
# break
break
# comment out # comment out
# if count == 1:
# count = 0
# break
if count == 1:
break
# go to next page of market # go to next page of market
try: try:
nav = driver.find_element(by=By.XPATH, value="//a[@class='next page-numbers']") nav = driver.find_element(by=By.XPATH, value="//a[@class='next page-numbers']")
link = nav.get_attribute('href') link = nav.get_attribute('href')
if link == "": if link == "":
raise NoSuchElementException raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(driver, html, link)
count += 1 count += 1
except NoSuchElementException: except NoSuchElementException:
@ -243,10 +239,7 @@ def crawlForum(driver):
print(link, e) print(link, e)
i += 1 i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling Robinhood market done successfully. Press ENTER to continue\n")
print("Crawling the Robinhood market done.")
# Returns 'True' if the link is Topic link # Returns 'True' if the link is Topic link
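RobinhoodMarket (and ViceCity below) also drop the module-level configparser read of ../../setup.ini in favor of importing the shared config object from markets_mining inside each function that needs it. A deferred import like this is the usual way to avoid a circular import between the initializer and the crawlers it loads, and it stops the config path from depending on the current working directory; both motives are inferred from the change rather than stated in it.

    def createFFDriver():
        from MarketPlaces.Initialization.markets_mining import config   # deferred import
        ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
        ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))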


+ 7
- 5
MarketPlaces/ThiefWorld/crawler_selenium.py View File

@ -1,7 +1,7 @@
__author__ = 'Helium' __author__ = 'Helium'
''' '''
ThiefWorld Forum Crawler (Selenium)
ThiefWorld Market Crawler (Selenium)
''' '''
from selenium import webdriver from selenium import webdriver
@ -32,7 +32,7 @@ baseURL = 'http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor # Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later #acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling(): def startCrawling():
opentor()
# opentor()
mktName = getMKTName() mktName = getMKTName()
driver = getAccess() driver = getAccess()
@ -44,7 +44,7 @@ def startCrawling():
print(driver.current_url, e) print(driver.current_url, e)
closetor(driver) closetor(driver)
# new_parse(mktName, baseURL, False)
new_parse(mktName, baseURL, True)
# Opens Tor Browser # Opens Tor Browser
@ -104,7 +104,7 @@ def createFFDriver():
ff_prof.set_preference("network.cookie.lifetimePolicy", 2) ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
ff_prof.set_preference("network.dns.disablePrefetch", True) ff_prof.set_preference("network.dns.disablePrefetch", True)
ff_prof.set_preference("network.http.sendRefererHeader", 0) ff_prof.set_preference("network.http.sendRefererHeader", 0)
ff_prof.set_preference("permissions.default.image", 2)
ff_prof.set_preference("permissions.default.image", 1)
ff_prof.set_preference("browser.download.folderList", 2) ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False) ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
@ -120,6 +120,8 @@ def createFFDriver():
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service) driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
driver.maximize_window()
return driver return driver
@ -260,7 +262,7 @@ def crawlForum(driver):
print(link, e) print(link, e)
i += 1 i += 1
input("Crawling ThiefWorld forum done sucessfully. Press ENTER to continue\n")
print("Crawling the ThiefWorld market done.")
# Returns 'True' if the link is a description link # Returns 'True' if the link is a description link


BIN
MarketPlaces/Tor2door/captcha.png View File

Before After
Width: 120  |  Height: 38  |  Size: 3.3 KiB

+ 6
- 4
MarketPlaces/Tor2door/crawler_selenium.py View File

@ -29,8 +29,8 @@ baseURL = 'http://yzrrne3pveltulbavydr2kiashvlnysdwclwmklo6cyjuqpxi7ku4xqd.onion
# Opens Tor Browser, crawls the website # Opens Tor Browser, crawls the website
def startCrawling(): def startCrawling():
opentor()
# marketName = getMKTName()
# opentor()
marketName = getMKTName()
driver = getAccess() driver = getAccess()
if driver != 'down': if driver != 'down':
@ -41,7 +41,7 @@ def startCrawling():
print(driver.current_url, e) print(driver.current_url, e)
closetor(driver) closetor(driver)
# new_parse(marketName, baseURL, False)
new_parse(marketName, baseURL, True)
# Opens Tor Browser # Opens Tor Browser
@ -161,6 +161,8 @@ def createFFDriver():
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service) driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
driver.maximize_window()
return driver return driver
@ -278,7 +280,7 @@ def crawlForum(driver):
print(link, e) print(link, e)
i += 1 i += 1
input("Crawling Tor2door market done sucessfully. Press ENTER to continue\n")
print("Crawling the Tor2door market done.")
# Returns 'True' if the link is Topic link # Returns 'True' if the link is Topic link


+ 15
- 13
MarketPlaces/TorBay/crawler_selenium.py View File

@ -34,17 +34,17 @@ baseURL = 'http://torbay3253zck4ym5cbowwvrbfjjzruzthrx3np5y6owvifrnhy5ybid.onion
def startCrawling(): def startCrawling():
# opentor() # opentor()
mktName = getMKTName() mktName = getMKTName()
# driver = getAccess()
#
# if driver != 'down':
# try:
# login(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
#
new_parse(mktName, baseURL, False)
driver = getAccess()
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
new_parse(mktName, baseURL, True)
# Opens Tor Browser # Opens Tor Browser
@ -120,6 +120,8 @@ def createFFDriver():
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service) driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
driver.maximize_window()
return driver return driver
#the driver 'gets' the url, attempting to get on the site, if it can't access return 'down' #the driver 'gets' the url, attempting to get on the site, if it can't access return 'down'
@ -230,7 +232,7 @@ def crawlForum(driver):
# comment out # comment out
if count == 1: if count == 1:
break
break
try: try:
link = driver.find_element(by=By.XPATH, value= link = driver.find_element(by=By.XPATH, value=
@ -246,7 +248,7 @@ def crawlForum(driver):
print(link, e) print(link, e)
i += 1 i += 1
input("Crawling TorBay forum done sucessfully. Press ENTER to continue\n")
print("Crawling the TorBay market done.")
# Returns 'True' if the link is a description link # Returns 'True' if the link is a description link


+ 20
- 18
MarketPlaces/TorMarket/crawler_selenium.py View File

@ -33,17 +33,17 @@ baseURL = 'http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion
def startCrawling(): def startCrawling():
# opentor() # opentor()
mktName = getMKTName() mktName = getMKTName()
# driver = getAccess()
#
# if driver != 'down':
# try:
# login(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
driver = getAccess()
new_parse(mktName, baseURL, False)
if driver != 'down':
try:
# login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
new_parse(mktName, baseURL, True)
# Opens Tor Browser # Opens Tor Browser
@ -103,7 +103,7 @@ def createFFDriver():
ff_prof.set_preference("network.cookie.lifetimePolicy", 2) ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
ff_prof.set_preference("network.dns.disablePrefetch", True) ff_prof.set_preference("network.dns.disablePrefetch", True)
ff_prof.set_preference("network.http.sendRefererHeader", 0) ff_prof.set_preference("network.http.sendRefererHeader", 0)
ff_prof.set_preference("permissions.default.image", 2)
ff_prof.set_preference("permissions.default.image", 1)
ff_prof.set_preference("browser.download.folderList", 2) ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False) ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain") ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/plain")
@ -119,6 +119,8 @@ def createFFDriver():
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service) driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
driver.maximize_window()
return driver return driver
@ -184,12 +186,12 @@ def getNameFromURL(url):
def getInterestedLinks(): def getInterestedLinks():
links = [] links = []
# Hacking Tutorials
links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/guides-tutorials/hacking/')
# # Malware
# # Hacking Tutorials
# links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/guides-tutorials/hacking/')
# Malware
links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/malware/') links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/malware/')
# # Hacking Services # # Hacking Services
links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/services/hacking-services/')
# links.append('http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/product-category/services/hacking-services/')
return links return links
@ -232,8 +234,8 @@ def crawlForum(driver):
break break
# comment out # comment out
# if count == 1:
# break
if count == 1:
break
try: try:
link = driver.find_element(by=By.XPATH, value= link = driver.find_element(by=By.XPATH, value=
@ -249,7 +251,7 @@ def crawlForum(driver):
print(link, e) print(link, e)
i += 1 i += 1
input("Crawling TorMarket forum done sucessfully. Press ENTER to continue\n")
print("Crawling the TorMarket market done.")
# Returns 'True' if the link is a description link # Returns 'True' if the link is a description link


+ 33
- 41
MarketPlaces/ViceCity/crawler_selenium.py View File

@ -25,8 +25,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.ViceCity.parser import vicecity_links_parser from MarketPlaces.ViceCity.parser import vicecity_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1 counter = 1
baseURL = 'http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion/' baseURL = 'http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion/'
@ -34,7 +32,7 @@ baseURL = 'http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor # Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later #acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling(): def startCrawling():
opentor()
# opentor()
mktName = getMKTName() mktName = getMKTName()
driver = getAccess() driver = getAccess()
@ -46,12 +44,14 @@ def startCrawling():
print(driver.current_url, e) print(driver.current_url, e)
closetor(driver) closetor(driver)
new_parse(mktName, baseURL, False)
new_parse(mktName, baseURL, True)
# Opens Tor Browser # Opens Tor Browser
#prompts for ENTER input to continue #prompts for ENTER input to continue
def opentor(): def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid global pid
print("Connecting Tor...") print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -90,6 +90,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile' # Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket # to use Tor proxy and socket
def createFFDriver(): def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -118,6 +120,8 @@ def createFFDriver():
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service) driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
driver.maximize_window()
return driver return driver
#the driver 'gets' the url, attempting to get on the site, if it can't access return 'down' #the driver 'gets' the url, attempting to get on the site, if it can't access return 'down'
@ -140,9 +144,9 @@ def login(driver):
# wait for first captcha page to show up (This Xpath may need to change based on different seed url) # wait for first captcha page to show up (This Xpath may need to change based on different seed url)
WebDriverWait(driver, 100).until(EC.visibility_of_element_located( WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
(By.XPATH, "/html/body/div/div/form/div/div[1]"))) (By.XPATH, "/html/body/div/div/form/div/div[1]")))
input("Press Enter once captcha done (dont press done)")
input("Press Enter once captcha done")
#clicks button after captcha is inputted #clicks button after captcha is inputted
driver.find_element(by=By.XPATH, value='/html/body/div/div/form/button').click()
# driver.find_element(by=By.XPATH, value='/html/body/div/div/form/button').click()
#wait for login page to show up #wait for login page to show up
WebDriverWait(driver, 100).until(EC.visibility_of_element_located( WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
@ -152,9 +156,9 @@ def login(driver):
userBox.send_keys('ct1234') userBox.send_keys('ct1234')
#waits for second catpcha to be inputted by user #waits for second catpcha to be inputted by user
input("Press Enter once captcha done (dont press continue)")
input("Press Enter once captcha done")
#clicks on continue #clicks on continue
driver.find_element(by=By.XPATH, value='/html/body/div/div/div/form/input[2]').click()
# driver.find_element(by=By.XPATH, value='/html/body/div/div/div/form/input[2]').click()
#waits for password to show #waits for password to show
WebDriverWait(driver, 100).until(EC.visibility_of_element_located( WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
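The captcha flow is inverted in these two hunks: the script no longer clicks the form's Done/Continue buttons itself, so the operator now solves the captcha and submits it on the page before pressing Enter, after which the script simply waits for the next page's element. The resulting pattern:

    input("Press Enter once captcha done")        # operator solves and submits on the page
    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
        (By.XPATH, "...")))                       # XPath of the next page's element, as in the hunks above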
@ -220,12 +224,12 @@ def getInterestedLinks():
# Digital - Fraud Software, Has Hacking and Guides # Digital - Fraud Software, Has Hacking and Guides
links.append('http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion/?category=150') links.append('http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion/?category=150')
# Digital - Guides and Tutorials
links.append('http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion/?category=94')
# Carding Services
links.append('http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion/?category=155')
# Digital - Other (half junk half random stuff like: bots, rats, viruses, and guides)
links.append('http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion/?category=153')
# # Digital - Guides and Tutorials
# links.append('http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion/?category=94')
# # Carding Services
# links.append('http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion/?category=155')
# # Digital - Other (half junk half random stuff like: bots, rats, viruses, and guides)
# links.append('http://52qlucglu6fuaqist2herssakipapig2higaaayu7446n55xw4ylxqid.onion/?category=153')
return links return links
@ -237,26 +241,24 @@ def crawlForum(driver):
print("Crawling the ViceCity Market") print("Crawling the ViceCity Market")
linksToCrawl = getInterestedLinks() linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
count = 0
i = 0 i = 0
while i < len(linksToCrawl): while i < len(linksToCrawl):
link = linksToCrawl[i] link = linksToCrawl[i]
print('Crawling :', link) print('Crawling :', link)
try: try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(driver, html, link)
has_next_page = True has_next_page = True
count = 0
while has_next_page: while has_next_page:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(driver, html, link)
list = productPages(html) list = productPages(html)
j = 0
for item in list: for item in list:
itemURL = urlparse.urljoin(baseURL, str(item)) itemURL = urlparse.urljoin(baseURL, str(item))
try: try:
@ -268,25 +270,18 @@ def crawlForum(driver):
time.sleep(2.5) # so site doesnt crash time.sleep(2.5) # so site doesnt crash
driver.back() driver.back()
#comment out
# break
# comment out
break
# # comment out
# if count == 1:
# count = 0
# break
# comment out
if count == 1:
break
try: try:
temp = driver.find_element(by=By.CLASS_NAME, value='pagination') temp = driver.find_element(by=By.CLASS_NAME, value='pagination')
link = temp.find_element(by=By.LINK_TEXT, value='Next').get_attribute('href') link = temp.find_element(by=By.LINK_TEXT, value='Next').get_attribute('href')
if link == "": if link == "":
raise NoSuchElementException raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(driver, html, link)
count += 1 count += 1
except NoSuchElementException: except NoSuchElementException:
@ -296,10 +291,7 @@ def crawlForum(driver):
print(link, e) print(link, e)
i += 1 i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling ViceCity done sucessfully. Press ENTER to continue\n")
print("Crawling the ViceCity market done.")
# Returns 'True' if the link is a description link # Returns 'True' if the link is a description link


+ 1
- 1
setup.ini View File

@ -15,4 +15,4 @@ password = password
database = darkweb_markets_forums database = darkweb_markets_forums
[Encryption] [Encryption]
secret = "password"
secret = password
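This one-character-looking change matters: configparser does not strip quotation marks, so the old line handed the literal value "password" (quotes included) to config.get. A quick demonstration:

    import configparser

    config = configparser.ConfigParser()
    config.read_string('[Encryption]\nsecret = "password"\n')
    print(config.get('Encryption', 'secret'))     # prints "password" with the quotes
    config.read_string('[Encryption]\nsecret = password\n')
    print(config.get('Encryption', 'secret'))     # prints password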
