Merge branch 'main' into '0day'

# Conflicts: # MarketPlaces/Initialization/markets_mining.py # MarketPlaces/Initialization/prepare_parser.py
1 year ago · a00dd301b0
--- a/MarketPlaces/Initialization/markets_mining.py
+++ b/MarketPlaces/Initialization/markets_mining.py
@ -24,6 +24,7 @@ from MarketPlaces.Bohemia.crawler_selenium import crawler as crawlerBohemia
 from MarketPlaces.TheDarkMarket.crawler_selenium import crawler as crawlerTheDarkMarket
 from MarketPlaces.GoFish.crawler_selenium import crawler as crawlerGoFish
 from MarketPlaces.ZeroDay.crawler_selenium import crawler as crawlerZeroDay
 from MarketPlaces.Torzon.crawler_selenium import crawler as crawlerTorzon
 import configparser
 import os
@ -135,5 +136,7 @@ if __name__ == '__main__':
            crawlerTheDarkMarket()
        elif mkt == "ZeroDay":
            crawlerZeroDay()
        elif mkt == "Torzon":
            crawlerTorzon()
    print("\nScraping process completed!")
--- a/MarketPlaces/Initialization/prepare_parser.py
+++ b/MarketPlaces/Initialization/prepare_parser.py
@ -24,6 +24,7 @@ from MarketPlaces.Quest.parser import *
 from MarketPlaces.Ares.parser import *
 from MarketPlaces.CypherMarketplace.parser import *
 from MarketPlaces.WeTheNorth.parser import *
 from MarketPlaces.Torzon.parser import *
 from MarketPlaces.GoFish.parser import *
 from MarketPlaces.ZeroDay.parser import *
@ -163,6 +164,8 @@ def parse_listing(marketPlace, listingFile, soup, createLog, logFile):
            rw = gofish_listing_parser(soup)
        elif marketPlace == "ZeroDay":
            rw = zeroday_listing_parser(soup)
        elif marketPlace == "Torzon":
            rw = torzon_listing_parser(soup)
        else:
            print("MISSING CALL TO LISTING PARSER IN PREPARE_PARSER.PY!")
            raise Exception
@ -219,6 +222,8 @@ def parse_description(marketPlace, descriptionFile, soup, createLog, logFile):
            rmm = gofish_description_parser(soup)
        elif marketPlace == "ZeroDay":
            rmm = zeroday_description_parser(soup)
        elif marketPlace == "Torzon":
            rmm = torzon_description_parser(soup)
        else:
            print("MISSING CALL TO DESCRIPTION PARSER IN PREPARE_PARSER.PY!")
            raise Exception
--- a/MarketPlaces/Torzon/crawler_selenium.py
+++ b/MarketPlaces/Torzon/crawler_selenium.py
@ -45,7 +45,7 @@ def startCrawling():
            print(driver.current_url, e)
        closeDriver(driver)
    new_parse(mktName, BASE_URL, False)
    # new_parse(mktName, BASE_URL, False)
 # Returns the name of the website
@ -129,8 +129,35 @@ def getAccess():
 # then allows for manual solving of captcha in the terminal
 #@param: current selenium web driver
 def login(driver):
    # Walks the given selenium driver through the login flow: pauses for the
    # operator to clear the CAPTCHA by hand, fills in the username/password form,
    # submits it, then fills in and submits the PIN form that follows.
    # NOTE(review): this text looks like a diff rendering with its +/- markers
    # stripped, so the two consecutive prompts below may be the old and the new
    # version of one line rather than two real prompts - confirm against the file.
    input("Press ENTER when CAPTCHA is completed and page is loaded\n")
    input("Press ENTER when CAPTCHA is completed and LOGIN page is loaded\n")
    # block (timeout argument 100) until the username input box is visible
    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
        (By.XPATH, '//*[@id="username"]')))
    # (the Xpath waited on above may need to change based on different seed url)
    # entering username and password into input boxes
    usernameBox = driver.find_element(by=By.XPATH, value='//*[@id="username"]')
    # Username here
    # NOTE(review): username, password and PIN are hard-coded in source;
    # consider loading them from configuration instead - confirm with owners.
    usernameBox.send_keys('LordTachonky')
    passwordBox = driver.find_element(by=By.XPATH, value='//*[@id="password"]')
    # Password here
    passwordBox.send_keys('BorderRanked')
    # pause again before submitting; presumably the login form carries its own
    # CAPTCHA that has to be solved by hand - TODO confirm
    input("Press ENTER when CAPTCHA is finished\n")
    # NOTE: the local name `login` shadows this function's own name inside its body
    login = driver.find_element(by=By.XPATH, value='/html/body/div/form/input[4]')
    login.click()
    # wait for the page after the credentials form to show up; the code below
    # immediately looks for a "pin" input, so this is presumably the PIN prompt
    # rather than the listing page - confirm
    # (This Xpath may need to change based on different seed url)
    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
        (By.XPATH, '/html/body/div/center')))
    # enter the account PIN and submit the PIN form
    pinBox = driver.find_element(by=By.XPATH, value='//*[@id="pin"]')
    pinBox.send_keys('541236')
    submit = driver.find_element(by=By.XPATH, value='/html/body/div/form/input[2]')
    submit.click()
    # block until the element at /html/body/div[1]/p is visible after the PIN
    # submit (presumably the landing page once logged in - confirm)
    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
        (By.XPATH, '/html/body/div[1]/p')))
    # driver.find_element(by=By.XPATH, value='/html/body/div[1]/label').click()
 # Saves the crawled html page, makes the directory path for html pages if not made
@ -210,33 +237,30 @@ def crawlForum(driver):
                    driver.get(link)
                except:
                    driver.refresh()
                WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
                    (By.XPATH, '/html/body/div[3]/div/table/tbody/tr/td[2]/center/table/tbody/tr[1]/td[1]')))
                html = driver.page_source
                savePage(driver, html, link)
                list = productPages(html)
                for item in list:
                    itemURL = urlparse.urljoin(BASE_URL, str(item))
                    itemURL = urlparse.urljoin(getFixedURL(), str(item))
                    try:
                        # time.sleep(1.5) # to keep from detecting click speed
                        driver.get(itemURL)
                    except:
                        driver.refresh()
                    savePage(driver, driver.page_source, item)
                    # time.sleep(1.5)
                    driver.back()
                     # to keep from detecting click speed
                    # comment out
                    break
                # comment out
                if count == 1:
                    break
                #     # comment out
                #     break
                #
                # # comment out
                # if count == 1:
                #     break
                try:
                    # nav = driver.find_element(by=By.XPATH, value='/html/body/table[1]/tbody/tr/td/form/div/div[2]/table[2]')
                    # a = nav.find_element(by=By.LINK_TEXT, value=">")
                    link = driver.find_element(by=By.LINK_TEXT, value=">").get_attribute('href')
                    link = driver.find_element(by=By.XPATH, value='//a[contains(text(), "Next")]').get_attribute('href')
                    if link == "":
                        raise NoSuchElementException
                    count += 1
@ -246,7 +270,6 @@ def crawlForum(driver):
        except Exception as e:
            print(link, e)
            # raise e
        i += 1
    input("Crawling Torzon market done sucessfully. Press ENTER to continue\n")
--- a/MarketPlaces/Torzon/parser.py
+++ b/MarketPlaces/Torzon/parser.py
@ -318,7 +318,7 @@ def torzon_links_parser(soup):
    href = []
    # listing = soup.findAll('div', {"class": "card mt-1"})
    listing = soup.find('td', {"valign": "top"}).find("table", {"border": "0"}).findAll('td', {'width': '50%'})
    listing = soup.find('td', {"valign": "top"}).find("tbody").findAll('td', {'width': '50%'})
    for a in listing:
        bae = a.find('a',  href=True)#card-title rounded text-truncate