From cfcff093e40c81bb29fe723de9d5eecf8b29bd58 Mon Sep 17 00:00:00 2001 From: Nathan Pham Date: Sun, 12 Nov 2023 00:12:28 -0800 Subject: [PATCH] finish cyphermarketplace --- .../CypherMarketplace/crawler_selenium.py | 18 +++++++++--------- MarketPlaces/CypherMarketplace/parser.py | 6 +++--- MarketPlaces/Initialization/marketsList.txt | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/MarketPlaces/CypherMarketplace/crawler_selenium.py b/MarketPlaces/CypherMarketplace/crawler_selenium.py index 1864d85..92420ab 100644 --- a/MarketPlaces/CypherMarketplace/crawler_selenium.py +++ b/MarketPlaces/CypherMarketplace/crawler_selenium.py @@ -88,8 +88,8 @@ def createFFDriver(): ff_prof.set_preference("privacy.sanitize.sanitizeOnShutdown", True) ff_prof.set_preference("signon.rememberSignons", False) ff_prof.set_preference("network.cookie.lifetimePolicy", 2) - ff_prof.set_preference("network.dns.disablePrefetch", True) - ff_prof.set_preference("network.http.sendRefererHeader", 0) + # ff_prof.set_preference("network.dns.disablePrefetch", True) + # ff_prof.set_preference("network.http.sendRefererHeader", 0) ff_prof.set_preference("permissions.default.image", 3) ff_prof.set_preference("browser.download.folderList", 2) ff_prof.set_preference("browser.download.manager.showWhenStarting", False) @@ -131,10 +131,10 @@ def login(driver): input("Press ENTER when CAPTCHA is completed\n") # entering username and password into input boxes - usernameBox = driver.find_element(by=By.XPATH, value='/html/body/div/div[2]/div/form/table/tbody/tr[2]/td[2]/input') + usernameBox = driver.find_element(by=By.XPATH, value='//input[@name="username"]') # Username here usernameBox.send_keys('beachyoga278') # sends string to the username box - passwordBox = driver.find_element(by=By.XPATH, value='/html/body/div/div[2]/div/form/table/tbody/tr[3]/td[2]/input') + passwordBox = driver.find_element(by=By.XPATH, value='//input[@name="password"]') # Password here 
passwordBox.send_keys('sunfish278') # sends string to passwordBox @@ -142,7 +142,7 @@ def login(driver): # wait for listing page show up (This Xpath may need to change based on different seed url) WebDriverWait(driver, 100).until(EC.visibility_of_element_located( - (By.XPATH, "/html/body/div[2]/div/div/div[1]/div/div/div[1]/div[2]/ul/li[8]/a"))) + (By.XPATH, '//input[@name="search"]'))) # Saves the crawled html page, makes the directory path for html pages if not made def savePage(driver, page, url): @@ -245,9 +245,9 @@ def crawlForum(driver): break try: - temp = driver.find_element(by=By.XPATH, value= - '/html/body/div[2]/div/div/div[2]/div/nav/ul') - link = temp.find_element(by=By.TAG_NAME, value='page-link').get_attribute('href') + # temp = driver.find_element(by=By.XPATH, value= + # '/html/body/div[2]/div/div/div[2]/div/nav/ul') + link = driver.find_element(by=By.XPATH, value='//a[@rel="next"]').get_attribute('href') if link == "": raise NoSuchElementException count += 1 @@ -299,4 +299,4 @@ def productPages(html): def crawler(): startCrawling() - # print("Crawling and Parsing BestCardingWorld .... DONE!") + # print("Crawling and Parsing CypherMarketplace .... 
DONE!") diff --git a/MarketPlaces/CypherMarketplace/parser.py b/MarketPlaces/CypherMarketplace/parser.py index 6ac14d6..8b3213f 100644 --- a/MarketPlaces/CypherMarketplace/parser.py +++ b/MarketPlaces/CypherMarketplace/parser.py @@ -11,7 +11,7 @@ from bs4 import BeautifulSoup #stores info it needs in different lists, these lists are returned after being organized #@param: soup object looking at html page of description page #return: 'row' that contains a variety of lists that each hold info on the description page -def darkfox_description_parser(soup): +def cyphermarketplace_description_parser(soup): # Fields to be parsed @@ -147,11 +147,11 @@ def darkfox_description_parser(soup): #stores info it needs in different lists, these lists are returned after being organized #@param: soup object looking at html page of listing page #return: 'row' that contains a variety of lists that each hold info on the listing page -def darkfox_listing_parser(soup): +def cyphermarketplace_listing_parser(soup): # Fields to be parsed nm = 0 # Total_Products (Should be Integer) - mktName = "DarkFox" # 0 Marketplace_Name + mktName = "CypherMarketplace" # 0 Marketplace_Name name = [] # 1 Product_Name CVE = [] # 2 Product_CVE_Classification (Common Vulnerabilities and Exposures) MS = [] # 3 Product_MS_Classification (Microsoft Security) diff --git a/MarketPlaces/Initialization/marketsList.txt b/MarketPlaces/Initialization/marketsList.txt index 9d7692b..b656841 100644 --- a/MarketPlaces/Initialization/marketsList.txt +++ b/MarketPlaces/Initialization/marketsList.txt @@ -1 +1 @@ -Ares \ No newline at end of file +CypherMarketplace \ No newline at end of file