Browse Source

finish cyphermarketplace

main
Nathan Pham 1 year ago
parent
commit
cfcff093e4
3 changed files with 13 additions and 13 deletions
  1. +9
    -9
      MarketPlaces/CypherMarketplace/crawler_selenium.py
  2. +3
    -3
      MarketPlaces/CypherMarketplace/parser.py
  3. +1
    -1
      MarketPlaces/Initialization/marketsList.txt

+ 9
- 9
MarketPlaces/CypherMarketplace/crawler_selenium.py View File

@ -88,8 +88,8 @@ def createFFDriver():
ff_prof.set_preference("privacy.sanitize.sanitizeOnShutdown", True)
ff_prof.set_preference("signon.rememberSignons", False)
ff_prof.set_preference("network.cookie.lifetimePolicy", 2)
ff_prof.set_preference("network.dns.disablePrefetch", True)
ff_prof.set_preference("network.http.sendRefererHeader", 0)
# ff_prof.set_preference("network.dns.disablePrefetch", True)
# ff_prof.set_preference("network.http.sendRefererHeader", 0)
ff_prof.set_preference("permissions.default.image", 3)
ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
@ -131,10 +131,10 @@ def login(driver):
input("Press ENTER when CAPTCHA is completed\n")
# entering username and password into input boxes
usernameBox = driver.find_element(by=By.XPATH, value='/html/body/div/div[2]/div/form/table/tbody/tr[2]/td[2]/input')
usernameBox = driver.find_element(by=By.XPATH, value='//input[@name="username"]')
# Username here
usernameBox.send_keys('beachyoga278')  # sends string to the username box
passwordBox = driver.find_element(by=By.XPATH, value='/html/body/div/div[2]/div/form/table/tbody/tr[3]/td[2]/input')
passwordBox = driver.find_element(by=By.XPATH, value='//input[@name="password"]')
# Password here
passwordBox.send_keys('sunfish278')  # sends string to passwordBox
@ -142,7 +142,7 @@ def login(driver):
# wait for listing page show up (This Xpath may need to change based on different seed url)
WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
(By.XPATH, "/html/body/div[2]/div/div/div[1]/div/div/div[1]/div[2]/ul/li[8]/a")))
(By.XPATH, '//input[@name="search"]')))
# Saves the crawled html page, makes the directory path for html pages if not made
def savePage(driver, page, url):
@ -245,9 +245,9 @@ def crawlForum(driver):
break
try:
temp = driver.find_element(by=By.XPATH, value=
'/html/body/div[2]/div/div/div[2]/div/nav/ul')
link = temp.find_element(by=By.TAG_NAME, value='page-link').get_attribute('href')
# temp = driver.find_element(by=By.XPATH, value=
# '/html/body/div[2]/div/div/div[2]/div/nav/ul')
link = driver.find_element(by=By.XPATH, value='//a[rel="next"]').get_attribute('href')
if link == "":
raise NoSuchElementException
count += 1
@ -299,4 +299,4 @@ def productPages(html):
def crawler():
startCrawling()
# print("Crawling and Parsing BestCardingWorld .... DONE!")
# print("Crawling and Parsing CypherMarketplace .... DONE!")

+ 3
- 3
MarketPlaces/CypherMarketplace/parser.py View File

@ -11,7 +11,7 @@ from bs4 import BeautifulSoup
#stores info it needs in different lists, these lists are returned after being organized
#@param: soup object looking at html page of description page
#return: 'row' that contains a variety of lists that each hold info on the description page
def darkfox_description_parser(soup):
def cyphermarketplace_description_parser(soup):
# Fields to be parsed
@ -147,11 +147,11 @@ def darkfox_description_parser(soup):
#stores info it needs in different lists, these lists are returned after being organized
#@param: soup object looking at html page of listing page
#return: 'row' that contains a variety of lists that each hold info on the listing page
def darkfox_listing_parser(soup):
def cyphermarketplace_listing_parser(soup):
# Fields to be parsed
nm = 0  # Total_Products (Should be Integer)
mktName = "DarkFox" # 0 Marketplace_Name
mktName = "CypherMarketplace" # 0 Marketplace_Name
name = []  # 1 Product_Name
CVE = []  # 2 Product_CVE_Classification (Common Vulnerabilities and Exposures)
MS = []  # 3 Product_MS_Classification (Microsoft Security)


+ 1
- 1
MarketPlaces/Initialization/marketsList.txt View File

@ -1 +1 @@
Ares
CypherMarketplace

Loading…
Cancel
Save