
small edits to CypherMarketplace

Branch: main
westernmeadow committed 1 year ago
Parent commit: 7dec52fb02
2 changed files with 9 additions and 8 deletions:
  1. MarketPlaces/CypherMarketplace/crawler_selenium.py  (+7, -6)
  2. MarketPlaces/CypherMarketplace/parser.py  (+2, -2)

MarketPlaces/CypherMarketplace/crawler_selenium.py  (+7, -6)

@@ -144,6 +144,7 @@ def login(driver):
     WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
         (By.XPATH, '//input[@name="search"]')))
 
+
 # Saves the crawled html page, makes the directory path for html pages if not made
 def savePage(driver, page, url):
     cleanPage = cleanHTML(driver, page)
@@ -237,12 +238,12 @@ def crawlForum(driver):
             savePage(driver, driver.page_source, item)
             driver.back()
 
-            # comment out
-            break
-
-        # comment out
-        if count == 1:
-            break
+            # # comment out
+            # break
+            #
+        # # comment out
+        # if count == 1:
+        #     break
 
         try:
             # temp = driver.find_element(by=By.XPATH, value=
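The two disabled blocks are testing guards: the inner break stopped the crawl after the first product on a listing page, and if count == 1: break stopped it after the first page, so commenting both out lets crawlForum walk every product on every page. A minimal sketch of that loop shape, with stand-in helpers in place of the repo's Selenium calls (the names below are illustrative, not the actual code):

# Sketch only: stand-ins replace the Selenium driver and savePage calls.
def get_item_links(page_no):
    # stand-in for scraping product links off the current listing page
    return [f"/item/{page_no}-{i}" for i in range(3)]

def visit_and_save(link):
    # stand-in for driver.get(itemURL) followed by savePage(...)
    print("saved", link)

count = 0
has_next_page = True
while has_next_page:
    for item in get_item_links(count):
        visit_and_save(item)

        # comment out   (testing guard: stop after the first item)
        # break

    # comment out       (testing guard: stop after the first page)
    # if count == 1:
    #     break

    count += 1
    has_next_page = count < 2  # stand-in for locating the next-page link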


MarketPlaces/CypherMarketplace/parser.py  (+2, -2)

@@ -281,10 +281,10 @@ def cyphermarketplace_links_parser(soup):
     # Returning all links that should be visited by the Crawler
     href = []
-    listing = soup.findAll('div', {"class": "card-body"})
+    listing = soup.findAll('div', {"class": "col-12 col-sm-6 col-md-4 my-1"})
 
     for a in listing:
-        bae = a.find('a', {"class": "text-info"}, href=True)
+        bae = a.find('div', {"class": "card-body"}).find('a', {"class": "text-info"}, href=True)
         link = bae['href']
         href.append(link)
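The listing lookup now starts from the grid-column wrapper and reaches the product link through the nested card body. A self-contained sketch of the new selector chain against sample markup (the HTML below is illustrative, not captured from the live site):

from bs4 import BeautifulSoup

# Illustrative sample of the assumed listing-page structure.
sample = """
<div class="col-12 col-sm-6 col-md-4 my-1">
  <div class="card-body">
    <a class="text-info" href="/product/abc123">Listing A</a>
  </div>
</div>
"""

soup = BeautifulSoup(sample, "html.parser")

href = []
# A multi-class string in the attrs dict matches the exact class attribute value.
listing = soup.findAll('div', {"class": "col-12 col-sm-6 col-md-4 my-1"})
for a in listing:
    bae = a.find('div', {"class": "card-body"}).find('a', {"class": "text-info"}, href=True)
    href.append(bae['href'])

print(href)  # ['/product/abc123']

Note that BeautifulSoup treats a space-separated class string as an exact match on the class attribute, so if the live page reorders those utility classes this lookup would stop matching.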
