diff --git a/MarketPlaces/CypherMarketplace/crawler_selenium.py b/MarketPlaces/CypherMarketplace/crawler_selenium.py index 92420ab..9eb7d43 100644 --- a/MarketPlaces/CypherMarketplace/crawler_selenium.py +++ b/MarketPlaces/CypherMarketplace/crawler_selenium.py @@ -144,6 +144,7 @@ def login(driver): WebDriverWait(driver, 100).until(EC.visibility_of_element_located( (By.XPATH, '//input[@name="search"]'))) + # Saves the crawled html page, makes the directory path for html pages if not made def savePage(driver, page, url): cleanPage = cleanHTML(driver, page) @@ -237,12 +238,12 @@ def crawlForum(driver): savePage(driver, driver.page_source, item) driver.back() - # comment out - break - - # comment out - if count == 1: - break + # # comment out + # break + # + # # comment out + # if count == 1: + # break try: # temp = driver.find_element(by=By.XPATH, value= diff --git a/MarketPlaces/CypherMarketplace/parser.py b/MarketPlaces/CypherMarketplace/parser.py index 8b3213f..4f5fd53 100644 --- a/MarketPlaces/CypherMarketplace/parser.py +++ b/MarketPlaces/CypherMarketplace/parser.py @@ -281,10 +281,10 @@ def cyphermarketplace_links_parser(soup): # Returning all links that should be visited by the Crawler href = [] - listing = soup.findAll('div', {"class": "card-body"}) + listing = soup.findAll('div', {"class": "col-12 col-sm-6 col-md-4 my-1"}) for a in listing: - bae = a.find('a', {"class": "text-info"}, href=True) + bae = a.find('div', {"class": "card-body"}).find('a', {"class": "text-info"}, href=True) link = bae['href'] href.append(link)