From 85acaf41ad2487e762b647a374d9405ddf43e493 Mon Sep 17 00:00:00 2001
From: westernmeadow
Date: Sat, 23 Dec 2023 19:04:37 -0800
Subject: [PATCH] reverted commits af32a2797b627b4caad95cca0bbd9fc8796e05c0 and
 5de337ae742dafba7dfd51464c55af15211a7888

---
Review notes (free-form area, ignored by `git am`):

* The login-form probe in crawlForum() now uses the Python Selenium call
  driver.find_elements(...), which returns a list (empty when nothing
  matches).  The previous spelling, driver.findElements(...).size(), is the
  Java binding's API; in Python it raises AttributeError, which the
  surrounding bare `except:` swallowed, so only driver.refresh() ever ran
  and neither relogin() nor driver.get() was reached.
* relogin() types a hard-coded username/password and then blocks on
  input() until the operator solves the CAPTCHA by hand.  NOTE(review):
  consider moving the credentials out of the source tree.
* NOTE(review): when the relogin branch is taken, the target URL is not
  loaded in that iteration; the page saved afterwards is whatever the site
  shows after login.  Confirm this is intended.
* The post-image blob id on the `index` line predates the find_elements
  correction.

 .../CypherMarketplace/crawler_selenium.py     | 24 +++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/MarketPlaces/CypherMarketplace/crawler_selenium.py b/MarketPlaces/CypherMarketplace/crawler_selenium.py
index 9eb7d43..24ad4f1 100644
--- a/MarketPlaces/CypherMarketplace/crawler_selenium.py
+++ b/MarketPlaces/CypherMarketplace/crawler_selenium.py
@@ -144,6 +144,20 @@ def login(driver):
     WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
         (By.XPATH, '//input[@name="search"]')))
 
+def relogin(driver):
+    # entering username and password into input boxes
+    usernameBox = driver.find_element(by=By.XPATH, value='//input[@name="username"]')
+    # Username here
+    usernameBox.send_keys('beachyoga278')  # sends string to the username box
+    passwordBox = driver.find_element(by=By.XPATH, value='//input[@name="password"]')
+    # Password here
+    passwordBox.send_keys('sunfish278')  # sends string to passwordBox
+
+    input("Press ENTER when CAPTCHA is completed\n")
+
+    # wait for listing page show up (This Xpath may need to change based on different seed url)
+    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
+        (By.XPATH, '//input[@name="search"]')))
 
 # Saves the crawled html page, makes the directory path for html pages if not made
 def savePage(driver, page, url):
@@ -222,7 +236,10 @@ def crawlForum(driver):
 
             while has_next_page:
                 try:
-                    driver.get(link)
+                    if driver.find_elements(by=By.XPATH, value='//input[@name="username"]'):
+                        relogin(driver)
+                    else:
+                        driver.get(link)
                 except:
                     driver.refresh()
                 html = driver.page_source
@@ -232,7 +249,10 @@ def crawlForum(driver):
                 for item in list:
                     itemURL = urlparse.urljoin(baseURL, str(item))
                     try:
-                        driver.get(itemURL)
+                        if driver.find_elements(by=By.XPATH, value='//input[@name="username"]'):
+                            relogin(driver)
+                        else:
+                            driver.get(itemURL)
                     except:
                         driver.refresh()
                     savePage(driver, driver.page_source, item)