Browse Source

reverted commits af32a2797b and 5de337ae74

main
westernmeadow 1 year ago
parent
commit
85acaf41ad
1 changed files with 22 additions and 2 deletions
  1. +22
    -2
      MarketPlaces/CypherMarketplace/crawler_selenium.py

+ 22
- 2
MarketPlaces/CypherMarketplace/crawler_selenium.py View File

@ -144,6 +144,20 @@ def login(driver):
WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
(By.XPATH, '//input[@name="search"]')))
def relogin(driver):
    """Fill in the login form on the current page and wait for the listing page.

    Types the account username and password into the page's ``username`` and
    ``password`` inputs, blocks on the console until the operator confirms the
    CAPTCHA is completed, then waits (timeout of 100) for the ``search`` input
    to become visible.

    NOTE(review): nothing here clicks a submit control, so the operator
    presumably submits the form while solving the CAPTCHA -- confirm.

    :param driver: Selenium WebDriver whose current page shows the login form.
    """
    # (input XPath, text to type) pairs, filled in this order
    credentials = (
        ('//input[@name="username"]', 'beachyoga278'),  # Username here
        ('//input[@name="password"]', 'sunfish278'),  # Password here
    )
    for field_xpath, text in credentials:
        field = driver.find_element(by=By.XPATH, value=field_xpath)
        field.send_keys(text)  # sends the string to the located input box

    # the CAPTCHA is solved by hand; block until the operator confirms
    input("Press ENTER when CAPTCHA is completed\n")

    # wait for the listing page to show up (this XPath may need to change
    # based on a different seed url)
    search_locator = (By.XPATH, '//input[@name="search"]')
    WebDriverWait(driver, 100).until(
        EC.visibility_of_element_located(search_locator))
# Saves the crawled html page, makes the directory path for html pages if not made
def savePage(driver, page, url):
@ -222,7 +236,10 @@ def crawlForum(driver):
while has_next_page:
try:
driver.get(link)
if driver.findElements(by=By.XPATH, value='//input[@name="username"]').size() > 0:
relogin(driver)
else:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
@ -232,7 +249,10 @@ def crawlForum(driver):
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
try:
driver.get(itemURL)
if driver.findElements(by=By.XPATH, value='//input[@name="username"]').size() > 0:
relogin(driver)
else:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver, driver.page_source, item)


Loading…
Cancel
Save