|
|
@ -88,8 +88,8 @@ def createFFDriver(): |
|
|
|
ff_prof.set_preference("privacy.sanitize.sanitizeOnShutdown", True) |
|
|
|
ff_prof.set_preference("signon.rememberSignons", False) |
|
|
|
ff_prof.set_preference("network.cookie.lifetimePolicy", 2) |
|
|
|
ff_prof.set_preference("network.dns.disablePrefetch", True) |
|
|
|
ff_prof.set_preference("network.http.sendRefererHeader", 0) |
|
|
|
# ff_prof.set_preference("network.dns.disablePrefetch", True) |
|
|
|
# ff_prof.set_preference("network.http.sendRefererHeader", 0) |
|
|
|
ff_prof.set_preference("permissions.default.image", 3) |
|
|
|
ff_prof.set_preference("browser.download.folderList", 2) |
|
|
|
ff_prof.set_preference("browser.download.manager.showWhenStarting", False) |
|
|
@ -131,10 +131,10 @@ def login(driver): |
|
|
|
input("Press ENTER when CAPTCHA is completed\n") |
|
|
|
|
|
|
|
# entering username and password into input boxes |
|
|
|
usernameBox = driver.find_element(by=By.XPATH, value='/html/body/div/div[2]/div/form/table/tbody/tr[2]/td[2]/input') |
|
|
|
usernameBox = driver.find_element(by=By.XPATH, value='//input[@name="username"]') |
|
|
|
# Username here |
|
|
|
usernameBox.send_keys('beachyoga278') # sends string to the username box |
|
|
|
passwordBox = driver.find_element(by=By.XPATH, value='/html/body/div/div[2]/div/form/table/tbody/tr[3]/td[2]/input') |
|
|
|
passwordBox = driver.find_element(by=By.XPATH, value='//input[@name="password"]') |
|
|
|
# Password here |
|
|
|
passwordBox.send_keys('sunfish278') # sends string to passwordBox |
|
|
|
|
|
|
@ -142,7 +142,7 @@ def login(driver): |
|
|
|
|
|
|
|
# wait for the listing page to show up (this XPath may need to change for a different seed URL) |
|
|
|
WebDriverWait(driver, 100).until(EC.visibility_of_element_located( |
|
|
|
(By.XPATH, "/html/body/div[2]/div/div/div[1]/div/div/div[1]/div[2]/ul/li[8]/a"))) |
|
|
|
(By.XPATH, '//input[@name="search"]'))) |
|
|
|
|
|
|
|
# Saves the crawled HTML page, creating the directory path for HTML pages if it does not already exist |
|
|
|
def savePage(driver, page, url): |
|
|
@ -245,9 +245,9 @@ def crawlForum(driver): |
|
|
|
break |
|
|
|
|
|
|
|
try: |
|
|
|
temp = driver.find_element(by=By.XPATH, value= |
|
|
|
'/html/body/div[2]/div/div/div[2]/div/nav/ul') |
|
|
|
link = temp.find_element(by=By.TAG_NAME, value='page-link').get_attribute('href') |
|
|
|
# temp = driver.find_element(by=By.XPATH, value= |
|
|
|
# '/html/body/div[2]/div/div/div[2]/div/nav/ul') |
|
|
|
link = driver.find_element(by=By.XPATH, value='//a[rel="next"]').get_attribute('href') |
|
|
|
if link == "": |
|
|
|
raise NoSuchElementException |
|
|
|
count += 1 |
|
|
@ -299,4 +299,4 @@ def productPages(html): |
|
|
|
|
|
|
|
def crawler():
    """Run the crawl by delegating to startCrawling().

    Takes no arguments and returns nothing; any exception raised by
    startCrawling() propagates to the caller.
    """
    startCrawling()
    # print("Crawling and Parsing CypherMarketplace .... DONE!")