diff --git a/MarketPlaces/DarkDock/crawler_selenium.py b/MarketPlaces/DarkDock/crawler_selenium.py index c5b98b7..1d04b5a 100644 --- a/MarketPlaces/DarkDock/crawler_selenium.py +++ b/MarketPlaces/DarkDock/crawler_selenium.py @@ -231,9 +231,7 @@ def crawlMarket(driver): try: has_next_page = True - count = 1 # Number of pages traversed - maxPages = '' - + count = 2 while has_next_page: @@ -247,21 +245,6 @@ def crawlMarket(driver): html = driver.page_source savePage(driver, html, linksToCrawl[i] + f"page{count}") - # Get the number of maxPages if maxPages isn't fetched yet - if maxPages == '': - try: - # Wait 30 seconds or until element loads - WebDriverWait(driver, 30).until( - EC.presence_of_element_located((By.XPATH, '//div[@class="pages"]//a[last()]')) - ) - # fetches the element that gives the total number of pages in a category - maxPages = int(driver.find_element(By.XPATH, '//div[@class="pages"]//a[last()]').text) - print(f"Total number of Pages: {maxPages}") - except Exception as e: - print(f"Element not found: {str(e)}") - - - # Parse the product/description pages list = descriptionPages(html) for item in list: @@ -275,11 +258,9 @@ def crawlMarket(driver): # Go back to the previous category page driver.back() - # # Add a break for testing if we are checking only the first description/product page - # break - - - + # # Add a break for testing if we are checking only the first description/product page + # break + # # # Add a break for testing based on how many numbers of pages to test # if count == 3: # break @@ -287,7 +268,8 @@ def crawlMarket(driver): # Try to find the next page try: link = f"{baseCategoryLink}/{count}/" - print("\tCurrent Page :", f"{link}") + driver.find_element(By.XPATH, f'//a[@href="{urlparse.urlparse(link).path}"]') + if link == "": raise NoSuchElementException count += 1 @@ -295,11 +277,6 @@ def crawlMarket(driver): except NoSuchElementException: has_next_page = False - # If reached the number of maxPages stop crawling the current category - if count > maxPages: - print("Max Pages reached") - has_next_page = False - except Exception as e: print(link, e) i += 1