|
|
@@ -231,9 +231,7 @@ def crawlMarket(driver): |
|
|
|
|
|
|
|
try: |
|
|
|
has_next_page = True |
|
|
|
count = 1 # Number of pages traversed |
|
|
|
maxPages = '' |
|
|
|
|
|
|
|
count = 2 |
|
|
|
|
|
|
|
while has_next_page: |
|
|
|
|
|
|
@@ -247,21 +245,6 @@ def crawlMarket(driver): |
|
|
|
html = driver.page_source |
|
|
|
savePage(driver, html, linksToCrawl[i] + f"page{count}") |
|
|
|
|
|
|
|
# Get the number of maxPages if maxPages isn't fetched yet |
|
|
|
if maxPages == '': |
|
|
|
try: |
|
|
|
# Wait 30 seconds or until element loads |
|
|
|
WebDriverWait(driver, 30).until( |
|
|
|
EC.presence_of_element_located((By.XPATH, '//div[@class="pages"]//a[last()]')) |
|
|
|
) |
|
|
|
# fetches the element that gives the total number of pages in a category |
|
|
|
maxPages = int(driver.find_element(By.XPATH, '//div[@class="pages"]//a[last()]').text) |
|
|
|
print(f"Total number of Pages: {maxPages}") |
|
|
|
except Exception as e: |
|
|
|
print(f"Element not found: {str(e)}") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Parse the product/description pages |
|
|
|
list = descriptionPages(html) |
|
|
|
for item in list: |
|
|
@@ -275,11 +258,9 @@ def crawlMarket(driver): |
|
|
|
# Go back to the previous category page |
|
|
|
driver.back() |
|
|
|
|
|
|
|
# # Add a break for testing if we are checking only the first description/product page |
|
|
|
# break |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# # Add a break for testing if we are checking only the first description/product page |
|
|
|
# break |
|
|
|
# |
|
|
|
# # Add a break for testing based on how many numbers of pages to test |
|
|
|
# if count == 3: |
|
|
|
# break |
|
|
@@ -287,7 +268,8 @@ def crawlMarket(driver): |
|
|
|
# Try to find the next page |
|
|
|
try: |
|
|
|
link = f"{baseCategoryLink}/{count}/" |
|
|
|
print("\tCurrent Page :", f"{link}") |
|
|
|
driver.find_element(By.XPATH, f'//a[@href="{urlparse.urlparse(link).path}"]') |
|
|
|
|
|
|
|
if link == "": |
|
|
|
raise NoSuchElementException |
|
|
|
count += 1 |
|
|
@@ -295,11 +277,6 @@ def crawlMarket(driver): |
|
|
|
except NoSuchElementException: |
|
|
|
has_next_page = False |
|
|
|
|
|
|
|
# If reached the number of maxPages stop crawling the current category |
|
|
|
if count > maxPages: |
|
|
|
print("Max Pages reached") |
|
|
|
has_next_page = False |
|
|
|
|
|
|
|
except Exception as e: |
|
|
|
print(link, e) |
|
|
|
i += 1 |
|
|
|