Khoi 1 year ago
commit 4133f340cd
17 changed files with 199 additions and 393 deletions
  1. +22 -46  Forums/AbyssForum/crawler_selenium.py
  2. +12 -23  MarketPlaces/AnonymousMarketplace/crawler_selenium.py
  3. +11 -22  MarketPlaces/Apocalypse/crawler_selenium.py
  4. +11 -22  MarketPlaces/BlackPyramid/crawler_selenium.py
  5. +9 -20   MarketPlaces/CityMarket/crawler_selenium.py
  6. +9 -20   MarketPlaces/CypherMarketplace/crawler_selenium.py
  7. +21 -23  MarketPlaces/DarkFox/crawler_selenium.py
  8. +9 -21   MarketPlaces/DarkMatter/crawler_selenium.py
  9. +11 -22  MarketPlaces/DarkTor/crawler_selenium.py
  10. +9 -20  MarketPlaces/DigitalThriftShop/crawler_selenium.py
  11. +9 -20  MarketPlaces/LionMarketplace/crawler_selenium.py
  12. +10 -22 MarketPlaces/M00nkeyMarket/crawler_selenium.py
  13. +12 -23 MarketPlaces/MikesGrandStore/crawler_selenium.py
  14. +10 -21 MarketPlaces/ThiefWorld/crawler_selenium.py
  15. +10 -22 MarketPlaces/Tor2door/crawler_selenium.py
  16. +15 -26 MarketPlaces/TorBay/crawler_selenium.py
  17. +9 -20  MarketPlaces/TorMarket/crawler_selenium.py

+22 -46  Forums/AbyssForum/crawler_selenium.py

@@ -191,86 +191,66 @@ def crawlForum(driver):
print("Crawling the AbyssForum forum")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
i = 0
count = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
list = topicPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
driver.back()
'''
#variable to check if there is a next page for the topic
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
topics = topicPages(html)
for topic in topics:
has_next_topic_page = True
counter = 1
page = topic
# check if there is a next page for the topics
while has_next_topic_page:
# try to access the next page of the topic
itemURL = urlparse.urljoin(baseURL, str(item))
itemURL = urlparse.urljoin(baseURL, str(page))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
savePage(driver.page_source, topic + f"page{counter}")
# if there is a next page then go and save....
# next page in the topic?
try:
temp = driver.find_element(By.XPATH, '/html/body/div/div[2]/div/div[2]/div') # /html/body/div/div[2]/div/div[2]/div/
item = temp.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute('href') #/html/body/div/div[2]/div/div[2]/div
# comment out
if counter == 2:
break
try:
temp = driver.find_element(By.XPATH, '/html/body/div[2]/div[2]/div[2]/div[3]')
item = temp.find_element(by=By.CLASS_NAME, value='button button-icon-only').get_attribute('href')
if item == "":
raise NoSuchElementException
has_next_topic_page = False
else:
counter += 1
counter += 1
except NoSuchElementException:
has_next_topic_page = False
# end of loop
for i in range(counter):
driver.back()
'''
# comment out
break
# comment out
if count == 1:
count = 0
break
try:
link = driver.find_element(by=By.XPATH, value = '/html/body/div[2]/div[2]/div[2]/div[2]/ul/li[9]/a').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@@ -280,10 +260,6 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling AbyssForum forum done sucessfully. Press ENTER to continue\n")


+12 -23  MarketPlaces/AnonymousMarketplace/crawler_selenium.py

@@ -202,24 +202,23 @@ def crawlForum(driver):
print("Crawling the AnonymousMarketplace market")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
count = 0
i = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
list = productPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
@@ -231,24 +230,17 @@ def crawlForum(driver):
driver.back()
# comment out
# break
break
# comment out
# if count == 20:
# count = 0
# break
if count == 1:
break
#left in in case site changes
try:
link = ""
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@@ -258,9 +250,6 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling AnonymousMarketplace forum done sucessfully. Press ENTER to continue\n")


+11 -22  MarketPlaces/Apocalypse/crawler_selenium.py

@@ -216,24 +216,23 @@ def crawlForum(driver):
print("Crawling the Apocalypse market")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
count = 0
i = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
list = productPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
@@ -245,11 +244,10 @@ def crawlForum(driver):
driver.back()
# comment out
# break
break
# comment out
if count == 20:
count = 0
if count == 1:
break
try:
@@ -257,12 +255,6 @@ def crawlForum(driver):
'/html/body/div[1]/div/div[2]/nav/ul/li[5]/a').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@@ -272,9 +264,6 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling Apocalypse forum done sucessfully. Press ENTER to continue\n")


+11 -22  MarketPlaces/BlackPyramid/crawler_selenium.py

@@ -220,26 +220,25 @@ def crawlForum(driver):
print("Crawling the BlackPyramid market")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
count = 0
i = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
clicker = driver.find_element(by=By.XPATH, value='/html/body/div[2]/form/nav/nav/ul/li[2]/div/a')
clicker.click() # open tab with url
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
try:
clicker = driver.find_element(by=By.XPATH, value='/html/body/div[2]/form/nav/nav/ul/li[2]/div/a')
clicker.click() # open tab with url
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
list = productPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
@@ -255,7 +254,6 @@ def crawlForum(driver):
# comment out
if count == 1:
count = 0
break
try:
@@ -263,12 +261,6 @@ def crawlForum(driver):
'/html/body/center/div[4]/div/div[3]/div[23]/div[2]/input[1]')
if clicker == "":
raise NoSuchElementException
try:
clicker.click()
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@@ -278,9 +270,6 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling BlackPyramid forum done sucessfully. Press ENTER to continue\n")


+9 -20  MarketPlaces/CityMarket/crawler_selenium.py

@@ -221,24 +221,23 @@ def crawlForum(driver):
print("Crawling the CityMarket market")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
count = 0
i = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
list = productPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
@@ -254,7 +253,6 @@ def crawlForum(driver):
# comment out
if count == 1:
count = 0
break
try:
@@ -262,12 +260,6 @@ def crawlForum(driver):
'/html/body/div[1]/div/div[2]/nav/ul/li[5]/a').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@@ -277,9 +269,6 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling CityMarket forum done sucessfully. Press ENTER to continue\n")


+9 -20  MarketPlaces/CypherMarketplace/crawler_selenium.py

@@ -214,24 +214,23 @@ def crawlForum(driver):
print("Crawling the CypherMarketplace market")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
count = 0
i = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
list = productPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
@@ -247,7 +246,6 @@ def crawlForum(driver):
# comment out
if count == 1:
count = 0
break
try:
@@ -256,12 +254,6 @@ def crawlForum(driver):
link = temp.find_element(by=By.TAG_NAME, value='page-link').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@@ -271,9 +263,6 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling CypherMarketplace forum done sucessfully. Press ENTER to continue\n")


+21 -23  MarketPlaces/DarkFox/crawler_selenium.py

@@ -239,46 +239,47 @@ def crawlForum(driver):
print("Crawling the DarkFox market")
linksToCrawl = getInterestedLinks()
# visited = set(linksToCrawl)
# initialTime = time.time()
count = 0
i = 0
while i < len(linksToCrawl):
if count >= 500:
break
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
list = productPages(html)
for item in list:
itemURL = str(item)
itemURL = urlparse.urljoin(baseURL, str(item))
try:
driver.get(itemURL)
except:
driver.refresh()
savePage(driver.page_source, item)
driver.back()
count += 1
# comment out
break
# comment out
if count == 0:
break
try:
link = driver.find_element(by=By.XPATH, value=
'/html/body/main/div/div[2]/div/div[2]/div/div/div/nav/a[2]').get_attribute('href')
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
if link == "":
raise NoSuchElementException
count += 1
except NoSuchElementException:
has_next_page = False
@@ -286,9 +287,6 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling BestCardingWorld forum done sucessfully. Press ENTER to continue\n")


+9 -21  MarketPlaces/DarkMatter/crawler_selenium.py

@@ -205,26 +205,24 @@ def crawlForum(driver):
print("Crawling the DarkMatter market")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
count = 0
i = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
list = productPages(html)
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
list = productPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
try:
@@ -239,7 +237,6 @@ def crawlForum(driver):
# comment out
if count == 1:
count = 0
break
try:
@@ -248,12 +245,6 @@ def crawlForum(driver):
link = a.get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@@ -263,9 +254,6 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling DarkMatter forum done sucessfully. Press ENTER to continue\n")


+11 -22  MarketPlaces/DarkTor/crawler_selenium.py

@@ -201,24 +201,23 @@ def crawlForum(driver):
print("Crawling the DarkTor market")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
count = 0
i = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
list = productPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
@@ -230,23 +229,16 @@ def crawlForum(driver):
driver.back()
# comment out
# break
break
# comment out
if count == 30:
count = 0
if count == 1:
break
try:
link = ""
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@@ -256,9 +248,6 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling DarkTor forum done sucessfully. Press ENTER to continue\n")


+9 -20  MarketPlaces/DigitalThriftShop/crawler_selenium.py

@@ -204,24 +204,23 @@ def crawlForum(driver):
print("Crawling the DigitalThriftShop market")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
count = 0
i = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
list = productPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
@@ -237,7 +236,6 @@ def crawlForum(driver):
# comment out
if count == 1:
count = 0
break
try:
@@ -245,12 +243,6 @@ def crawlForum(driver):
'/html/body/div[1]/div[2]/div/div[2]/main/div[1]/nav/ul/li[5]/a').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@@ -260,9 +252,6 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling DigitalThriftShop forum done sucessfully. Press ENTER to continue\n")


+9 -20  MarketPlaces/LionMarketplace/crawler_selenium.py

@@ -212,24 +212,23 @@ def crawlForum(driver):
print("Crawling the LionMarketplace market")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
count = 0
i = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
list = productPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
@@ -245,7 +244,6 @@ def crawlForum(driver):
# comment out
if count == 1:
count = 0
break
try:
@@ -253,12 +251,6 @@ def crawlForum(driver):
'/html/body/div[2]/div[2]/div/div[2]/nav/ul/li[5]/a').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@@ -268,9 +260,6 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling LionMarketplace forum done sucessfully. Press ENTER to continue\n")


+10 -22  MarketPlaces/M00nkeyMarket/crawler_selenium.py

@@ -215,24 +215,23 @@ def crawlForum(driver):
print("Crawling the M00nkeyMarket market")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
count = 0
i = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
list = productPages(html)
for item in list:
itemURL = urlparse.urljoin(BASE_URL, str(item))
@@ -244,24 +243,16 @@ def crawlForum(driver):
driver.back()
# comment out
# break
break
# comment out
if count == 1:
count = 0
break
try:
link = driver.find_element(by=By.LINK_TEXT, value='Next ›').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@@ -271,9 +262,6 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling M00nkeyMarket done sucessfully. Press ENTER to continue\n")


+12 -23  MarketPlaces/MikesGrandStore/crawler_selenium.py

@@ -227,24 +227,23 @@ def crawlForum(driver):
print("Crawling the MikesGrandStore market")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
count = 0
i = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
list = productPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
@@ -256,24 +255,17 @@ def crawlForum(driver):
driver.back()
# comment out
# break
break
# comment out
# if count == 1:
# count = 0
# break
if count == 1:
break
try:
link = driver.find_element(by=By.XPATH, value=
'/html/body/div[1]/main/div/div[1]/div/div[3]/nav/ul/li[6]/a').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@@ -283,9 +275,6 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling MikesGrandStore forum done sucessfully. Press ENTER to continue\n")


+10 -21  MarketPlaces/ThiefWorld/crawler_selenium.py

@@ -211,24 +211,23 @@ def crawlForum(driver):
print("Crawling the ThiefWorld market")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
count = 0
i = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
list = productPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
@@ -243,8 +242,7 @@ def crawlForum(driver):
break
# comment out
if count == 20:
count = 0
if count == 1:
break
try:
@@ -252,12 +250,6 @@ def crawlForum(driver):
'/html/body/div/div[1]/div/div/div[2]/div[3]/div/ul/li[13]/a').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@@ -267,9 +259,6 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling ThiefWorld forum done sucessfully. Press ENTER to continue\n")


+10 -22  MarketPlaces/Tor2door/crawler_selenium.py

@@ -228,25 +228,23 @@ def crawlForum(driver):
print("Crawling the Tor2door market")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
i = 0
count = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
list = productPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
@@ -256,12 +254,12 @@ def crawlForum(driver):
driver.refresh()
savePage(driver.page_source, item)
driver.back()
# comment out
break
# comment out
if count == 1:
count = 0
break
try:
@@ -269,15 +267,8 @@ def crawlForum(driver):
'/html/body/main/div/div/div[2]/div[11]/div/nav')
a = nav.find_element(by=By.LINK_TEXT, value="")
link = a.get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@@ -287,9 +278,6 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling Tor2door market done sucessfully. Press ENTER to continue\n")


+15 -26  MarketPlaces/TorBay/crawler_selenium.py

@@ -198,24 +198,23 @@ def crawlForum(driver):
print("Crawling the TorBay Market")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
count = 0
i = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
list = productPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
@@ -226,25 +225,18 @@ def crawlForum(driver):
savePage(driver.page_source, item)
driver.back()
# #comment out
# break
#
# # # comment out
# if count == 1:
# count = 0
# break
# comment out
break
# comment out
if count == 1:
break
try:
link = driver.find_element(by=By.XPATH, value=
'/html/body/section/div/div/div[2]/div/div[2]/ul/li[3]/a').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@@ -254,9 +246,6 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling TorBay forum done sucessfully. Press ENTER to continue\n")


+9 -20  MarketPlaces/TorMarket/crawler_selenium.py

@@ -201,24 +201,23 @@ def crawlForum(driver):
print("Crawling the TorMarket market")
linksToCrawl = getInterestedLinks()
visited = set(linksToCrawl)
initialTime = time.time()
count = 0
i = 0
while i < len(linksToCrawl):
link = linksToCrawl[i]
print('Crawling :', link)
try:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
has_next_page = True
count = 0
while has_next_page:
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
list = productPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
@@ -234,7 +233,6 @@ def crawlForum(driver):
# comment out
if count == 1:
count = 0
break
try:
@@ -242,12 +240,6 @@ def crawlForum(driver):
'/html/body/div[2]/div/div/div[1]/main/nav/ul/li[5]/a').get_attribute('href')
if link == "":
raise NoSuchElementException
try:
driver.get(link)
except:
driver.refresh()
html = driver.page_source
savePage(html, link)
count += 1
except NoSuchElementException:
@@ -257,9 +249,6 @@ def crawlForum(driver):
print(link, e)
i += 1
# finalTime = time.time()
# print finalTime - initialTime
input("Crawling TorMarket forum done sucessfully. Press ENTER to continue\n")

