
Cleaned up some test comments in crawler and parser

main · chris · 1 year ago · commit 2e34fe2e7d
2 changed files with 0 additions and 95 deletions:
  1. MarketPlaces/BlackPyramid/crawler_selenium.py (+0, -4)
  2. MarketPlaces/BlackPyramid/parser.py (+0, -91)

MarketPlaces/BlackPyramid/crawler_selenium.py (+0, -4)

@@ -204,7 +204,6 @@ def goToPage(driver, page):
     time.sleep(10)
 
     # click
-    #xpath = "//input[@value='" + page + "']"
     xpath = "//input[@name='" + page + "']"
     link = driver.find_element(By.XPATH, xpath)
     time.sleep(1)
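
The kept locator switches the page-button lookup from matching on @value to matching on @name. A minimal sketch of that pattern in isolation, assuming a live Selenium driver; click_page_button is a hypothetical helper name, and the trailing click is presumed from the rest of goToPage, which the hunk truncates:

import time

from selenium.webdriver.common.by import By

def click_page_button(driver, page):
    # Match the page button by its name attribute, as the kept line does
    link = driver.find_element(By.XPATH, "//input[@name='" + page + "']")
    time.sleep(1)  # brief pause before interacting, mirroring the original
    link.click()   # presumed next step; the hunk ends before the click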
@@ -286,9 +285,6 @@ def crawlForum(driver):
             if not nav.is_enabled():
                 raise NoSuchElementException
             try:
-                # block obscuring element
-                #element = driver.find_element(by=By.XPATH, value="//input[@class='tei39950693']")
-                #driver.execute_script("arguments[0].style.visibility='hidden'", element)
                 # select next page
                 pgnum = uiClasses.Select(driver.find_element(by=By.XPATH, value="//select[@name='pageination']"))
                 print("pg options:", pgnum.options)


MarketPlaces/BlackPyramid/parser.py (+0, -91)

@@ -283,94 +283,3 @@ def BlackPyramid_links_parser(soup):
         href.append(link)
 
     return href
-
-import glob
-import os
-import codecs
-import shutil
-import traceback
-
-if __name__ == '__main__':
-    nError = 0
-    marketPlace = 'BlackPyramid'
-    lines = []  # listing pages
-    lns = []  # description pages
-    detPage = {}
-
-    '''
-    # reading description pages
-    count = 0
-    for fileDescription in glob.glob(os.path.join("..\\" + marketPlace + "\\HTML_Pages\\10222023\\Description", '*.html')):
-        count += 1
-        lns.append(fileDescription)
-        # if count > 5:
-        #     break
-
-    for index, line2 in enumerate(lns):
-        print("Reading description folder of '" + marketPlace + "', file '" + os.path.basename(line2) + "', index= " + str(index + 1) + " ... " + str(len(lns)))
-        try:
-            html = codecs.open(line2.strip('\n'), encoding='utf8')
-            soup = BeautifulSoup(html, "html.parser")
-            html.close()
-        except:
-            try:
-                html = open(line2.strip('\n'))
-                soup = BeautifulSoup(html, "html.parser")
-                html.close()
-            except:
-                nError += 1
-                print("There was a problem to read the file " + line2 + " in the Description section!")
-                # if createLog:
-                #     logFile.write(str(nError) + ". There was a problem to read the file " + line2 + " in the Description section.\n")
-                continue
-        try:
-            print(BlackPyramid_description_parser(soup))
-        except:
-            traceback.print_exc()
-            print("There was a problem to parse the file " + line2 + " in the Description section!")
-    '''
-
-    # reading listing pages
-    count = 0
-    for fileListing in glob.glob(os.path.join("..\\" + marketPlace + "\\HTML_Pages\\10222023\\Listing", '*.html')):
-        count += 1
-        lines.append(fileListing)
-        # if count > 1:
-        #     break
-
-    for index, line1 in enumerate(lines):
-        print("Reading listing folder of '" + marketPlace + "', file '" + os.path.basename(line1) + "', index= " + str(
-            index + 1) + " ... " + str(len(lines)))
-
-        readError = False
-        try:
-            html = codecs.open(line1.strip('\n'), encoding='utf8')
-            soup = BeautifulSoup(html, "html.parser")
-            html.close()
-        except:
-            try:
-                html = open(line1.strip('\n'))
-                soup = BeautifulSoup(html, "html.parser")
-                html.close()
-            except:
-                print("There was a problem to read the file " + line1 + " in the Listing section!")
-                readError = True
-
-        if not readError:
-            parseError = False
-            try:
-                print(BlackPyramid_listing_parser(soup))
-            except:
-                traceback.print_exc()
-                print("There was a problem to parse the file " + line1 + " in the listing section!")
-                parseError = True
-
-    print("DONE")
