From 2e34fe2e7d2447b9694dc8b1e805ed703f494df6 Mon Sep 17 00:00:00 2001
From: chris
Date: Mon, 30 Oct 2023 00:33:48 -0700
Subject: [PATCH] Cleaned up some test comments in crawler and parser

---
 MarketPlaces/BlackPyramid/crawler_selenium.py |  4 -
 MarketPlaces/BlackPyramid/parser.py           | 91 ------------------
 2 files changed, 95 deletions(-)

diff --git a/MarketPlaces/BlackPyramid/crawler_selenium.py b/MarketPlaces/BlackPyramid/crawler_selenium.py
index cf93b4a..6f7e45a 100644
--- a/MarketPlaces/BlackPyramid/crawler_selenium.py
+++ b/MarketPlaces/BlackPyramid/crawler_selenium.py
@@ -204,7 +204,6 @@ def goToPage(driver, page):
     time.sleep(10)
 
     # click
-    #xpath = "//input[@value='" + page + "']"
     xpath = "//input[@name='" + page + "']"
     link = driver.find_element(By.XPATH, xpath)
     time.sleep(1)
@@ -286,9 +285,6 @@ def crawlForum(driver):
             if not nav.is_enabled():
                 raise NoSuchElementException
             try:
-                # block obscuring element
-                #element = driver.find_element(by=By.XPATH, value="//input[@class='tei39950693']")
-                #driver.execute_script("arguments[0].style.visibility='hidden'", element)
                 # select next page
                 pgnum = uiClasses.Select(driver.find_element(by=By.XPATH, value="//select[@name='pageination']"))
                 print("pg options:", pgnum.options)
diff --git a/MarketPlaces/BlackPyramid/parser.py b/MarketPlaces/BlackPyramid/parser.py
index 5224c1e..4b45ee7 100644
--- a/MarketPlaces/BlackPyramid/parser.py
+++ b/MarketPlaces/BlackPyramid/parser.py
@@ -283,94 +283,3 @@ def BlackPyramid_links_parser(soup):
         href.append(link)
 
     return href
-
-
-import glob
-import os
-import codecs
-import shutil
-import traceback
-
-if __name__ == '__main__':
-    nError = 0
-    marketPlace = 'BlackPyramid'
-
-    lines = []  # listing pages
-    lns = []  # description pages
-    detPage = {}
-
-    '''
-    # reading description pages
-    count = 0
-    for fileDescription in glob.glob(os.path.join("..\\" + marketPlace + "\\HTML_Pages\\10222023\\Description", '*.html')):
-        count += 1
-        lns.append(fileDescription)
-        # if count > 5:
-        #     break
-
-    for index, line2 in enumerate(lns):
-
-        print("Reading description folder of '" + marketPlace + "', file '" + os.path.basename(line2) + "', index= " + str(index + 1) + " ... " + str(len(lns)))
-
-        try:
-            html = codecs.open(line2.strip('\n'), encoding='utf8')
-            soup = BeautifulSoup(html, "html.parser")
-            html.close()
-        except:
-
-            try:
-                html = open(line2.strip('\n'))
-                soup = BeautifulSoup(html, "html.parser")
-                html.close()
-            except:
-
-                nError += 1
-                print("There was a problem to read the file " + line2 + " in the Description section!")
-                # if createLog:
-                #     logFile.write(str(nError) + ". There was a problem to read the file " + line2 + " in the Description section.\n")
-                continue
-
-        try:
-            print(BlackPyramid_description_parser(soup))
-        except:
-            traceback.print_exc()
-            print("There was a problem to parse the file " + line2 + " in the Description section!")
-    '''
-    # reading listing pages
-    count = 0
-    for fileListing in glob.glob(os.path.join("..\\" + marketPlace + "\\HTML_Pages\\10222023\\Listing", '*.html')):
-        count += 1
-        lines.append(fileListing)
-        # if count > 1:
-        #     break
-
-    for index, line1 in enumerate(lines):
-
-        print("Reading listing folder of '" + marketPlace + "', file '" + os.path.basename(line1) + "', index= " + str(
-            index + 1) + " ... " + str(len(lines)))
-
-        readError = False
-        try:
-            html = codecs.open(line1.strip('\n'), encoding='utf8')
-            soup = BeautifulSoup(html, "html.parser")
-            html.close()
-        except:
-            try:
-                html = open(line1.strip('\n'))
-                soup = BeautifulSoup(html, "html.parser")
-                html.close()
-            except:
-                print("There was a problem to read the file " + line1 + " in the Listing section!")
-                readError = True
-
-        if not readError:
-
-            parseError = False
-            try:
-                print(BlackPyramid_listing_parser(soup))
-            except:
-                traceback.print_exc()
-                print("There was a problem to parse the file " + line1 + " in the listing section!")
-                parseError = True
-
-    print("DONE")
\ No newline at end of file