
Cleaned up some test comments in crawler and parser

main · chris · 1 year ago · commit 2e34fe2e7d
2 changed files with 0 additions and 95 deletions:
  1. MarketPlaces/BlackPyramid/crawler_selenium.py (+0, -4)
  2. MarketPlaces/BlackPyramid/parser.py (+0, -91)

MarketPlaces/BlackPyramid/crawler_selenium.py (+0, -4)

@@ -204,7 +204,6 @@ def goToPage(driver, page):
     time.sleep(10)
 
     # click
-    #xpath = "//input[@value='" + page + "']"
     xpath = "//input[@name='" + page + "']"
     link = driver.find_element(By.XPATH, xpath)
     time.sleep(1)
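
The kept locator switches the page-button lookup from matching on @value to matching on @name. A minimal sketch of that pattern in isolation, assuming a live Selenium driver; click_page_button is a hypothetical helper name, and the trailing click is presumed from the rest of goToPage, which the hunk truncates:

import time

from selenium.webdriver.common.by import By

def click_page_button(driver, page):
    # Match the page button by its name attribute, as the kept line does
    link = driver.find_element(By.XPATH, "//input[@name='" + page + "']")
    time.sleep(1)  # brief pause before interacting, mirroring the original
    link.click()   # presumed next step; the hunk ends before the click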
@@ -286,9 +285,6 @@ def crawlForum(driver):
             if not nav.is_enabled():
                 raise NoSuchElementException
             try:
-                # block obscuring element
-                #element = driver.find_element(by=By.XPATH, value="//input[@class='tei39950693']")
-                #driver.execute_script("arguments[0].style.visibility='hidden'", element)
                 # select next page
                 pgnum = uiClasses.Select(driver.find_element(by=By.XPATH, value="//select[@name='pageination']"))
                 print("pg options:", pgnum.options)


MarketPlaces/BlackPyramid/parser.py (+0, -91)

@@ -283,94 +283,3 @@ def BlackPyramid_links_parser(soup):
         href.append(link)
 
     return href
-
-import glob
-import os
-import codecs
-import shutil
-import traceback
-
-if __name__ == '__main__':
-    nError = 0
-    marketPlace = 'BlackPyramid'
-    lines = []  # listing pages
-    lns = []  # description pages
-    detPage = {}
-
-    '''
-    # reading description pages
-    count = 0
-    for fileDescription in glob.glob(os.path.join("..\\" + marketPlace + "\\HTML_Pages\\10222023\\Description", '*.html')):
-        count += 1
-        lns.append(fileDescription)
-        # if count > 5:
-        #     break
-
-    for index, line2 in enumerate(lns):
-        print("Reading description folder of '" + marketPlace + "', file '" + os.path.basename(line2) + "', index= " + str(index + 1) + " ... " + str(len(lns)))
-        try:
-            html = codecs.open(line2.strip('\n'), encoding='utf8')
-            soup = BeautifulSoup(html, "html.parser")
-            html.close()
-        except:
-            try:
-                html = open(line2.strip('\n'))
-                soup = BeautifulSoup(html, "html.parser")
-                html.close()
-            except:
-                nError += 1
-                print("There was a problem to read the file " + line2 + " in the Description section!")
-                # if createLog:
-                #     logFile.write(str(nError) + ". There was a problem to read the file " + line2 + " in the Description section.\n")
-                continue
-        try:
-            print(BlackPyramid_description_parser(soup))
-        except:
-            traceback.print_exc()
-            print("There was a problem to parse the file " + line2 + " in the Description section!")
-    '''
-
-    # reading listing pages
-    count = 0
-    for fileListing in glob.glob(os.path.join("..\\" + marketPlace + "\\HTML_Pages\\10222023\\Listing", '*.html')):
-        count += 1
-        lines.append(fileListing)
-        # if count > 1:
-        #     break
-
-    for index, line1 in enumerate(lines):
-        print("Reading listing folder of '" + marketPlace + "', file '" + os.path.basename(line1) + "', index= " + str(
-            index + 1) + " ... " + str(len(lines)))
-
-        readError = False
-        try:
-            html = codecs.open(line1.strip('\n'), encoding='utf8')
-            soup = BeautifulSoup(html, "html.parser")
-            html.close()
-        except:
-            try:
-                html = open(line1.strip('\n'))
-                soup = BeautifulSoup(html, "html.parser")
-                html.close()
-            except:
-                print("There was a problem to read the file " + line1 + " in the Listing section!")
-                readError = True
-
-        if not readError:
-            parseError = False
-            try:
-                print(BlackPyramid_listing_parser(soup))
-            except:
-                traceback.print_exc()
-                print("There was a problem to parse the file " + line1 + " in the listing section!")
-                parseError = True
-
-    print("DONE")
