From d413f48b9e8702412d1d6d0551a319bbe4e47b10 Mon Sep 17 00:00:00 2001
From: westernmeadow
Date: Mon, 18 Sep 2023 17:14:16 -0700
Subject: [PATCH] finished fully running completed Libre

---
 Forums/Libre/crawler_selenium.py | 32 ++++++++++++++++++--------------
 Forums/Libre/parser.py           |  4 ++--
 2 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/Forums/Libre/crawler_selenium.py b/Forums/Libre/crawler_selenium.py
index 58274ec..6e8f43b 100644
--- a/Forums/Libre/crawler_selenium.py
+++ b/Forums/Libre/crawler_selenium.py
@@ -181,12 +181,16 @@ def getNameFromURL(url):
 def getInterestedLinks():
     links = []
 
-    # # cyber security
+    # cybersecurity
     links.append('http://libreeunomyly6ot7kspglmbd5cvlkogib6rozy43r2glatc6rmwauqd.onion/c/CyberSecurity')
-    # # services
-    # links.append('http://libreeunomyly6ot7kspglmbd5cvlkogib6rozy43r2glatc6rmwauqd.onion/c/Services')
-    # # programming
-    # links.append('http://libreeunomyly6ot7kspglmbd5cvlkogib6rozy43r2glatc6rmwauqd.onion/c/Programming')
+    # services
+    links.append('http://libreeunomyly6ot7kspglmbd5cvlkogib6rozy43r2glatc6rmwauqd.onion/c/Services')
+    # programming
+    links.append('http://libreeunomyly6ot7kspglmbd5cvlkogib6rozy43r2glatc6rmwauqd.onion/c/Programming')
+    # jobs for crypto
+    links.append('http://libreeunomyly6ot7kspglmbd5cvlkogib6rozy43r2glatc6rmwauqd.onion/c/JobsforCypto')
+    # darknet markets
+    links.append('http://libreeunomyly6ot7kspglmbd5cvlkogib6rozy43r2glatc6rmwauqd.onion/c/DarkNetMarkets')
 
     return links
 
@@ -226,9 +230,9 @@ def crawlForum(driver):
                             driver.refresh()
                         savePage(driver, driver.page_source, topic + f"page{counter}")  # very important
 
-                        # comment out
-                        if counter == 2:
-                            break
+                        # # comment out
+                        # if counter == 2:
+                        #     break
 
                         try:
                             page = ""  # no next page so far may have some later on
@@ -242,12 +246,12 @@
                     for j in range(counter):
                         driver.back()
 
-                # comment out
-                # break
-
-                # comment out
-                if count == 1:
-                    break
+                # # comment out
+                # break
+                #
+                # # comment out
+                # if count == 1:
+                #     break
 
                 try:
                     link = driver.find_element(by=By.LINK_TEXT, value='>').get_attribute('href')
diff --git a/Forums/Libre/parser.py b/Forums/Libre/parser.py
index 80aa790..16113f7 100644
--- a/Forums/Libre/parser.py
+++ b/Forums/Libre/parser.py
@@ -182,7 +182,7 @@ def libre_listing_parser(soup):
         image_author.append("-1")
 
         # Adding the url to the list of urls
-        link_to_clean = itopic.find("a", {"class": "link text-xl text-zinc-300"}).get("href")
+        link_to_clean = itopic.find('div', {'class': 'flex space-x-2 items-center'}).find('a').get('href')
 
         href.append(link_to_clean)
 
@@ -239,7 +239,7 @@ def libre_links_parser(soup):
     # Returning all links that should be visited by the Crawler
     href = []
 
-    listing = soup.find_all('div', {"class": "flex-grow p-2 text-justify"})
+    listing = soup.find("div", {"class", "space-y-2 mt-4"}).find_all('div', {"class": "flex box"})
     for a in listing:
         link = a.find('div', {'class': 'flex space-x-2 items-center'}).find('a').get('href')
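
Note (not part of the patch): the reworked libre_links_parser reaches each topic link by scoping to the listing container (class "space-y-2 mt-4"), iterating its "flex box" rows, and taking the first anchor inside the "flex space-x-2 items-center" wrapper. The sketch below is a minimal, hedged illustration of that same selector chain run against a made-up HTML fragment; the fragment and the extract_topic_links helper are assumptions modeled on the class names in the hunks, not code from the repository. It passes the class filter as a dict ({"class": "space-y-2 mt-4"}) rather than the set literal in the hunk ({"class", "space-y-2 mt-4"}), which reads like a typo for the dict form.

# Illustrative sketch only: the selector chain from the patched
# libre_links_parser, exercised against an assumed sample page.
from bs4 import BeautifulSoup

# Assumed listing markup mirroring the class names used in the diff.
SAMPLE_LISTING = """
<div class="space-y-2 mt-4">
  <div class="flex box">
    <div class="flex space-x-2 items-center">
      <a href="/p/example-topic-1">Example topic 1</a>
    </div>
  </div>
  <div class="flex box">
    <div class="flex space-x-2 items-center">
      <a href="/p/example-topic-2">Example topic 2</a>
    </div>
  </div>
</div>
"""

def extract_topic_links(html):
    # Scope to the listing container, walk each topic row, and collect the
    # href of the first anchor inside the row's title wrapper.
    soup = BeautifulSoup(html, "html.parser")
    listing = soup.find("div", {"class": "space-y-2 mt-4"}).find_all("div", {"class": "flex box"})
    return [row.find("div", {"class": "flex space-x-2 items-center"}).find("a").get("href")
            for row in listing]

print(extract_topic_links(SAMPLE_LISTING))  # ['/p/example-topic-1', '/p/example-topic-2']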