Browse Source

Finished fully running the Libre crawler to completion

main
westernmeadow 1 year ago
parent
commit
d413f48b9e
2 changed files with 20 additions and 16 deletions
  1. +18
    -14
      Forums/Libre/crawler_selenium.py
  2. +2
    -2
      Forums/Libre/parser.py

+ 18
- 14
Forums/Libre/crawler_selenium.py View File

@ -181,12 +181,16 @@ def getNameFromURL(url):
def getInterestedLinks():
links = []
# # cyber security
# cybersecurity
links.append('http://libreeunomyly6ot7kspglmbd5cvlkogib6rozy43r2glatc6rmwauqd.onion/c/CyberSecurity')
# # services
# links.append('http://libreeunomyly6ot7kspglmbd5cvlkogib6rozy43r2glatc6rmwauqd.onion/c/Services')
# # programming
# links.append('http://libreeunomyly6ot7kspglmbd5cvlkogib6rozy43r2glatc6rmwauqd.onion/c/Programming')
# services
links.append('http://libreeunomyly6ot7kspglmbd5cvlkogib6rozy43r2glatc6rmwauqd.onion/c/Services')
# programming
links.append('http://libreeunomyly6ot7kspglmbd5cvlkogib6rozy43r2glatc6rmwauqd.onion/c/Programming')
# jobs for crypto
links.append('http://libreeunomyly6ot7kspglmbd5cvlkogib6rozy43r2glatc6rmwauqd.onion/c/JobsforCypto')
# darknet markets
links.append('http://libreeunomyly6ot7kspglmbd5cvlkogib6rozy43r2glatc6rmwauqd.onion/c/DarkNetMarkets')
return links
@ -226,9 +230,9 @@ def crawlForum(driver):
driver.refresh()
savePage(driver, driver.page_source, topic + f"page{counter}") # very important
# comment out
if counter == 2:
break
# # comment out
# if counter == 2:
# break
try:
page = "" # no next page so far may have some later on
@ -242,12 +246,12 @@ def crawlForum(driver):
for j in range(counter):
driver.back()
# comment out
# break
# comment out
if count == 1:
break
# # comment out
# break
#
# # comment out
# if count == 1:
# break
try:
link = driver.find_element(by=By.LINK_TEXT, value='>').get_attribute('href')


+ 2
- 2
Forums/Libre/parser.py View File

@ -182,7 +182,7 @@ def libre_listing_parser(soup):
image_author.append("-1")
# Adding the url to the list of urls
link_to_clean = itopic.find("a", {"class": "link text-xl text-zinc-300"}).get("href")
link_to_clean = itopic.find('div', {'class': 'flex space-x-2 items-center'}).find('a').get('href')
href.append(link_to_clean)
@ -239,7 +239,7 @@ def libre_listing_parser(soup):
def libre_links_parser(soup):
# Returning all links that should be visited by the Crawler
href = []
listing = soup.find_all('div', {"class": "flex-grow p-2 text-justify"})
listing = soup.find("div", {"class", "space-y-2 mt-4"}).find_all('div', {"class": "flex box"})
for a in listing:
link = a.find('div', {'class': 'flex space-x-2 items-center'}).find('a').get('href')


Loading…
Cancel
Save