Browse Source

Finished fully running the Altenens and Cardingleaks crawlers

main
westernmeadow 1 year ago
parent
commit
41f3735787
5 changed files with 62 additions and 50 deletions
  1. +14
    -12
      Forums/Altenens/crawler_selenium.py
  2. +4
    -2
      Forums/Altenens/parser.py
  3. +13
    -9
      Forums/Cardingleaks/crawler_selenium.py
  4. +12
    -6
      Forums/Cardingleaks/parser.py
  5. +19
    -21
      Forums/OnniForums/crawler_selenium.py

+ 14
- 12
Forums/Altenens/crawler_selenium.py View File

@ -227,9 +227,9 @@ def crawlForum(driver):
driver.refresh()
savePage(driver, driver.page_source, topic + f"page{counter}") # very important
# comment out
if counter == 2:
break
# # comment out
# if counter == 2:
# break
try:
page = driver.find_element(By.LINK_TEXT, value='Next').get_attribute('href')
@ -240,15 +240,17 @@ def crawlForum(driver):
except NoSuchElementException:
has_next_topic_page = False
for j in range(counter):
driver.back()
# comment out
break
# comment out
if count == 1:
break
try:
driver.get(link)
except:
driver.refresh()
# # comment out
# break
#
# # comment out
# if count == 1:
# break
try:
link = driver.find_element(by=By.LINK_TEXT, value='Next').get_attribute('href')


+ 4
- 2
Forums/Altenens/parser.py View File

@ -25,8 +25,10 @@ def altenens_description_parser(soup):
image_user = [] # 9 all user avatars of each post
image_post = [] # 10 all first images of each post
topic = soup.find("h1", {"class": "p-title-value"}).text
topic = cleanString(topic.strip())
etopic = soup.find("h1", {"class": "p-title-value"})
if etopic is not None:
topic = etopic.text
topic = cleanString(topic.strip())
body = soup.find('div', {"class": "block-container lbContainer"})
iposts = body.find_all('article', {"class": "message message--post js-post js-inlineModContainer"})


+ 13
- 9
Forums/Cardingleaks/crawler_selenium.py View File

@ -230,12 +230,16 @@ def crawlForum(driver):
driver.get(itemURL)
except:
driver.refresh()
savePage(driver, driver.page_source, topic + f"page{counter}") # very important
# comment out
if counter == 2:
if isListingLink(driver.current_url):
break
savePage(driver, driver.page_source, topic + f"page{counter}") # very important
# # comment out
# if counter == 2:
# break
try:
page = driver.find_element(by=By.LINK_TEXT, value='Next').get_attribute('href')
if page == "":
@ -248,12 +252,12 @@ def crawlForum(driver):
for j in range(counter):
driver.back()
# comment out
break
# comment out
if count == 1:
break
# # comment out
# break
#
# # comment out
# if count == 1:
# break
try:
link = driver.find_element(by=By.LINK_TEXT, value='Next').get_attribute('href')


+ 12
- 6
Forums/Cardingleaks/parser.py View File

@ -109,8 +109,11 @@ def cardingleaks_listing_parser(soup: Tag):
li = soup.find("h1", {"class": "p-title-value"})
board = cleanString(li.text.strip())
thread_list = soup.find('div', {"class": "structItemContainer-group structItemContainer-group--sticky"}).find_all('div', {"data-author": True}) + \
soup.find("div", {"class": "structItemContainer-group js-threadList"}).find_all("div", {"data-author": True})
thread_list = soup.find("div", {"class": "structItemContainer-group js-threadList"}).find_all("div", {"data-author": True})
sticky = soup.find('div', {"class": "structItemContainer-group structItemContainer-group--sticky"})
if sticky is not None:
thread_list = sticky.find_all("div", {"data-author": True}) + thread_list
nm = len(thread_list)
@ -123,9 +126,12 @@ def cardingleaks_listing_parser(soup: Tag):
author_icon = thread.find("a", {"class": "avatar avatar--s"})
if author_icon is not None:
author_icon = author_icon.find('img').get('src')
author_icon = author_icon.split('base64,')[-1]
image_user.append(author_icon)
author_icon = author_icon.find('img')
if author_icon is not None:
author_icon = author_icon.get('src').split('base64,')[-1]
image_user.append(author_icon)
else:
image_user.append('-1')
else:
image_user.append('-1')
@ -158,4 +164,4 @@ def cardingleaks_links_parser(soup):
href.append(link)
return href
return [href[-1]]

+ 19
- 21
Forums/OnniForums/crawler_selenium.py View File

@ -177,24 +177,24 @@ def getInterestedLinks():
# Hacking & Cracking tutorials
links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Hacking-Cracking-tutorials')
# Hacking & Cracking questions
links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Hacking-Cracking-questions')
# Exploit PoCs
links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Exploit-PoCs')
# sellers
links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Sellers')
# buyers questions
links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Buyers-Questions')
# combo lists
links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Combo-lists')
# Malware-development
links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Malware-development')
# coding
links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Coding')
# Carding & Fraud
links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Carding-Fraud')
# OPSEC
links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-OPSEC--13')
# # Hacking & Cracking questions
# links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Hacking-Cracking-questions')
# # Exploit PoCs
# links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Exploit-PoCs')
# # sellers
# links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Sellers')
# # buyers questions
# links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Buyers-Questions')
# # combo lists
# links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Combo-lists')
# # Malware-development
# links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Malware-development')
# # coding
# links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Coding')
# # Carding & Fraud
# links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Carding-Fraud')
# # OPSEC
# links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-OPSEC--13')
return links
@ -249,9 +249,7 @@ def crawlForum(driver):
except NoSuchElementException:
has_next_topic_page = False
for j in range(counter):
driver.back()
# making sure we go back to the listing page (browser back button simulation)
try:
driver.get(link)
except:


Loading…
Cancel
Save