finished fully running completed OnniForums

main
westernmeadow 1 year ago
commit 414dc9114b
2 changed files with 59 additions and 54 deletions
  1. Forums/OnniForums/crawler_selenium.py  +36 -32
  2. Forums/OnniForums/parser.py  +23 -22

Forums/OnniForums/crawler_selenium.py  (+36 -32)

@@ -49,14 +49,13 @@ def startCrawling():
 def login(driver):
     #click login button
     login_link = driver.find_element(
-        by=By.XPATH, value='/html/body/div/div[2]/div/table/tbody/tr[2]/td/center/pre/strong/a').\
-        get_attribute('href')
+        by=By.XPATH, value='/html/body/div/div[1]/div[2]/div[1]/div/span/a[1]').get_attribute('href')
     driver.get(login_link)

     #entering username and password into input boxes
     usernameBox = driver.find_element(by=By.XPATH, value='/html/body/div/div[2]/div/form/table/tbody/tr[2]/td[2]/input')
     #Username here
-    usernameBox.send_keys('purely_cabbage')
+    usernameBox.send_keys('cabbage_purely')
     passwordBox = driver.find_element(by=By.XPATH, value='/html/body/div/div[2]/div/form/table/tbody/tr[3]/td[2]/input')
     #Password here
     passwordBox.send_keys('$ourP@tchK1ds')
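
Note (not part of the commit): folded into a helper, the login flow above amounts to the sketch below. The function name and credential parameters are placeholders; the XPaths are the ones this diff introduces or keeps.

from selenium.webdriver.common.by import By

def login_sketch(driver, username, password):
    # resolve the login form's href via the new anchor XPath, then open it directly
    login_link = driver.find_element(
        by=By.XPATH, value='/html/body/div/div[1]/div[2]/div[1]/div/span/a[1]').get_attribute('href')
    driver.get(login_link)
    # fill the username and password inputs of the login form
    driver.find_element(by=By.XPATH,
                        value='/html/body/div/div[2]/div/form/table/tbody/tr[2]/td[2]/input').send_keys(username)
    driver.find_element(by=By.XPATH,
                        value='/html/body/div/div[2]/div/form/table/tbody/tr[3]/td[2]/input').send_keys(password)
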
@@ -176,26 +175,26 @@ def getNameFromURL(url):
 def getInterestedLinks():
     links = []

-    # # Hacking & Cracking tutorials
+    # Hacking & Cracking tutorials
     links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Hacking-Cracking-tutorials')
     # Hacking & Cracking questions
-    # links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Hacking-Cracking-questions')
-    # # Exploit PoCs
-    # links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Exploit-PoCs')
-    # # Cracked software
-    # links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Cracked-software')
-    # # Malware-development
-    # links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Malware-development')
-    # # Carding & Fraud
-    # links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Carding-Fraud')
-    # # Darknet Discussions
-    # links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=88')
-    # # OPSEC
-    # links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-OPSEC')
-    # # Databases
-    # links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Databases')
-    # # Proxies
-    # links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Proxies')
+    links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Hacking-Cracking-questions')
+    # Exploit PoCs
+    links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Exploit-PoCs')
+    # sellers
+    links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Sellers')
+    # buyers questions
+    links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Buyers-Questions')
+    # combo lists
+    links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Combo-lists')
+    # Malware-development
+    links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Malware-development')
+    # coding
+    links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Coding')
+    # Carding & Fraud
+    links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Carding-Fraud')
+    # OPSEC
+    links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-OPSEC--13')

     return links
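
Note (not part of the commit): getNameFromURL, named in the hunk header above, is not shown in this diff. Below is a minimal sketch of what such a helper might do with these forum URLs; the slug-splitting logic is an assumption for illustration, not the file's actual implementation.

def forum_name_from_url(url: str) -> str:
    # hypothetical: '.../Forum-Hacking-Cracking-tutorials' -> 'Hacking-Cracking-tutorials'
    return url.rstrip('/').split('Forum-')[-1]

assert forum_name_from_url(
    'http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-OPSEC--13') == 'OPSEC--13'
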
@@ -235,13 +234,13 @@ def crawlForum(driver):
                         driver.refresh()
                     savePage(driver, driver.page_source, topic + f"page{counter}")  # very important

-                    # comment out
-                    if counter == 2:
-                        break
+                    # # comment out
+                    # if counter == 2:
+                    # break

                     try:
-                        temp = driver.find_element(By.XPATH,'/html/body/div/div[2]/div/div[3]/div')  # /html/body/div/div[2]/div/div[2]/div/
-                        page = temp.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute('href')  # /html/body/div/div[2]/div/div[2]/div
+                        temp = driver.find_element(by=By.CLASS_NAME, value='float_left')
+                        page = temp.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute('href')

                         if page == "":
                             raise NoSuchElementException
@@ -253,15 +252,20 @@ def crawlForum(driver):
                 for j in range(counter):
                     driver.back()

-                # comment out
-                # break
+                try:
+                    driver.get(link)
+                except:
+                    driver.refresh()

-            # comment out
-            if count == 1:
-                break
+            # # comment out
+            # break
+            #
+            # # comment out
+            # if count == 1:
+            # break

             try:
-                temp = driver.find_element(by=By.XPATH, value='/html/body/div/div[2]/div/div[3]/div')  # /html/body/div/div[2]/div/div[3]/div
+                temp = driver.find_element(by=By.CLASS_NAME, value='float_left')
                 link = temp.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute('href')

                 if link == "":
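
Note (not part of the commit): both pagination hunks above switch from absolute XPaths to class-name lookups. A standalone sketch of that pattern follows; the element classes ('float_left', 'pagination_next') come from the diff, while the helper name and the None-return convention are assumptions.

from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

def next_page_href(driver):
    # the 'float_left' container holds the 'pagination_next' anchor when more pages exist
    try:
        container = driver.find_element(by=By.CLASS_NAME, value='float_left')
        href = container.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute('href')
    except NoSuchElementException:
        return None      # no pagination block at all (single-page listing)
    return href or None  # an empty href is likewise treated as the last page
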


Forums/OnniForums/parser.py  (+23 -22)

@@ -79,34 +79,35 @@ def onniForums_description_parser(soup: BeautifulSoup) -> tuple:
         # Append a "-1" to `feedbacks` array since they don't exists on this forum
         feedbacks.append("-1")

-        date_posted: str = topic.find("span", {"class": "post_date"}).text
-        date_posted_cleaned = cleanString(date_posted.split(",")[0])
-
-        today = datetime.now()
-
-        if date_posted_cleaned == 'Yesterday':
-            date_object = today - timedelta(days=1)
-        elif date_posted_cleaned.find('hour') > 0:
-            hours_ago = int(date_posted_cleaned.split(' ')[0])
-            date_object = today - timedelta(hours=hours_ago)
-        elif date_posted_cleaned.find('minute') > 0:
-            minutes_ago = int(date_posted_cleaned.split(' ')[0])
-            date_object = today - timedelta(minutes=minutes_ago)
+        date_posted = topic.find("span", {"class": "post_date"}).text.strip()
+        if 'modified' in date_posted:
+            date_posted = date_posted.split('(')[0].strip()
+        if 'Today' in date_posted or 'Yesterday' in date_posted:
+            day = topic.find("span", {"class": "post_date"}).find('span').get('title').strip()
+            time = date_posted.split(',')[1].strip()
+            date_posted = day + ', ' + time
+            date_object = datetime.strptime(date_posted, "%m-%d-%Y, %I:%M %p")
+        elif 'hour' in date_posted or 'minute' in date_posted:
+            date_posted = topic.find("span", {"class": "post_date"}).find('span').get('title').strip()
+            date_object = datetime.strptime(date_posted, "%m-%d-%Y, %I:%M %p")
         else:
-            date_object = datetime.strptime(date_posted_cleaned, "%m-%d-%Y")
+            date_object = datetime.strptime(date_posted, "%m-%d-%Y, %I:%M %p")

         addDates.append(date_object)

         image_post.append("-1")

-        img = topic.find('div', {"class": "author_avatar"}).find('img')
-        if img is not None:
-            img = img.get('src').split('base64,')[-1]
+        avatar = topic.find('div', {"class": "author_avatar"})
+        if avatar is not None:
+            img = avatar.find('img')
+            if img is not None:
+                img = img.get('src').split('base64,')[-1]
+            else:
+                img = '-1'
         else:
             img = "-1"
         image_user.append(img)
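
Note (not part of the commit): a self-contained sketch of the new date handling above. The sample markup is illustrative only; it assumes, as the parser code implies, that relative dates ('Today'/'Yesterday'/'x hours ago') carry the absolute date in the nested span's title attribute.

from datetime import datetime
from bs4 import BeautifulSoup

# illustrative MyBB-style markup, not taken from the live site
sample = ('<span class="post_date">Yesterday, 10:32 PM'
          '<span title="08-14-2023"></span></span>')
span = BeautifulSoup(sample, 'html.parser').find('span', {'class': 'post_date'})

date_posted = span.text.strip()
if 'modified' in date_posted:                 # strip a "(This post was last modified: ...)" style suffix
    date_posted = date_posted.split('(')[0].strip()
if 'Today' in date_posted or 'Yesterday' in date_posted:
    day = span.find('span').get('title').strip()   # absolute date from the title attribute
    time = date_posted.split(',')[1].strip()       # clock time from the visible text
    date_posted = day + ', ' + time
elif 'hour' in date_posted or 'minute' in date_posted:
    date_posted = span.find('span').get('title').strip()
date_object = datetime.strptime(date_posted, "%m-%d-%Y, %I:%M %p")
print(date_object)  # 2023-08-14 22:32:00
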

