From 3bed6a4f37244f8943107ac60faf6388bbf03abc Mon Sep 17 00:00:00 2001 From: Helium Date: Wed, 14 Jun 2023 12:49:17 -0700 Subject: [PATCH] finished onni --- Forums/CryptBB/crawler_selenium.py | 33 +++++++++++++++++++-------- Forums/OnniForums/crawler_selenium.py | 18 +++++++-------- 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/Forums/CryptBB/crawler_selenium.py b/Forums/CryptBB/crawler_selenium.py index e510898..6c0ac9b 100644 --- a/Forums/CryptBB/crawler_selenium.py +++ b/Forums/CryptBB/crawler_selenium.py @@ -258,16 +258,31 @@ def crawlForum(driver): savePage(html, link) has_next_page = True + + #loop through the topics while has_next_page: - list = topicPages(html)#parses? + list = topicPages(html)# for multiple pages for item in list: - itemURL = urlparse.urljoin(baseURL, str(item)) - try: - driver.get(itemURL) - except: - driver.refresh() - savePage(driver.page_source, item) - driver.back() + #variable to check if there is a next page for the topic + has_next_topic_page = True + # check if there is a next page for the topics + while has_next_topic_page: + # try to access next page of th topic + itemURL = urlparse.urljoin(baseURL, str(item)) + try: + driver.get(itemURL) + except: + driver.refresh() + savePage(driver.page_source, item) + + # if there is a next page then go and save.... + # item = + + + + #end of loop + for i in count: + driver.back() # comment out break @@ -276,7 +291,7 @@ def crawlForum(driver): count = 0 break - try:# change depending on web page + try:# change depending on web page, #next page temp = driver.find_element(by=By.XPATH, value= '/html/body/div/div[2]/div/div[2]/div') link = temp.find_element(by=By.CLASS_NAME, value='pagination_next').get_attribute('href') diff --git a/Forums/OnniForums/crawler_selenium.py b/Forums/OnniForums/crawler_selenium.py index cfeab2f..ca895bd 100644 --- a/Forums/OnniForums/crawler_selenium.py +++ b/Forums/OnniForums/crawler_selenium.py @@ -190,23 +190,23 @@ def getInterestedLinks(): # Hacking & Cracking tutorials links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Hacking-Cracking-tutorials') # Hacking & Cracking questions - # links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Hacking-Cracking-questions') + links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Hacking-Cracking-questions') # Exploit PoCs - # links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Exploit-PoCs') + links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Exploit-PoCs') # Cracked software - # links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Cracked-software') + links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Cracked-software') # Malware-development - # links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Malware-development') + links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Malware-development') # Carding & Fraud - # links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Carding-Fraud') + links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Carding-Fraud') # Darknet Discussions - # links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=88') + links.append('http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/forumdisplay.php?fid=88') # OPSEC - # links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-OPSEC') + links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-OPSEC') # Databases - # links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Databases') + links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Databases') # Proxies - # links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Proxies') + links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Proxies') return links