diff --git a/Forums/OnniForums/parser.py b/Forums/OnniForums/parser.py index b190732..2eebccf 100644 --- a/Forums/OnniForums/parser.py +++ b/Forums/OnniForums/parser.py @@ -13,7 +13,7 @@ from bs4 import BeautifulSoup # This is the method to parse the Description Pages (one page to each topic in the Listing Pages) -def onniForums_description_parser(soup: BeautifulSoup): +def onniForums_description_parser(soup: BeautifulSoup) -> tuple: topicName: str = "-1" # 0 *topic name users : List[str] = [] # 1 *all users of each post diff --git a/MarketPlaces/Classifier/__pycache__/__init__.cpython-310.pyc b/MarketPlaces/Classifier/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..595e7da Binary files /dev/null and b/MarketPlaces/Classifier/__pycache__/__init__.cpython-310.pyc differ diff --git a/MarketPlaces/Classifier/__pycache__/classify_product.cpython-310.pyc b/MarketPlaces/Classifier/__pycache__/classify_product.cpython-310.pyc new file mode 100644 index 0000000..b4854a3 Binary files /dev/null and b/MarketPlaces/Classifier/__pycache__/classify_product.cpython-310.pyc differ diff --git a/MarketPlaces/Classifier/__pycache__/transformer.cpython-310.pyc b/MarketPlaces/Classifier/__pycache__/transformer.cpython-310.pyc new file mode 100644 index 0000000..bbeee01 Binary files /dev/null and b/MarketPlaces/Classifier/__pycache__/transformer.cpython-310.pyc differ diff --git a/MarketPlaces/DB_Connection/__pycache__/__init__.cpython-310.pyc b/MarketPlaces/DB_Connection/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..7cfd2ff Binary files /dev/null and b/MarketPlaces/DB_Connection/__pycache__/__init__.cpython-310.pyc differ diff --git a/MarketPlaces/DB_Connection/__pycache__/db_connection.cpython-310.pyc b/MarketPlaces/DB_Connection/__pycache__/db_connection.cpython-310.pyc new file mode 100644 index 0000000..7312e64 Binary files /dev/null and b/MarketPlaces/DB_Connection/__pycache__/db_connection.cpython-310.pyc differ diff --git 
a/MarketPlaces/DarkFox/__pycache__/crawler_selenium.cpython-310.pyc b/MarketPlaces/DarkFox/__pycache__/crawler_selenium.cpython-310.pyc new file mode 100644 index 0000000..857483e Binary files /dev/null and b/MarketPlaces/DarkFox/__pycache__/crawler_selenium.cpython-310.pyc differ diff --git a/MarketPlaces/DarkFox/__pycache__/parser.cpython-310.pyc b/MarketPlaces/DarkFox/__pycache__/parser.cpython-310.pyc new file mode 100644 index 0000000..d9b0dd9 Binary files /dev/null and b/MarketPlaces/DarkFox/__pycache__/parser.cpython-310.pyc differ diff --git a/MarketPlaces/Initialization/__pycache__/__init__.cpython-310.pyc b/MarketPlaces/Initialization/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..7d9bdba Binary files /dev/null and b/MarketPlaces/Initialization/__pycache__/__init__.cpython-310.pyc differ diff --git a/MarketPlaces/Initialization/__pycache__/markets_mining.cpython-310.pyc b/MarketPlaces/Initialization/__pycache__/markets_mining.cpython-310.pyc new file mode 100644 index 0000000..df340ae Binary files /dev/null and b/MarketPlaces/Initialization/__pycache__/markets_mining.cpython-310.pyc differ diff --git a/MarketPlaces/Initialization/__pycache__/prepare_parser.cpython-310.pyc b/MarketPlaces/Initialization/__pycache__/prepare_parser.cpython-310.pyc new file mode 100644 index 0000000..878de36 Binary files /dev/null and b/MarketPlaces/Initialization/__pycache__/prepare_parser.cpython-310.pyc differ diff --git a/MarketPlaces/LionMarketplace/__pycache__/crawler_selenium.cpython-310.pyc b/MarketPlaces/LionMarketplace/__pycache__/crawler_selenium.cpython-310.pyc new file mode 100644 index 0000000..6f7a60a Binary files /dev/null and b/MarketPlaces/LionMarketplace/__pycache__/crawler_selenium.cpython-310.pyc differ diff --git a/MarketPlaces/LionMarketplace/__pycache__/parser.cpython-310.pyc b/MarketPlaces/LionMarketplace/__pycache__/parser.cpython-310.pyc new file mode 100644 index 0000000..88c61cf Binary files /dev/null and 
def startCrawling():
    """Entry point of the MikesGrandStore crawler module.

    In this revision only the final step is active: the market name is
    fetched with ``getMKTName()`` and passed, together with the module-level
    ``baseURL`` and the literal ``False``, to ``new_parse``.  The Tor-based
    crawl step is switched off; it is kept below, commented out, so it can be
    re-enabled.
    """
    # Crawl step (disabled):
    #     opentor()
    #     driver = getAccess()
    #     if driver != 'down':
    #         try:
    #             login(driver)
    #             crawlForum(driver)
    #         except Exception as e:
    #             print(driver.current_url, e)
    #         closetor(driver)

    # NOTE(review): the meaning of the third argument (False) is defined by
    # new_parse, which is not visible here -- confirm before changing it.
    market_name = getMKTName()
    new_parse(market_name, baseURL, False)
__author__ = 'DarkWeb'

# Here, we are importing the auxiliary functions to clean or convert data
from typing import List, Tuple
from MarketPlaces.Utilities.utilities import *

# Here, we are importing BeautifulSoup to search through the HTML tree
from bs4 import BeautifulSoup


def _tag_text(tag, default: str = "-1") -> str:
    """Return the whitespace-stripped text of *tag*, or *default* if the tag is missing or empty."""
    if tag is None:
        return default
    return tag.text.strip() or default


def mikesGrandStore_description_parser(soup: BeautifulSoup) -> Tuple:
    """Parse one MikesGrandStore product (description) page.

    Every field starts as the sentinel string ``"-1"`` and is overwritten only
    when the corresponding element is found, so a page that lacks reviews, a
    rating or a breadcrumb no longer raises; the field simply stays ``"-1"``.

    :param soup: parsed HTML of a single product page.
    :return: a 23-item tuple in this fixed order:
        name, describe, lastSeen, rules, CVE, MS, review, category, shipFrom,
        shipTo, left, escrow, terms, vendor, sold, addDate, available,
        endDate, BTC, USD, rating, success, EURO.
        Only name, describe, lastSeen, review, category, USD and rating are
        extracted from the page; the rest are always ``"-1"``.
    """
    name = "-1"       # 0 Product_Name
    describe = "-1"   # 1 Product_Description
    lastSeen = "-1"   # 2 Product_LastViewDate
    rules = "-1"      # 3 NOT USED ...
    CVE = "-1"        # 4 Product_CVE_Classification (Common Vulnerabilities and Exposures)
    MS = "-1"         # 5 Product_MS_Classification (Microsoft Security)
    review = "-1"     # 6 Product_Number_Of_Reviews
    category = "-1"   # 7 Product_Category
    shipFrom = "-1"   # 8 Product_ShippedFrom
    shipTo = "-1"     # 9 Product_ShippedTo
    left = "-1"       # 10 Product_QuantityLeft
    escrow = "-1"     # 11 Vendor_Warranty
    terms = "-1"      # 12 Vendor_TermsAndConditions
    vendor = "-1"     # 13 Vendor_Name
    sold = "-1"       # 14 Product_QuantitySold
    addDate = "-1"    # 15 Product_AddedDate
    available = "-1"  # 16 NOT USED ...
    endDate = "-1"    # 17 NOT USED ...
    BTC = "-1"        # 18 Product_BTC_SellingPrice
    USD = "-1"        # 19 Product_USD_SellingPrice
    rating = "-1"     # 20 Vendor_Rating
    success = "-1"    # 21 Vendor_Successful_Transactions
    EURO = "-1"       # 22 Product_EURO_SellingPrice

    name = _tag_text(soup.find("h1", {"class": "product-title product_title entry-title"}))

    describe = _tag_text(soup.find("div", {"id": "tab-description"}))

    # The first <li> of the comment list supplies lastSeen.  Tag.get() already
    # returns the attribute value as a string, so no ".text" is applied to it.
    comments_root = soup.find("ol", {"class": "commentlist"})
    first_comment = comments_root.find("li") if comments_root is not None else None
    time_tag = first_comment.find("time") if first_comment is not None else None
    if time_tag is not None:
        lastSeen = time_tag.get("datetime", "-1")

    # The review count is whatever sits between the parentheses of the reviews
    # tab label (presumably something like "Reviews (3)" -- confirm on a live page).
    reviews_link = soup.find("a", {"href": "#tab-reviews"})
    if reviews_link is not None:
        _, opened, remainder = reviews_link.text.partition("(")
        count, closed, _ = remainder.partition(")")
        if opened and closed:
            review = count.strip() or "-1"

    # Match on the single class name: a multi-class string is compared against
    # the exact class attribute, so any stray whitespace would never match.
    breadcrumb = soup.find("nav", {"class": "woocommerce-breadcrumb"})
    if breadcrumb is not None:
        crumbs: List = breadcrumb.find_all("a")
        if len(crumbs) > 1:
            # The second link is used as the category (the first is presumably "Home").
            category = _tag_text(crumbs[1])

    USD = _tag_text(soup.find("div", {"class": "price-wrapper"}))

    # The rating is the second whitespace-separated word of the star-rating
    # text (presumably "Rated 4.50 out of 5 ..." -- confirm on a live page).
    rating_words = _tag_text(soup.find("div", {"class": "star-rating"}), "").split()
    if len(rating_words) > 1:
        rating = rating_words[1]

    return (
        name,
        describe,
        lastSeen,
        rules,
        CVE,
        MS,
        review,
        category,
        shipFrom,
        shipTo,
        left,
        escrow,
        terms,
        vendor,
        sold,
        addDate,
        available,
        endDate,
        BTC,
        USD,
        rating,
        success,
        EURO,
    )
/dev/null and b/MarketPlaces/ThiefWorld/__pycache__/parser.cpython-310.pyc differ diff --git a/MarketPlaces/Tor2door/__pycache__/crawler_selenium.cpython-310.pyc b/MarketPlaces/Tor2door/__pycache__/crawler_selenium.cpython-310.pyc new file mode 100644 index 0000000..8036c23 Binary files /dev/null and b/MarketPlaces/Tor2door/__pycache__/crawler_selenium.cpython-310.pyc differ diff --git a/MarketPlaces/Tor2door/__pycache__/parser.cpython-310.pyc b/MarketPlaces/Tor2door/__pycache__/parser.cpython-310.pyc new file mode 100644 index 0000000..65dd6fd Binary files /dev/null and b/MarketPlaces/Tor2door/__pycache__/parser.cpython-310.pyc differ diff --git a/MarketPlaces/TorBay/__pycache__/crawler_selenium.cpython-310.pyc b/MarketPlaces/TorBay/__pycache__/crawler_selenium.cpython-310.pyc new file mode 100644 index 0000000..b8bb9ee Binary files /dev/null and b/MarketPlaces/TorBay/__pycache__/crawler_selenium.cpython-310.pyc differ diff --git a/MarketPlaces/TorBay/__pycache__/parser.cpython-310.pyc b/MarketPlaces/TorBay/__pycache__/parser.cpython-310.pyc new file mode 100644 index 0000000..b0cb498 Binary files /dev/null and b/MarketPlaces/TorBay/__pycache__/parser.cpython-310.pyc differ diff --git a/MarketPlaces/TorMarket/__pycache__/crawler_selenium.cpython-310.pyc b/MarketPlaces/TorMarket/__pycache__/crawler_selenium.cpython-310.pyc new file mode 100644 index 0000000..c5525a1 Binary files /dev/null and b/MarketPlaces/TorMarket/__pycache__/crawler_selenium.cpython-310.pyc differ diff --git a/MarketPlaces/TorMarket/__pycache__/parser.cpython-310.pyc b/MarketPlaces/TorMarket/__pycache__/parser.cpython-310.pyc new file mode 100644 index 0000000..a3e0dba Binary files /dev/null and b/MarketPlaces/TorMarket/__pycache__/parser.cpython-310.pyc differ diff --git a/MarketPlaces/Utilities/__pycache__/__init__.cpython-310.pyc b/MarketPlaces/Utilities/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..614ec53 Binary files /dev/null and 
b/MarketPlaces/Utilities/__pycache__/__init__.cpython-310.pyc differ diff --git a/MarketPlaces/Utilities/__pycache__/utilities.cpython-310.pyc b/MarketPlaces/Utilities/__pycache__/utilities.cpython-310.pyc new file mode 100644 index 0000000..9937050 Binary files /dev/null and b/MarketPlaces/Utilities/__pycache__/utilities.cpython-310.pyc differ diff --git a/MarketPlaces/__pycache__/__init__.cpython-310.pyc b/MarketPlaces/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..93a6668 Binary files /dev/null and b/MarketPlaces/__pycache__/__init__.cpython-310.pyc differ