Browse Source

Finished with description parser for MikesGrandStore

main
Khoi 1 year ago
parent
commit
1bb7780f40
29 changed files with 90 additions and 16 deletions
  1. +1
    -1
      Forums/OnniForums/parser.py
  2. BIN
      MarketPlaces/Classifier/__pycache__/__init__.cpython-310.pyc
  3. BIN
      MarketPlaces/Classifier/__pycache__/classify_product.cpython-310.pyc
  4. BIN
      MarketPlaces/Classifier/__pycache__/transformer.cpython-310.pyc
  5. BIN
      MarketPlaces/DB_Connection/__pycache__/__init__.cpython-310.pyc
  6. BIN
      MarketPlaces/DB_Connection/__pycache__/db_connection.cpython-310.pyc
  7. BIN
      MarketPlaces/DarkFox/__pycache__/crawler_selenium.cpython-310.pyc
  8. BIN
      MarketPlaces/DarkFox/__pycache__/parser.cpython-310.pyc
  9. BIN
      MarketPlaces/Initialization/__pycache__/__init__.cpython-310.pyc
  10. BIN
      MarketPlaces/Initialization/__pycache__/markets_mining.cpython-310.pyc
  11. BIN
      MarketPlaces/Initialization/__pycache__/prepare_parser.cpython-310.pyc
  12. BIN
      MarketPlaces/LionMarketplace/__pycache__/crawler_selenium.cpython-310.pyc
  13. BIN
      MarketPlaces/LionMarketplace/__pycache__/parser.cpython-310.pyc
  14. BIN
      MarketPlaces/MikesGrandStore/__pycache__/crawler_selenium.cpython-310.pyc
  15. BIN
      MarketPlaces/MikesGrandStore/__pycache__/parser.cpython-310.pyc
  16. +13
    -13
      MarketPlaces/MikesGrandStore/crawler_selenium.py
  17. +76
    -2
      MarketPlaces/MikesGrandStore/parser.py
  18. +0
    -0
      MarketPlaces/MikesGrandStore/testing.py
  19. BIN
      MarketPlaces/ThiefWorld/__pycache__/crawler_selenium.cpython-310.pyc
  20. BIN
      MarketPlaces/ThiefWorld/__pycache__/parser.cpython-310.pyc
  21. BIN
      MarketPlaces/Tor2door/__pycache__/crawler_selenium.cpython-310.pyc
  22. BIN
      MarketPlaces/Tor2door/__pycache__/parser.cpython-310.pyc
  23. BIN
      MarketPlaces/TorBay/__pycache__/crawler_selenium.cpython-310.pyc
  24. BIN
      MarketPlaces/TorBay/__pycache__/parser.cpython-310.pyc
  25. BIN
      MarketPlaces/TorMarket/__pycache__/crawler_selenium.cpython-310.pyc
  26. BIN
      MarketPlaces/TorMarket/__pycache__/parser.cpython-310.pyc
  27. BIN
      MarketPlaces/Utilities/__pycache__/__init__.cpython-310.pyc
  28. BIN
      MarketPlaces/Utilities/__pycache__/utilities.cpython-310.pyc
  29. BIN
      MarketPlaces/__pycache__/__init__.cpython-310.pyc

+ 1
- 1
Forums/OnniForums/parser.py View File

@ -13,7 +13,7 @@ from bs4 import BeautifulSoup
# This is the method to parse the Description Pages (one page to each topic in the Listing Pages) # This is the method to parse the Description Pages (one page to each topic in the Listing Pages)
def onniForums_description_parser(soup: BeautifulSoup):
def onniForums_description_parser(soup: BeautifulSoup) -> tuple:
topicName: str = "-1" # 0 *topic name topicName: str = "-1" # 0 *topic name
users : List[str] = [] # 1 *all users of each post users : List[str] = [] # 1 *all users of each post


BIN
MarketPlaces/Classifier/__pycache__/__init__.cpython-310.pyc View File


BIN
MarketPlaces/Classifier/__pycache__/classify_product.cpython-310.pyc View File


BIN
MarketPlaces/Classifier/__pycache__/transformer.cpython-310.pyc View File


BIN
MarketPlaces/DB_Connection/__pycache__/__init__.cpython-310.pyc View File


BIN
MarketPlaces/DB_Connection/__pycache__/db_connection.cpython-310.pyc View File


BIN
MarketPlaces/DarkFox/__pycache__/crawler_selenium.cpython-310.pyc View File


BIN
MarketPlaces/DarkFox/__pycache__/parser.cpython-310.pyc View File


BIN
MarketPlaces/Initialization/__pycache__/__init__.cpython-310.pyc View File


BIN
MarketPlaces/Initialization/__pycache__/markets_mining.cpython-310.pyc View File


BIN
MarketPlaces/Initialization/__pycache__/prepare_parser.cpython-310.pyc View File


BIN
MarketPlaces/LionMarketplace/__pycache__/crawler_selenium.cpython-310.pyc View File


BIN
MarketPlaces/LionMarketplace/__pycache__/parser.cpython-310.pyc View File


BIN
MarketPlaces/MikesGrandStore/__pycache__/crawler_selenium.cpython-310.pyc View File


BIN
MarketPlaces/MikesGrandStore/__pycache__/parser.cpython-310.pyc View File


+ 13
- 13
MarketPlaces/MikesGrandStore/crawler_selenium.py View File

@ -33,19 +33,19 @@ baseURL = 'http://4yx2akutmkhwfgzlpdxiah7cknurw6vlddlq24fxa3r3ebophwgpvhyd.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor # Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later #acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling(): def startCrawling():
opentor()
# mktName = getMKTName()
driver = getAccess()
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
# new_parse(mktName, baseURL, False)
# opentor()
mktName = getMKTName()
# driver = getAccess()
# if driver != 'down':
# try:
# login(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
new_parse(mktName, baseURL, False)
# Opens Tor Browser # Opens Tor Browser


+ 76
- 2
MarketPlaces/MikesGrandStore/parser.py View File

@ -1,14 +1,88 @@
__author__ = 'DarkWeb' __author__ = 'DarkWeb'
# Here, we are importing the auxiliary functions to clean or convert data # Here, we are importing the auxiliary functions to clean or convert data
from typing import List, Tuple
from MarketPlaces.Utilities.utilities import * from MarketPlaces.Utilities.utilities import *
# Here, we are importing BeautifulSoup to search through the HTML tree # Here, we are importing BeautifulSoup to search through the HTML tree
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
def _tag_text(tag, default: str = "-1") -> str:
    """Return ``tag.text``, or *default* when the lookup found nothing (``None``)."""
    return default if tag is None else tag.text


def mikesGrandStore_description_parser(soup: BeautifulSoup) -> Tuple:
    """Parse one MikesGrandStore product description page into a 23-field row.

    Every field starts as the placeholder string "-1" and is overwritten only
    when the matching element is present in the page, so a page without
    reviews, ratings or breadcrumbs yields "-1" for those fields instead of
    raising.

    Args:
        soup: parsed HTML of a single product description page.

    Returns:
        A 23-tuple of strings in the fixed field order listed below
        (index 0 is the product name, index 22 the EURO price).
    """
    name = "-1"       # 0  Product_Name
    describe = "-1"   # 1  Product_Description
    lastSeen = "-1"   # 2  Product_LastViewDate
    rules = "-1"      # 3  NOT USED ...
    CVE = "-1"        # 4  Product_CVE_Classification (Common Vulnerabilities and Exposures)
    MS = "-1"         # 5  Product_MS_Classification (Microsoft Security)
    review = "-1"     # 6  Product_Number_Of_Reviews
    category = "-1"   # 7  Product_Category
    shipFrom = "-1"   # 8  Product_ShippedFrom
    shipTo = "-1"     # 9  Product_ShippedTo
    left = "-1"       # 10 Product_QuantityLeft
    escrow = "-1"     # 11 Vendor_Warranty
    terms = "-1"      # 12 Vendor_TermsAndConditions
    vendor = "-1"     # 13 Vendor_Name
    sold = "-1"       # 14 Product_QuantitySold
    addDate = "-1"    # 15 Product_AddedDate
    available = "-1"  # 16 NOT USED ...
    endDate = "-1"    # 17 NOT USED ...
    BTC = "-1"        # 18 Product_BTC_SellingPrice
    USD = "-1"        # 19 Product_USD_SellingPrice
    rating = "-1"     # 20 Vendor_Rating
    success = "-1"    # 21 Vendor_Successful_Transactions
    EURO = "-1"       # 22 Product_EURO_SellingPrice

    name = _tag_text(
        soup.find("h1", {"class": "product-title product_title entry-title"})
    )
    describe = _tag_text(soup.find("div", {"id": "tab-description"}))

    # The review list is absent when a product has no reviews yet.
    comment_list = soup.find("ol", {"class": "commentlist"})
    if comment_list is not None:
        comments = comment_list.find_all("li")
        if comments:
            # NOTE(review): takes the first listed review as the most recent
            # one - confirm the site's review ordering.
            time_tag = comments[0].find("time")
            if time_tag is not None:
                # Tag.get() already returns the attribute value as a string;
                # calling .text on it would raise AttributeError.
                lastSeen = time_tag.get("datetime", "-1")

    # The tab label is expected to look like "Reviews (3)"; the count is the
    # text between the first "(" and the following ")".
    review_tab = soup.find("a", {"href": "#tab-reviews"})
    if review_tab is not None:
        _, opening, after_paren = review_tab.text.partition("(")
        if opening:
            review = after_paren.partition(")")[0]

    # Match on the single class name: an exact multi-class string (especially
    # one with a trailing space) does not match the parsed class list.
    breadcrumb_nav = soup.find("nav", {"class": "woocommerce-breadcrumb"})
    if breadcrumb_nav is not None:
        breadcrumb_links = breadcrumb_nav.find_all("a")
        if len(breadcrumb_links) > 1:
            # Second breadcrumb link is used as the category.
            category = breadcrumb_links[1].text

    USD = _tag_text(soup.find("div", {"class": "price-wrapper"}))

    # Star-rating text is expected to look like "Rated 5.00 out of 5";
    # the second whitespace-separated token is the score.
    star_rating = soup.find("div", {"class": "star-rating"})
    if star_rating is not None:
        rating_tokens = star_rating.text.split()
        if len(rating_tokens) > 1:
            rating = rating_tokens[1]

    row = (
        name,
        describe,
        lastSeen,
        rules,
        CVE,
        MS,
        review,
        category,
        shipFrom,
        shipTo,
        left,
        escrow,
        terms,
        vendor,
        sold,
        addDate,
        available,
        endDate,
        BTC,
        USD,
        rating,
        success,
        EURO,
    )

    return row
def mikesGtrandStore_listing_parser(soup: BeautifulSoup): def mikesGtrandStore_listing_parser(soup: BeautifulSoup):


+ 0
- 0
MarketPlaces/MikesGrandStore/testing.py View File


BIN
MarketPlaces/ThiefWorld/__pycache__/crawler_selenium.cpython-310.pyc View File


BIN
MarketPlaces/ThiefWorld/__pycache__/parser.cpython-310.pyc View File


BIN
MarketPlaces/Tor2door/__pycache__/crawler_selenium.cpython-310.pyc View File


BIN
MarketPlaces/Tor2door/__pycache__/parser.cpython-310.pyc View File


BIN
MarketPlaces/TorBay/__pycache__/crawler_selenium.cpython-310.pyc View File


BIN
MarketPlaces/TorBay/__pycache__/parser.cpython-310.pyc View File


BIN
MarketPlaces/TorMarket/__pycache__/crawler_selenium.cpython-310.pyc View File


BIN
MarketPlaces/TorMarket/__pycache__/parser.cpython-310.pyc View File


BIN
MarketPlaces/Utilities/__pycache__/__init__.cpython-310.pyc View File


BIN
MarketPlaces/Utilities/__pycache__/utilities.cpython-310.pyc View File


BIN
MarketPlaces/__pycache__/__init__.cpython-310.pyc View File


Loading…
Cancel
Save