diff --git a/MarketPlaces/GoFish/crawler_selenium.py b/MarketPlaces/GoFish/crawler_selenium.py index 42f7bfa..7ce4cee 100644 --- a/MarketPlaces/GoFish/crawler_selenium.py +++ b/MarketPlaces/GoFish/crawler_selenium.py @@ -31,17 +31,17 @@ baseURL = 'http://gofishbybookb4a2kvviuygmwjqfxx7nqsovweogs2cxvqvexhe7edyd.onion def startCrawling(): mktName = getMKTName() - driver = getAccess() - - if driver != 'down': - try: - login(driver) - crawlForum(driver) - except Exception as e: - print(driver.current_url, e) - closeDriver(driver) - - # new_parse(mktName, baseURL, True) + # driver = getAccess() + # + # if driver != 'down': + # try: + # login(driver) + # crawlForum(driver) + # except Exception as e: + # print(driver.current_url, e) + # closeDriver(driver) + + new_parse(mktName, baseURL, True) # Returns the name of the website diff --git a/MarketPlaces/GoFish/parser.py b/MarketPlaces/GoFish/parser.py index de10035..b1e5c0e 100644 --- a/MarketPlaces/GoFish/parser.py +++ b/MarketPlaces/GoFish/parser.py @@ -2,7 +2,6 @@ __author__ = 'DarkWeb' # Here, we are importing the auxiliary functions to clean or convert data from MarketPlaces.Utilities.utilities import * - # Here, we are importing BeautifulSoup to search through the HTML tree from bs4 import BeautifulSoup @@ -37,67 +36,75 @@ def gofish_description_parser(soup): vendor_image = "-1" # 20 Vendor_Image # Finding Product Name - divmb = soup.findAll('div', {'class': "mb-1"}) + divmb = soup.find('div', {'class': "p-3 mb-1 fs-3 fw-bold border border-2 bg-white rounded"}) - name = divmb[0].text + name = divmb.text name = name.replace('\n', ' ') name = name.replace(",", "") name = name.strip() # Finding Vendor - vendor = divmb[1].find('a').text.strip() + vendor = soup.find('div', {'class': 'my-1'}).text.strip() # Finding Vendor Rating - temp = soup.find('div', {'class': ""}).text - temp = temp.split('(') - rating = temp[0].replace("Vendor's Review : ", "") - rating = rating.replace("%", "") - rating_vendor = rating.strip() + # 
temp = soup.find('div', {'class': ""}).text + # temp = temp.split('(') + # rating = temp[0].replace("Vendor's Review : ", "") + # rating = rating.replace("%", "") + # rating_vendor = rating.strip() # Finding the Product Rating and Number of Product Reviews - reviews = temp[2].replace(" review)", "") - reviews = reviews.strip() + # reviews = temp[2].replace(" review)", "") + # reviews = reviews.strip() - temp = temp[1].split(")") - rating = temp[1].replace("Product Review : ", "") - rating = rating.replace("%", "") - rating_item = rating.strip() + # temp = temp[1].split(")") + # rating = temp[1].replace("Product Review : ", "") + # rating = rating.replace("%", "") + # rating_item = rating.strip() # Finding Prices - USD = soup.find('div', {'class': "h3 text-primary"}).text.strip() + precios = soup.findAll('td', {'class': "text-end text-nowrap"}) + USD = precios[0].text.strip().replace('$', '') # Finding the Product Category - pmb = soup.findAll('p', {'class': "mb-1"}) + # pmb = soup.findAll('p', {'class': "mb-1"}) - category = pmb[-1].text - category = category.replace("Category: ", "").strip() + # category = pmb[-1].text + # category = category.replace("Category: ", "").strip() # Finding the Product Quantity Available - left = divmb[-1].text - left = left.split(",", 1)[1] - left = left.replace("in stock", "") - left = left.strip() + # left = divmb[-1].text + # left = left.split(",", 1)[1] + # left = left.replace("in stock", "") + # left = left.strip() # Finding Number Sold - sold = divmb[-1].text - sold = sold.split(",", 1)[0] - sold = sold.replace("sold", "") - sold = sold.strip() + # sold = divmb[-1].text + # sold = sold.split(",", 1)[0] + # sold = sold.replace("sold", "") + # sold = sold.strip() # Finding Shipment Information (Origin) - pmb[0].text - shipFrom = shipFrom.replace("Ships from: ", "").strip() + origin = soup.findAll('div', {'class': "p-3 mt-2 mb-3 border border-2 bg-white rounded"}) + origin = origin[0].text.strip() + + shipFrom = origin[1:].strip() 
# Finding Shipment Information (Destination) - pmb[1].text - shipTo = shipTo.replace("Ships to: ", "").strip() + dest = soup.findAll('div', {'class': 'p-3 mb-3 overflow-auto border border-2 bg-white rounded'}) + dest = dest[-1].text.strip() + + shipTo = dest[1:].strip() # Finding the Product description - cardbody = soup.findAll('div', {'class': "card-body"}) - describe = cardbody[1].text.strip() + cardbody = soup.findAll('div', {'class': "p-3 mb-3 overflow-auto border border-2 bg-white rounded"}) + describe = cardbody[0].text + describe = describe.replace('\n', ' ') + describe = describe.strip() # Finding Product Image - image = soup.find('div', {'class': 'product-primary'}).find('img') + img = soup.findAll('figure', {'class': 'image-feature'})[0] + image = img.find('img', {'class': 'image-block rounded'}) image = image.get('src') image = image.split('base64,')[-1] @@ -159,21 +166,20 @@ def gofish_listing_parser(soup): image_vendor = [] # 21 Vendor_Image href = [] # 22 Product_Links - listing = soup.findAll('div', {"id": "itembox"}) + listing = soup.find('tbody', {"class": "border border-2 align-middle"}).findAll('tr') # Populating the Number of Products nm = len(listing) for a in listing: bae = a.findAll('a', href=True) - lb = a.findAll('div', {"id": "littlebox"}) # Adding the url to the list of urls link = bae[0].get('href') href.append(link) # Finding the Product - product = lb[1].find('a').text + product = bae[1].text product = product.replace('\n', ' ') product = product.replace(",", "") product = product.replace("...", "") @@ -181,19 +187,19 @@ name.append(product) # Finding Product Image - product_image = a.find('img') + product_image = bae[0].find('img') product_image = product_image.get('src') product_image = product_image.split('base64,')[-1] image.append(product_image) # Finding Prices - price = lb[-1].find('div', {"class": "mb-1"}).text + price = a.find('span', {"class": "fw-bold text-nowrap"}).text price = 
price.replace("$","") price = price.strip() USD.append(price) # Finding the Vendor - vendor_name = lb[-1].find("a").text + vendor_name = bae[-1].text vendor_name = vendor_name.replace(",", "") vendor_name = vendor_name.strip() vendor.append(vendor_name) @@ -201,35 +207,35 @@ def gofish_listing_parser(soup): image_vendor.append("-1") # Finding the Category - cat = lb[-1].find("span").text - cat = cat.replace("class:", "") - cat = cat.strip() - category.append(cat) + # cat = lb[-1].find("span").text + # cat = cat.replace("class:", "") + # cat = cat.strip() + # category.append(cat) - span = lb[0].findAll("span") + # span = lb[0].findAll("span") # Finding Number of Views - num = span[0].text - num = num.replace("views:", "") - num = num.strip() - sold.append(num) + # num = span[0].text + # num = num.replace("views:", "") + # num = num.strip() + # sold.append(num) # Finding Number Sold - num = span[2].text - num = num.replace("Sold:", "") - num = num.strip() - sold.append(num) + # num = span[2].text + # num = num.replace("Sold:", "") + # num = num.strip() + # sold.append(num) # Finding Quantity Left - quant = span[1].text - quant = quant.replace("stock:", "") - quant = quant.strip() - qLeft.append(quant) + # quant = span[1].text + # quant = quant.replace("stock:", "") + # quant = quant.strip() + # qLeft.append(quant) # add shipping information - ship = lb[2].findAll('small')[1].findAll('span')[1].text.split("->") - shipFrom.append(ship[0].replace("Ship from ", "").strip()) - shipTo.append(ship[1].replace("to ", "").strip()) + # ship = lb[2].findAll('small')[1].findAll('span')[1].text.split("->") + # shipFrom.append(ship[0].replace("Ship from ", "").strip()) + # shipTo.append(ship[1].replace("to ", "").strip()) # Searching for CVE and MS categories diff --git a/MarketPlaces/Initialization/prepare_parser.py b/MarketPlaces/Initialization/prepare_parser.py index 79a2bdc..4e3ab4a 100644 --- a/MarketPlaces/Initialization/prepare_parser.py +++ 
b/MarketPlaces/Initialization/prepare_parser.py @@ -19,6 +19,7 @@ from MarketPlaces.DarkBazar.parser import * from MarketPlaces.Sonanza.parser import * from MarketPlaces.Kingdom.parser import * from MarketPlaces.Ares.parser import * +from MarketPlaces.GoFish.parser import * from MarketPlaces.Classifier.classify_product import predict @@ -142,6 +143,8 @@ def parse_listing(marketPlace, listingFile, soup, createLog, logFile): rw = sonanza_listing_parser(soup) elif marketPlace == "Kingdom": rw = kingdom_listing_parser(soup) + elif marketPlace == "GoFish": + rw = gofish_listing_parser(soup) else: print("MISSING CALL TO LISTING PARSER IN PREPARE_PARSER.PY!") raise Exception @@ -184,6 +187,8 @@ def parse_description(marketPlace, descriptionFile, soup, createLog, logFile): rmm = sonanza_description_parser(soup) elif marketPlace == "Kingdom": rmm = kingdom_description_parser(soup) + elif marketPlace == "GoFish": + rmm = gofish_description_parser(soup) else: print("MISSING CALL TO DESCRIPTION PARSER IN PREPARE_PARSER.PY!") raise Exception