
minor naming fix and used cleanString and cleanNumbers

Branch: main
Kimtaiyo Mech committed 11 months ago
Commit fa8cb71d37
2 changed files with 21 additions and 9 deletions

  1. MarketPlaces/Initialization/prepare_parser.py  (+1, -1)
  2. MarketPlaces/ZeroDay/parser.py  (+20, -8)

MarketPlaces/Initialization/prepare_parser.py  (+1, -1)

@@ -162,7 +162,7 @@ def parse_listing(marketPlace, listingFile, soup, createLog, logFile):
     elif marketPlace == "GoFish":
         rw = gofish_listing_parser(soup)
     elif marketPlace == "ZeroDay":
-        rw = zeroday_listing_parser()
+        rw = zeroday_listing_parser(soup)
     else:
         print("MISSING CALL TO LISTING PARSER IN PREPARE_PARSER.PY!")
         raise Exception


MarketPlaces/ZeroDay/parser.py  (+20, -8)

@@ -39,19 +39,22 @@ def zeroday_description_parser(soup):
     # Finding Vendor
     div_vendor = soup.find('div', {'class': "exploit_view_table_user_content"})
-    vendor = div_vendor.find('a').text.strip()
+    vendor = div_vendor.find('a').text
+    vendor = cleanString(vendor).strip()

     # Finding Vendor Rating (bug in their system shows standard rating)
     div_rating = div_vendor.find_next_sibling('div')
     rating_vendor = div_rating.find_all('div', {'class': "td"})[1].text
+    rating_vendor = cleanNumbers(rating_vendor).strip()

     # Finding Product Name
     div_name = soup.find('div', {'class': "exploit_title"})
     name = div_name.find('h1', {'class': "YellowText"}).text
-    name = name.strip()
+    name = cleanString(name).strip()

     # Finding Product description
     describe = soup.find('meta', attrs={'name': "description"}).get("content")
+    describe = cleanString(describe).strip()

     # Searching for CVE and MS categories
     cve = soup.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
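
Note: the cleanString and cleanNumbers helpers applied throughout this commit are defined elsewhere in the project and are not part of this diff. A minimal, hypothetical sketch of the kind of normalization such helpers usually perform (names kept, behavior assumed, not the project's actual implementation):

    import re

    def cleanString(text):
        # Hypothetical sketch: normalize escape characters and collapse repeated whitespace.
        text = text.replace('\n', ' ').replace('\t', ' ').replace('\r', ' ')
        return re.sub(r' +', ' ', text)

    def cleanNumbers(text):
        # Hypothetical sketch: keep only digits, dots, and minus signs of a numeric field.
        return ''.join(ch for ch in text if ch.isdigit() or ch in '.-')
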
@@ -62,6 +65,7 @@ def zeroday_description_parser(soup):
            CVE += " "
        CVE = CVE.replace(',', ' ')
        CVE = CVE.replace('\n', '')
+       CVE = CVE.replace('\t', '')
    ms = soup.findAll(text=re.compile('MS\d{2}-\d{3}'))
    if ms:
        MS = " "

@@ -73,7 +77,8 @@ def zeroday_description_parser(soup):
    # Finding category
    div_category = soup.find('div', {'class': "td"}, text="Category").find_next_sibling('div', {'class': "td"})
-   category = div_category.text.strip()
+   category = div_category.text
+   category = cleanString(category).strip()

    # Finding views
    div_views = soup.find('div', {'class': "td"}, text="Views").find_next_sibling('div', {'class': "td"})

@@ -82,6 +87,7 @@ def zeroday_description_parser(soup):
    # Finding added date
    div_date = soup.find('div', {'class': 'td'}, text='Date add').find_next_sibling('div', {'class': "td"})
    addDate = div_date.text.strip()
+   # addDate = datetime.strptime(addDate, "%d-%m-%Y")

    # Finding BTC and USD/GOLD
    div_price = soup.find('div', {'class': "td"}, text="Price")
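
The strptime call is left commented out in both functions. If the site's "Date add" field really is day-month-year, enabling it would behave as in the sketch below (the example value and format are assumptions, not confirmed by this diff):

    from datetime import datetime

    addDate = "14-08-2023"                           # example value; the page's real format is assumed
    addDate = datetime.strptime(addDate, "%d-%m-%Y") # -> datetime.datetime(2023, 8, 14, 0, 0)
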
@@ -111,7 +117,7 @@ def zeroday_description_parser(soup):
 def zeroday_listing_parser(soup):
     # Fields to be parsed
     nm = 0                          # *Total_Products (Should be Integer)
-    mktName = "0Days"               # 0 *Marketplace_Name y
+    mktName = "ZeroDay"             # 0 *Marketplace_Name y
     vendor = []                     # 1 *Vendor y
     rating_vendor = []              # 2 Vendor_Rating y
     success = []                    # 3 Vendor_Successful_Transactions n
@@ -142,16 +148,19 @@ def zeroday_listing_parser(soup):
     for listing in listings:

         # Finding the vendor name
-        vendor_name = listing.find('a', href=lambda href: href and '/author/' in href).text.strip()
+        vendor_name = listing.find('a', href=lambda href: href and '/author/' in href).text
+        vendor_name = cleanString(vendor_name).strip()
         vendor.append(vendor_name)

         # Finding the vendor rating
         vendor_level_div = listing.find('div', {'class': "tips_bl"})
         vendor_level = vendor_level_div.find('div', {'class': "tips_value_big"}).text
+        vendor_level = cleanNumbers(vendor_level)
         rating_vendor.append(vendor_level)

         # Finding the product name
-        product_name = listing.find('h3').text.strip()
+        product_name = listing.find('h3').text
+        product_name = cleanString(product_name).strip()
         name.append(product_name)

         # Searching for CVE and MS categories
@@ -165,6 +174,7 @@ def zeroday_listing_parser(soup):
                cee += " "
            cee = cee.replace(',', ' ')
            cee = cee.replace('\n', '')
+           cee = cee.replace('\t', '')
            cveValue = cee
        CVE.append(cveValue)

@@ -182,7 +192,8 @@ def zeroday_listing_parser(soup):
         MS.append(MSValue)

         # Finding the category
-        category_text = listing.find_all('div', {'class': "td"})[2].text.strip()
+        category_text = listing.find_all('div', {'class': "td"})[2].text
+        category_text = cleanString(category_text).strip()
         category.append(category_text)

         # Finding the hrefs
@@ -190,11 +201,12 @@ def zeroday_listing_parser(soup):
         href.append(description_link)

         # Finding the views
-        views_text = listing.find_all('div', {'class': "td"})[3].text.strip()
+        views_text = listing.find_all('div', {'class': "td"})[3].text.replace(' ', '').strip()
         views.append(views_text)

         # Finding the date added
         date = listing.find('div', {'class': "td"}).find('a').text.strip()
+        # date = datetime.strptime(date, "%d-%m-%Y")
         addDate.append(date)

         # Finding the BTC and USD/GOLD
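
With the signature fix in prepare_parser.py, the listing parser is now invoked with the parsed page. A minimal usage sketch, assuming the project parses saved pages with BeautifulSoup (the file path and print call are illustrative only):

    from bs4 import BeautifulSoup
    from MarketPlaces.ZeroDay.parser import zeroday_listing_parser  # import path assumed from the repo layout

    # Hypothetical saved listing page; prepare_parser.py normally supplies the soup object.
    with open("zeroday_listing.html", "r", encoding="utf-8") as f:
        soup = BeautifulSoup(f.read(), "html.parser")

    rw = zeroday_listing_parser(soup)  # matches the corrected call in prepare_parser.py
    print(rw)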

