|
|
@ -39,19 +39,22 @@ def zeroday_description_parser(soup): |
|
|
|
|
|
|
|
# Finding Vendor |
|
|
|
div_vendor = soup.find('div', {'class': "exploit_view_table_user_content"}) |
|
|
|
vendor = div_vendor.find('a').text.strip() |
|
|
|
vendor = div_vendor.find('a').text |
|
|
|
vendor = cleanString(vendor).strip() |
|
|
|
|
|
|
|
# Finding Vendor Rating (bug in their system shows standard rating) |
|
|
|
div_rating = div_vendor.find_next_sibling('div') |
|
|
|
rating_vendor = div_rating.find_all('div', {'class': "td"})[1].text |
|
|
|
rating_vendor = cleanNumbers(rating_vendor).strip() |
|
|
|
|
|
|
|
# Finding Product Name |
|
|
|
div_name = soup.find('div', {'class': "exploit_title"}) |
|
|
|
name = div_name.find('h1', {'class': "YellowText"}).text |
|
|
|
name = name.strip() |
|
|
|
name = cleanString(name).strip() |
|
|
|
|
|
|
|
# Finding Product description |
|
|
|
describe = soup.find('meta', attrs={'name': "description"}).get("content") |
|
|
|
describe = cleanString(describe).strip() |
|
|
|
|
|
|
|
# Searching for CVE and MS categories |
|
|
|
cve = soup.findAll(text=re.compile('CVE-\d{4}-\d{4}')) |
|
|
@ -62,6 +65,7 @@ def zeroday_description_parser(soup): |
|
|
|
CVE += " " |
|
|
|
CVE = CVE.replace(',', ' ') |
|
|
|
CVE = CVE.replace('\n', '') |
|
|
|
CVE = CVE.replace('\t', '') |
|
|
|
ms = soup.findAll(text=re.compile('MS\d{2}-\d{3}')) |
|
|
|
if ms: |
|
|
|
MS = " " |
|
|
@ -73,7 +77,8 @@ def zeroday_description_parser(soup): |
|
|
|
|
|
|
|
# Finding category |
|
|
|
div_category = soup.find('div', {'class': "td"}, text="Category").find_next_sibling('div', {'class': "td"}) |
|
|
|
category = div_category.text.strip() |
|
|
|
category = div_category.text |
|
|
|
category = cleanString(category).strip() |
|
|
|
|
|
|
|
# Finding views |
|
|
|
div_views = soup.find('div', {'class': "td"}, text="Views").find_next_sibling('div', {'class': "td"}) |
|
|
@ -82,6 +87,7 @@ def zeroday_description_parser(soup): |
|
|
|
# Finding added date |
|
|
|
div_date = soup.find('div', {'class': 'td'}, text='Date add').find_next_sibling('div', {'class': "td"}) |
|
|
|
addDate = div_date.text.strip() |
|
|
|
# addDate = datetime.strptime(addDate, "%d-%m-%Y") |
|
|
|
|
|
|
|
# Finding BTC and USD/GOLD |
|
|
|
div_price = soup.find('div', {'class': "td"}, text="Price") |
|
|
@ -111,7 +117,7 @@ def zeroday_description_parser(soup): |
|
|
|
def zeroday_listing_parser(soup): |
|
|
|
# Fields to be parsed |
|
|
|
nm = 0 # *Total_Products (Should be Integer) |
|
|
|
mktName = "0Days" # 0 *Marketplace_Name y |
|
|
|
mktName = "ZeroDay" # 0 *Marketplace_Name y |
|
|
|
vendor = [] # 1 *Vendor y |
|
|
|
rating_vendor = [] # 2 Vendor_Rating y |
|
|
|
success = [] # 3 Vendor_Successful_Transactions n |
|
|
@ -142,16 +148,19 @@ def zeroday_listing_parser(soup): |
|
|
|
|
|
|
|
for listing in listings: |
|
|
|
# Finding the vendor name |
|
|
|
vendor_name = listing.find('a', href=lambda href: href and '/author/' in href).text.strip() |
|
|
|
vendor_name = listing.find('a', href=lambda href: href and '/author/' in href).text |
|
|
|
vendor_name = cleanString(vendor_name).strip() |
|
|
|
vendor.append(vendor_name) |
|
|
|
|
|
|
|
# Finding the vendor rating |
|
|
|
vendor_level_div = listing.find('div', {'class': "tips_bl"}) |
|
|
|
vendor_level = vendor_level_div.find('div', {'class': "tips_value_big"}).text |
|
|
|
vendor_level = cleanNumbers(vendor_level) |
|
|
|
rating_vendor.append(vendor_level) |
|
|
|
|
|
|
|
# Finding the product name |
|
|
|
product_name = listing.find('h3').text.strip() |
|
|
|
product_name = listing.find('h3').text |
|
|
|
product_name = cleanString(product_name).strip() |
|
|
|
name.append(product_name) |
|
|
|
|
|
|
|
# Searching for CVE and MS categories |
|
|
@ -165,6 +174,7 @@ def zeroday_listing_parser(soup): |
|
|
|
cee += " " |
|
|
|
cee = cee.replace(',', ' ') |
|
|
|
cee = cee.replace('\n', '') |
|
|
|
cee = cee.replace('\t', '') |
|
|
|
cveValue = cee |
|
|
|
CVE.append(cveValue) |
|
|
|
|
|
|
@ -182,7 +192,8 @@ def zeroday_listing_parser(soup): |
|
|
|
MS.append(MSValue) |
|
|
|
|
|
|
|
# Finding the category |
|
|
|
category_text = listing.find_all('div', {'class': "td"})[2].text.strip() |
|
|
|
category_text = listing.find_all('div', {'class': "td"})[2].text |
|
|
|
category_text = cleanString(category_text).strip() |
|
|
|
category.append(category_text) |
|
|
|
|
|
|
|
# Finding the hrefs |
|
|
@ -190,11 +201,12 @@ def zeroday_listing_parser(soup): |
|
|
|
href.append(description_link) |
|
|
|
|
|
|
|
# Finding the views |
|
|
|
views_text = listing.find_all('div', {'class': "td"})[3].text.strip() |
|
|
|
views_text = listing.find_all('div', {'class': "td"})[3].text.replace(' ', '').strip() |
|
|
|
views.append(views_text) |
|
|
|
|
|
|
|
# Finding the date added |
|
|
|
date = listing.find('div', {'class': "td"}).find('a').text.strip() |
|
|
|
# date = datetime.strptime(date, "%d-%m-%Y") |
|
|
|
addDate.append(date) |
|
|
|
|
|
|
|
# Finding the BTC and USD/GOLD |
|
|
|