diff --git a/MarketPlaces/Initialization/prepare_parser.py b/MarketPlaces/Initialization/prepare_parser.py index a2a6279..a5a8d2c 100644 --- a/MarketPlaces/Initialization/prepare_parser.py +++ b/MarketPlaces/Initialization/prepare_parser.py @@ -162,7 +162,7 @@ def parse_listing(marketPlace, listingFile, soup, createLog, logFile): elif marketPlace == "GoFish": rw = gofish_listing_parser(soup) elif marketPlace == "ZeroDay": - rw = zeroday_listing_parser() + rw = zeroday_listing_parser(soup) else: print("MISSING CALL TO LISTING PARSER IN PREPARE_PARSER.PY!") raise Exception diff --git a/MarketPlaces/ZeroDay/parser.py b/MarketPlaces/ZeroDay/parser.py index e3a50f5..7caafbe 100644 --- a/MarketPlaces/ZeroDay/parser.py +++ b/MarketPlaces/ZeroDay/parser.py @@ -39,19 +39,22 @@ def zeroday_description_parser(soup): # Finding Vendor div_vendor = soup.find('div', {'class': "exploit_view_table_user_content"}) - vendor = div_vendor.find('a').text.strip() + vendor = div_vendor.find('a').text + vendor = cleanString(vendor).strip() # Finding Vendor Rating (bug in their system shows standard rating) div_rating = div_vendor.find_next_sibling('div') rating_vendor = div_rating.find_all('div', {'class': "td"})[1].text + rating_vendor = cleanNumbers(rating_vendor).strip() # Finding Product Name div_name = soup.find('div', {'class': "exploit_title"}) name = div_name.find('h1', {'class': "YellowText"}).text - name = name.strip() + name = cleanString(name).strip() # Finding Product description describe = soup.find('meta', attrs={'name': "description"}).get("content") + describe = cleanString(describe).strip() # Searching for CVE and MS categories cve = soup.findAll(text=re.compile('CVE-\d{4}-\d{4}')) @@ -62,6 +65,7 @@ def zeroday_description_parser(soup): CVE += " " CVE = CVE.replace(',', ' ') CVE = CVE.replace('\n', '') + CVE = CVE.replace('\t', '') ms = soup.findAll(text=re.compile('MS\d{2}-\d{3}')) if ms: MS = " " @@ -73,7 +77,8 @@ def zeroday_description_parser(soup): # Finding category div_category = soup.find('div', {'class': "td"}, text="Category").find_next_sibling('div', {'class': "td"}) - category = div_category.text.strip() + category = div_category.text + category = cleanString(category).strip() # Finding views div_views = soup.find('div', {'class': "td"}, text="Views").find_next_sibling('div', {'class': "td"}) @@ -82,6 +87,7 @@ def zeroday_description_parser(soup): # Finding added date div_date = soup.find('div', {'class': 'td'}, text='Date add').find_next_sibling('div', {'class': "td"}) addDate = div_date.text.strip() + # addDate = datetime.strptime(addDate, "%d-%m-%Y") # Finding BTC and USD/GOLD div_price = soup.find('div', {'class': "td"}, text="Price") @@ -111,7 +117,7 @@ def zeroday_description_parser(soup): def zeroday_listing_parser(soup): # Fields to be parsed nm = 0 # *Total_Products (Should be Integer) - mktName = "0Days" # 0 *Marketplace_Name y + mktName = "ZeroDay" # 0 *Marketplace_Name y vendor = [] # 1 *Vendor y rating_vendor = [] # 2 Vendor_Rating y success = [] # 3 Vendor_Successful_Transactions n @@ -142,16 +148,19 @@ def zeroday_listing_parser(soup): for listing in listings: # Finding the vendor name - vendor_name = listing.find('a', href=lambda href: href and '/author/' in href).text.strip() + vendor_name = listing.find('a', href=lambda href: href and '/author/' in href).text + vendor_name = cleanString(vendor_name).strip() vendor.append(vendor_name) # Finding the vendor rating vendor_level_div = listing.find('div', {'class': "tips_bl"}) vendor_level = vendor_level_div.find('div', {'class': "tips_value_big"}).text + vendor_level = cleanNumbers(vendor_level) rating_vendor.append(vendor_level) # Finding the product name - product_name = listing.find('h3').text.strip() + product_name = listing.find('h3').text + product_name = cleanString(product_name).strip() name.append(product_name) # Searching for CVE and MS categories @@ -165,6 +174,7 @@ def zeroday_listing_parser(soup): cee += " " cee = cee.replace(',', ' ') cee = cee.replace('\n', '') + cee = cee.replace('\t', '') cveValue = cee CVE.append(cveValue) @@ -182,7 +192,8 @@ def zeroday_listing_parser(soup): MS.append(MSValue) # Finding the category - category_text = listing.find_all('div', {'class': "td"})[2].text.strip() + category_text = listing.find_all('div', {'class': "td"})[2].text + category_text = cleanString(category_text).strip() category.append(category_text) # Finding the hrefs @@ -190,11 +201,12 @@ def zeroday_listing_parser(soup): href.append(description_link) # Finding the views - views_text = listing.find_all('div', {'class': "td"})[3].text.strip() + views_text = listing.find_all('div', {'class': "td"})[3].text.replace(' ', '').strip() views.append(views_text) # Finding the date added date = listing.find('div', {'class': "td"}).find('a').text.strip() + # date = datetime.strptime(date, "%d-%m-%Y") addDate.append(date) # Finding the BTC and USD/GOLD