Minor naming fix; use cleanString and cleanNumbers

1 year ago · fa8cb71d37
--- a/MarketPlaces/Initialization/prepare_parser.py
+++ b/MarketPlaces/Initialization/prepare_parser.py
@ -162,7 +162,7 @@ def parse_listing(marketPlace, listingFile, soup, createLog, logFile):
        elif marketPlace == "GoFish":
            rw = gofish_listing_parser(soup)
        elif marketPlace == "ZeroDay":
            rw = zeroday_listing_parser()
            rw = zeroday_listing_parser(soup)
        else:
            print("MISSING CALL TO LISTING PARSER IN PREPARE_PARSER.PY!")
            raise Exception
--- a/MarketPlaces/ZeroDay/parser.py
+++ b/MarketPlaces/ZeroDay/parser.py
@ -39,19 +39,22 @@ def zeroday_description_parser(soup):

    # Finding Vendor
    div_vendor = soup.find('div', {'class': "exploit_view_table_user_content"})
    vendor = div_vendor.find('a').text.strip()
    vendor = div_vendor.find('a').text
    vendor = cleanString(vendor).strip()

    # Finding Vendor Rating (bug in their system shows standard rating)
    div_rating = div_vendor.find_next_sibling('div')
    rating_vendor = div_rating.find_all('div', {'class': "td"})[1].text
    rating_vendor = cleanNumbers(rating_vendor).strip()

    # Finding Product Name
    div_name = soup.find('div', {'class': "exploit_title"})
    name = div_name.find('h1', {'class': "YellowText"}).text
    name = name.strip()
    name = cleanString(name).strip()

    # Finding Product description
    describe = soup.find('meta', attrs={'name': "description"}).get("content")
    describe = cleanString(describe).strip()

    # Searching for CVE and MS categories
    cve = soup.findAll(text=re.compile('CVE-\d{4}-\d{4}'))
@ -62,6 +65,7 @@ def zeroday_description_parser(soup):
            CVE += "  "
            CVE = CVE.replace(',', ' ')
            CVE = CVE.replace('\n', '')
            CVE = CVE.replace('\t', '')
    ms = soup.findAll(text=re.compile('MS\d{2}-\d{3}'))
    if ms:
        MS = " "
@ -73,7 +77,8 @@ def zeroday_description_parser(soup):

    # Finding category
    div_category = soup.find('div', {'class': "td"}, text="Category").find_next_sibling('div', {'class': "td"})
    category = div_category.text.strip()
    category = div_category.text
    category = cleanString(category).strip()

    # Finding views
    div_views = soup.find('div', {'class': "td"}, text="Views").find_next_sibling('div', {'class': "td"})
@ -82,6 +87,7 @@ def zeroday_description_parser(soup):
    # Finding added date
    div_date = soup.find('div', {'class': 'td'}, text='Date add').find_next_sibling('div', {'class': "td"})
    addDate = div_date.text.strip()
    # addDate = datetime.strptime(addDate, "%d-%m-%Y")

    # Finding BTC and USD/GOLD
    div_price = soup.find('div', {'class': "td"}, text="Price")
@ -111,7 +117,7 @@ def zeroday_description_parser(soup):
 def zeroday_listing_parser(soup):
    # Fields to be parsed
    nm = 0  # *Total_Products (Should be Integer)
    mktName = "0Days"  # 0 *Marketplace_Name y
    mktName = "ZeroDay"  # 0 *Marketplace_Name y
    vendor = []  # 1 *Vendor y
    rating_vendor = []  # 2 Vendor_Rating y
    success = []  # 3 Vendor_Successful_Transactions n
@ -142,16 +148,19 @@ def zeroday_listing_parser(soup):

    for listing in listings:
        # Finding the vendor name
        vendor_name = listing.find('a', href=lambda href: href and '/author/' in href).text.strip()
        vendor_name = listing.find('a', href=lambda href: href and '/author/' in href).text
        vendor_name = cleanString(vendor_name).strip()
        vendor.append(vendor_name)

        # Finding the vendor rating
        vendor_level_div = listing.find('div', {'class': "tips_bl"})
        vendor_level = vendor_level_div.find('div', {'class': "tips_value_big"}).text
        vendor_level = cleanNumbers(vendor_level)
        rating_vendor.append(vendor_level)

        # Finding the product name
        product_name = listing.find('h3').text.strip()
        product_name = listing.find('h3').text
        product_name = cleanString(product_name).strip()
        name.append(product_name)

        # Searching for CVE and MS categories
@ -165,6 +174,7 @@ def zeroday_listing_parser(soup):
                cee += "  "
                cee = cee.replace(',', ' ')
                cee = cee.replace('\n', '')
                cee = cee.replace('\t', '')
            cveValue = cee
        CVE.append(cveValue)

@ -182,7 +192,8 @@ def zeroday_listing_parser(soup):
        MS.append(MSValue)

        # Finding the category
        category_text = listing.find_all('div', {'class': "td"})[2].text.strip()
        category_text = listing.find_all('div', {'class': "td"})[2].text
        category_text = cleanString(category_text).strip()
        category.append(category_text)

        # Finding the hrefs
@ -190,11 +201,12 @@ def zeroday_listing_parser(soup):
        href.append(description_link)

        # Finding the views
        views_text = listing.find_all('div', {'class': "td"})[3].text.strip()
        views_text = listing.find_all('div', {'class': "td"})[3].text.replace(' ', '').strip()
        views.append(views_text)

        # Finding the date added
        date = listing.find('div', {'class': "td"}).find('a').text.strip()
        # date = datetime.strptime(date, "%d-%m-%Y")
        addDate.append(date)

        # Finding the BTC and USD/GOLD