Browse Source

Crawler and parser for CityMarket; utilities.py was also modified to support CityMarket.

main
andymunoz92 1 year ago
parent
commit
735f4cfd19
3 changed files with 33 additions and 60 deletions
  1. +12
    -15
      MarketPlaces/CityMarket/crawler_selenium.py
  2. +19
    -44
      MarketPlaces/CityMarket/parser.py
  3. +2
    -1
      MarketPlaces/Utilities/utilities.py

+ 12
- 15
MarketPlaces/CityMarket/crawler_selenium.py View File

@ -131,10 +131,10 @@ def login(driver):
input("Press ENTER when CAPTCHA is complete and login page has loaded\n")
# entering username and password into input boxes
usernameBox = driver.find_element(by=By.XPATH, value='//*[@id="username"]')
usernameBox = driver.find_element(by=By.XPATH, value='//input[@id="username"]')
# Username here
usernameBox.send_keys('findingmykeys')
passwordBox = driver.find_element(by=By.XPATH, value='//*[@id="inputPassword3"]')
passwordBox = driver.find_element(by=By.XPATH, value='//input[@id="inputPassword3"]')
# Password here
passwordBox.send_keys('ican’tFindMycarKey$')
@ -185,16 +185,10 @@ def getNameFromURL(url):
def getInterestedLinks():
links = []
# # Hiring hacker
# links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=3')
# virus and malware
# links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=15')
# # Hire hacker
links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=3')
# # ddos
# links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=16')
# # software
# links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=17')
# # botnets
# links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=18')
links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=16')
# # hacking service
links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=31')
@ -227,6 +221,7 @@ def crawlForum(driver):
list = productPages(html)
for item in list:
# what is this line doing?
itemURL = urlparse.urljoin(baseURL, str(item))
try:
driver.get(itemURL)
@ -236,15 +231,17 @@ def crawlForum(driver):
driver.back()
# comment out
break
# break
# comment out
"""count += 1
if count == 1:
break
break"""
try:
link = driver.find_element(by=By.XPATH, value=
'/html/body/div[1]/div/div[2]/nav/ul/li[5]/a').get_attribute('href')
#link = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div/div[2]/nav/ul/li[5]/a').get_attribute('href')
link = driver.find_element(by=By.XPATH,
value='//a[@rel="next"]').get_attribute('href')
if link == "":
raise NoSuchElementException
count += 1


+ 19
- 44
MarketPlaces/CityMarket/parser.py View File

@ -38,42 +38,20 @@ def city_description_parser(soup):
vendor_image = "-1" # 20 Vendor_Image
divmd7 = soup.find('div', {'class': "col-md-7"})
ptag = soup.findAll('p')
# Finding Product Name
# NA
# Finding Vendor
vendor = divmd7.find('a').text.strip()
# Finding Vendor Rating
# NA
# Finding Successful Transactions
success = soup.find('span', {'class': "badge-primary"})
# Finding Prices
USD = soup.find('span', {'class': "total"}).text.strip()
BTC = soup.find('div', {'class': "text-center"}).text.strip()
# Finding Escrow
escrow = ptag[-1].text.strip()
# Finding the Product Category
category = ptag[-2].text.strip()
# Finding the Product Quantity Available
# NA
# Finding Number Sold
# NA
tempBTC = soup.find('div', {'class': "text-center"}).text.strip()
BTC = tempBTC.replace("BTC", "").strip()
# Finding Shipment Information (Origin)
# NA
# Finding Shipment Information (Destination)
# NA
# Finding Product Image
image = soup.find('img', {'class': 'img-fluid'})
image = image.get('src')
image = image.split('base64,')[-1]
# Finding the Product description
describe = soup.find('div', {'class': "text-white"}).text
@ -150,7 +128,7 @@ def city_listing_parser(soup):
image_vendor = [] # 21 Vendor_Image
href = [] # 22 Product_Links
listing = soup.findAll('div', {"class": "card"})
listing = soup.findAll('div', {"class": "p-4"})
# Populating the Number of Products
nm = len(listing)
@ -163,7 +141,12 @@ def city_listing_parser(soup):
link = cleanLink(link)
href.append(link)
# Finding the Product
# Category
tempCategory = soup.find('select', {"name": "category"})
tempCategory = tempCategory.find('option', selected=True).text.strip()
category.append(tempCategory)
# Product Name
product = a.find('h4', {"class": "text-center"}).text
product = product.replace('\n', ' ')
product = product.replace(",", "")
@ -171,9 +154,7 @@ def city_listing_parser(soup):
product = product.strip()
name.append(product)
bae = a.find('div', {'class': "media-content"}).find('div').find_all('div')
# Finding Prices
# USD and BTC Price
price = a.find('div', {"class": "price"}).text
tempUSD = price.split("~")[0]
tempUSD = tempUSD.replace("$", "")
@ -185,17 +166,11 @@ def city_listing_parser(soup):
tempBTC = tempBTC.strip()
BTC.append(tempBTC)
# Finding the Vendor
# NA
# Finding the Category
# NA
# Finding Number Sold and Quantity Left
# NA
# Finding Successful Transactions
# NA
# Img
product_image = a.find('img')
product_image = product_image.get('src')
product_image = product_image.split('base64,')[-1]
image.append(product_image)
# Searching for CVE and MS categories
cve = a.findAll(text=re.compile('CVE-\d{4}-\d{4}'))


+ 2
- 1
MarketPlaces/Utilities/utilities.py View File

@ -252,7 +252,8 @@ def organizeProducts(marketplace, nm, vendor, rating_vendor, success_vendor, nom
lne = marketplace # 0
lne += ","
lne += vendor[n] # 1
# Added for CityMarket
lne += "=1" if len(vendor) == 0 else vendor[n] # 1
lne += ","
lne += "-1" if len(rating_vendor) == 0 else rating_vendor[n] # 2
lne += ","


Loading…
Cancel
Save