Browse Source

Crawler and parser for CityMarket; utilities.py was also modified to support CityMarket.

main
andymunoz92 1 year ago
parent
commit
735f4cfd19
3 changed files with 33 additions and 60 deletions
  1. +12
    -15
      MarketPlaces/CityMarket/crawler_selenium.py
  2. +19
    -44
      MarketPlaces/CityMarket/parser.py
  3. +2
    -1
      MarketPlaces/Utilities/utilities.py

+ 12
- 15
MarketPlaces/CityMarket/crawler_selenium.py View File

@ -131,10 +131,10 @@ def login(driver):
input("Press ENTER when CAPTCHA is complete and login page has loaded\n")
# entering username and password into input boxes
usernameBox = driver.find_element(by=By.XPATH, value='//*[@id="username"]')
usernameBox = driver.find_element(by=By.XPATH, value='//input[@id="username"]')
# Username here
usernameBox.send_keys('findingmykeys')
passwordBox = driver.find_element(by=By.XPATH, value='//*[@id="inputPassword3"]')
passwordBox = driver.find_element(by=By.XPATH, value='//input[@id="inputPassword3"]')
# Password here
passwordBox.send_keys('ican’tFindMycarKey$')
@ -185,16 +185,10 @@ def getNameFromURL(url):
def getInterestedLinks():
links = []
# # Hiring hacker
# links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=3')
# virus and malware
# links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=15')
# # Hire hacker
links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=3')
# # ddos
# links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=16')
# # software
# links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=17')
# # botnets
# links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=18')
links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=16')
# # hacking service
links.append('http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/?sub_id=31')
@ -227,6 +221,7 @@ def crawlForum(driver):
list = productPages(html)
for item in list:
# what is this line doing?
itemURL = urlparse.urljoin(baseURL, str(item))
try:
driver.get(itemURL)
@ -236,15 +231,17 @@ def crawlForum(driver):
driver.back()
# comment out
break
# break
# comment out
"""count += 1
if count == 1:
break
break"""
try:
link = driver.find_element(by=By.XPATH, value=
'/html/body/div[1]/div/div[2]/nav/ul/li[5]/a').get_attribute('href')
#link = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div/div[2]/nav/ul/li[5]/a').get_attribute('href')
link = driver.find_element(by=By.XPATH,
value='//a[@rel="next"]').get_attribute('href')
if link == "":
raise NoSuchElementException
count += 1


+ 19
- 44
MarketPlaces/CityMarket/parser.py View File

@ -38,42 +38,20 @@ def city_description_parser(soup):
vendor_image = "-1" # 20 Vendor_Image
divmd7 = soup.find('div', {'class': "col-md-7"})
ptag = soup.findAll('p')
# Finding Product Name
# NA
# Finding Vendor
vendor = divmd7.find('a').text.strip()
# Finding Vendor Rating
# NA
# Finding Successful Transactions
success = soup.find('span', {'class': "badge-primary"})
# Finding Prices
USD = soup.find('span', {'class': "total"}).text.strip()
BTC = soup.find('div', {'class': "text-center"}).text.strip()
# Finding Escrow
escrow = ptag[-1].text.strip()
# Finding the Product Category
category = ptag[-2].text.strip()
# Finding the Product Quantity Available
# NA
# Finding Number Sold
# NA
tempBTC = soup.find('div', {'class': "text-center"}).text.strip()
BTC = tempBTC.replace("BTC", "").strip()
# Finding Shipment Information (Origin)
# NA
# Finding Shipment Information (Destination)
# NA
# Finding Product Image
image = soup.find('img', {'class': 'img-fluid'})
image = image.get('src')
image = image.split('base64,')[-1]
# Finding the Product description
describe = soup.find('div', {'class': "text-white"}).text
@ -150,7 +128,7 @@ def city_listing_parser(soup):
image_vendor = [] # 21 Vendor_Image
href = [] # 22 Product_Links
listing = soup.findAll('div', {"class": "card"})
listing = soup.findAll('div', {"class": "p-4"})
# Populating the Number of Products
nm = len(listing)
@ -163,7 +141,12 @@ def city_listing_parser(soup):
link = cleanLink(link)
href.append(link)
# Finding the Product
# Category
tempCategory = soup.find('select', {"name": "category"})
tempCategory = tempCategory.find('option', selected=True).text.strip()
category.append(tempCategory)
# Product Name
product = a.find('h4', {"class": "text-center"}).text
product = product.replace('\n', ' ')
product = product.replace(",", "")
@ -171,9 +154,7 @@ def city_listing_parser(soup):
product = product.strip()
name.append(product)
bae = a.find('div', {'class': "media-content"}).find('div').find_all('div')
# Finding Prices
# USD and BTC Price
price = a.find('div', {"class": "price"}).text
tempUSD = price.split("~")[0]
tempUSD = tempUSD.replace("$", "")
@ -185,17 +166,11 @@ def city_listing_parser(soup):
tempBTC = tempBTC.strip()
BTC.append(tempBTC)
# Finding the Vendor
# NA
# Finding the Category
# NA
# Finding Number Sold and Quantity Left
# NA
# Finding Successful Transactions
# NA
# Img
product_image = a.find('img')
product_image = product_image.get('src')
product_image = product_image.split('base64,')[-1]
image.append(product_image)
# Searching for CVE and MS categories
cve = a.findAll(text=re.compile('CVE-\d{4}-\d{4}'))


+ 2
- 1
MarketPlaces/Utilities/utilities.py View File

@ -252,7 +252,8 @@ def organizeProducts(marketplace, nm, vendor, rating_vendor, success_vendor, nom
lne = marketplace # 0
lne += ","
lne += vendor[n] # 1
# Added for CityMarket
lne += "=1" if len(vendor) == 0 else vendor[n] # 1
lne += ","
lne += "-1" if len(rating_vendor) == 0 else rating_vendor[n] # 2
lne += ","


Loading…
Cancel
Save