Browse Source

Register DarkMarket crawler and parser; rename TheDarkMarket parser functions

main
Isabelle Wang 7 months ago
parent
commit
0bbaefb23f
4 changed files with 18 additions and 6 deletions
  1. +3
    -0
      MarketPlaces/Initialization/markets_mining.py
  2. +11
    -2
      MarketPlaces/Initialization/prepare_parser.py
  3. +1
    -1
      MarketPlaces/TheDarkMarket/crawler_selenium.py
  4. +3
    -3
      MarketPlaces/TheDarkMarket/parser.py

+ 3
- 0
MarketPlaces/Initialization/markets_mining.py View File

@@ -25,6 +25,7 @@ from MarketPlaces.TheDarkMarket.crawler_selenium import crawler as crawlerTheDar
from MarketPlaces.GoFish.crawler_selenium import crawler as crawlerGoFish from MarketPlaces.GoFish.crawler_selenium import crawler as crawlerGoFish
from MarketPlaces.ZeroDay.crawler_selenium import crawler as crawlerZeroDay from MarketPlaces.ZeroDay.crawler_selenium import crawler as crawlerZeroDay
from MarketPlaces.Torzon.crawler_selenium import crawler as crawlerTorzon from MarketPlaces.Torzon.crawler_selenium import crawler as crawlerTorzon
from MarketPlaces.DarkMarket.crawler_selenium import crawler as crawlerDarkMarket
import configparser import configparser
import os import os
@@ -138,5 +139,7 @@ if __name__ == '__main__':
crawlerZeroDay() crawlerZeroDay()
elif mkt == "Torzon": elif mkt == "Torzon":
crawlerTorzon() crawlerTorzon()
elif mkt == "DarkMarket":
crawlerDarkMarket()
print("\nScraping process completed!") print("\nScraping process completed!")

+ 11
- 2
MarketPlaces/Initialization/prepare_parser.py View File

@@ -27,6 +27,7 @@ from MarketPlaces.WeTheNorth.parser import *
from MarketPlaces.Torzon.parser import * from MarketPlaces.Torzon.parser import *
from MarketPlaces.GoFish.parser import * from MarketPlaces.GoFish.parser import *
from MarketPlaces.ZeroDay.parser import * from MarketPlaces.ZeroDay.parser import *
from MarketPlaces.DarkMarket.parser import *
from MarketPlaces.Classifier.classify_product import predict from MarketPlaces.Classifier.classify_product import predict
from Translator.translate import translate from Translator.translate import translate
@@ -158,7 +159,7 @@ def parse_listing(marketPlace, listingFile, soup, createLog, logFile):
elif marketPlace == "CypherMarketplace": elif marketPlace == "CypherMarketplace":
rw = cyphermarketplace_listing_parser(soup) rw = cyphermarketplace_listing_parser(soup)
elif marketPlace == "TheDarkMarket": elif marketPlace == "TheDarkMarket":
rw = darkmarket_listing_parser(soup)
rw = thedarkmarket_listing_parser(soup)
elif marketPlace == "WeTheNorth": elif marketPlace == "WeTheNorth":
rw = wethenorth_listing_parser(soup) rw = wethenorth_listing_parser(soup)
elif marketPlace == "GoFish": elif marketPlace == "GoFish":
@@ -167,6 +168,8 @@ def parse_listing(marketPlace, listingFile, soup, createLog, logFile):
rw = zeroday_listing_parser(soup) rw = zeroday_listing_parser(soup)
elif marketPlace == "Torzon": elif marketPlace == "Torzon":
rw = torzon_listing_parser(soup) rw = torzon_listing_parser(soup)
elif marketPlace == "DarkMarket":
rw = darkmarket_listing_parser(soup)
else: else:
print("MISSING CALL TO LISTING PARSER IN PREPARE_PARSER.PY!") print("MISSING CALL TO LISTING PARSER IN PREPARE_PARSER.PY!")
raise Exception raise Exception
@@ -216,7 +219,7 @@ def parse_description(marketPlace, descriptionFile, soup, createLog, logFile):
elif marketPlace == "CypherMarketplace": elif marketPlace == "CypherMarketplace":
rmm = cyphermarketplace_description_parser(soup) rmm = cyphermarketplace_description_parser(soup)
elif marketPlace == "TheDarkMarket": elif marketPlace == "TheDarkMarket":
rmm = darkmarket_description_parser(soup)
rmm = thedarkmarket_description_parser(soup)
elif marketPlace == "WeTheNorth": elif marketPlace == "WeTheNorth":
rmm = wethenorth_description_parser(soup) rmm = wethenorth_description_parser(soup)
elif marketPlace == "GoFish": elif marketPlace == "GoFish":
@@ -225,6 +228,8 @@ def parse_description(marketPlace, descriptionFile, soup, createLog, logFile):
rmm = zeroday_description_parser(soup) rmm = zeroday_description_parser(soup)
elif marketPlace == "Torzon": elif marketPlace == "Torzon":
rmm = torzon_description_parser(soup) rmm = torzon_description_parser(soup)
elif marketPlace == "DarkMarket":
rmm = darkmarket_description_parser(soup)
else: else:
print("MISSING CALL TO DESCRIPTION PARSER IN PREPARE_PARSER.PY!") print("MISSING CALL TO DESCRIPTION PARSER IN PREPARE_PARSER.PY!")
raise Exception raise Exception
@@ -250,6 +255,10 @@ def get_source_language(marketPlace):
lang = 'english' lang = 'english'
elif marketPlace == "Incogsnoo": elif marketPlace == "Incogsnoo":
lang = 'english' lang = 'english'
elif marketPlace == "CityMarket":
lang = 'english'
elif marketPlace == "DarkMarket":
lang = 'english'
else: else:
print("MISSING CALL TO GET LANGUAGE IN PREPARE_PARSER.PY!") print("MISSING CALL TO GET LANGUAGE IN PREPARE_PARSER.PY!")
lang = 'auto' lang = 'auto'


+ 1
- 1
MarketPlaces/TheDarkMarket/crawler_selenium.py View File

@@ -22,7 +22,7 @@ from datetime import date
import subprocess import subprocess
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from MarketPlaces.Initialization.prepare_parser import new_parse from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.TheDarkMarket.parser import darkmarket_links_parser
from MarketPlaces.TheDarkMarket.parser import thedarkmarket_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML from MarketPlaces.Utilities.utilities import cleanHTML
counter = 1 counter = 1


+ 3
- 3
MarketPlaces/TheDarkMarket/parser.py View File

@@ -8,7 +8,7 @@ from bs4 import BeautifulSoup, ResultSet, Tag
# This is the method to parse the Description Pages (one page to each Product in the Listing Pages) # This is the method to parse the Description Pages (one page to each Product in the Listing Pages)
def darkmarket_description_parser(soup: BeautifulSoup):
def thedarkmarket_description_parser(soup: BeautifulSoup):
# Fields to be parsed # Fields to be parsed
@@ -75,7 +75,7 @@ def darkmarket_description_parser(soup: BeautifulSoup):
# This is the method to parse the Listing Pages # This is the method to parse the Listing Pages
def darkmarket_listing_parser(soup: BeautifulSoup):
def thedarkmarket_listing_parser(soup: BeautifulSoup):
# Fields to be parsed # Fields to be parsed
nm = 0 # *Total_Products (Should be Integer) nm = 0 # *Total_Products (Should be Integer)
@@ -165,7 +165,7 @@ def darkmarket_listing_parser(soup: BeautifulSoup):
image_vendor) image_vendor)
def darkmarket_links_parser(soup: BeautifulSoup):
def thedarkmarket_links_parser(soup: BeautifulSoup):
# Returning all links that should be visited by the Crawler # Returning all links that should be visited by the Crawler
href = [] href = []


Loading…
Cancel
Save