Browse Source

Updates from virtual machine

main
Khoi 1 year ago
parent
commit
3784e8d533
43 changed files with 6095 additions and 375 deletions
  1. +1
    -0
      .idea/.gitignore
  2. +1
    -0
      .idea/.name
  3. +586
    -0
      Forums/AbyssForum/HTML_Pages/06262023/Description/viewtopicphpt1402.html
  4. +1012
    -0
      Forums/AbyssForum/HTML_Pages/06262023/Description/viewtopicphpt849.html
  5. +1468
    -0
      Forums/AbyssForum/HTML_Pages/06262023/Listing/httpqyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjidonionviewforumphpf26sid6f7add746810784861a7ec31703a3757.html
  6. +1462
    -0
      Forums/AbyssForum/HTML_Pages/06262023/Listing/httpqyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjidonionviewforumphpf26start25.html
  7. +16
    -16
      Forums/AbyssForum/crawler_selenium.py
  8. +31
    -127
      Forums/AbyssForum/parser.py
  9. +19
    -20
      Forums/Altenens/crawler_selenium.py
  10. +28
    -0
      Forums/CryptBB/HTML_Pages/06262023/Description/Read/showthreadphptid16584.html
  11. +28
    -0
      Forums/CryptBB/HTML_Pages/06262023/Description/Read/showthreadphptid2628.html
  12. +8
    -0
      Forums/CryptBB/HTML_Pages/06262023/Listing/httpcryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebidonionforumdisplayphpfid86.html
  13. +8
    -0
      Forums/CryptBB/HTML_Pages/06262023/Listing/httpcryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebidonionforumdisplayphpfid86page2.html
  14. +22
    -19
      Forums/CryptBB/crawler_selenium.py
  15. +2
    -2
      Forums/DB_Connection/db_connection.py
  16. +9
    -9
      Forums/HiddenAnswers/crawler_selenium.py
  17. +1
    -0
      Forums/Initialization/forumsList.txt
  18. +8
    -1
      Forums/Initialization/forums_mining.py
  19. +53
    -3
      Forums/Initialization/geckodriver.log
  20. +55
    -21
      Forums/Initialization/prepare_parser.py
  21. +15
    -1
      Forums/OnniForums/HTML_Pages/06272023/Listing/httponnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qdonionForumHackingCrackingtutorials.html
  22. +14
    -16
      Forums/OnniForums/crawler_selenium.py
  23. +9
    -5
      MarketPlaces/AnonymousMarketplace/crawler_selenium.py
  24. +9
    -5
      MarketPlaces/Apocalypse/crawler_selenium.py
  25. +9
    -5
      MarketPlaces/CityMarket/crawler_selenium.py
  26. +10
    -5
      MarketPlaces/CypherMarketplace/crawler_selenium.py
  27. +2
    -2
      MarketPlaces/DB_Connection/db_connection.py
  28. +14
    -14
      MarketPlaces/DarkFox/crawler_selenium.py
  29. +22
    -17
      MarketPlaces/DarkMatter/crawler_selenium.py
  30. +2
    -2
      MarketPlaces/DarkMatter/parser.py
  31. +9
    -5
      MarketPlaces/DarkTor/crawler_selenium.py
  32. +8
    -5
      MarketPlaces/DigitalThriftShop/crawler_selenium.py
  33. +1066
    -17
      MarketPlaces/Initialization/geckodriver.log
  34. +5
    -3
      MarketPlaces/Initialization/markets_mining.py
  35. +5
    -3
      MarketPlaces/Initialization/prepare_parser.py
  36. +9
    -5
      MarketPlaces/LionMarketplace/crawler_selenium.py
  37. +9
    -5
      MarketPlaces/M00nkeyMarket/crawler_selenium.py
  38. +9
    -5
      MarketPlaces/MikesGrandStore/crawler_selenium.py
  39. +21
    -17
      MarketPlaces/ThiefWorld/crawler_selenium.py
  40. +9
    -5
      MarketPlaces/Tor2door/crawler_selenium.py
  41. +9
    -8
      MarketPlaces/TorBay/crawler_selenium.py
  42. +9
    -5
      MarketPlaces/TorMarket/crawler_selenium.py
  43. +3
    -2
      setup.ini

+ 1
- 0
.idea/.gitignore View File

@ -2,6 +2,7 @@
/shelf/ /shelf/
/workspace.xml /workspace.xml
/selenium/geckodriver.exe /selenium/geckodriver.exe
setup.ini
*.html *.html
*.log *.log
*.png *.png

+ 1
- 0
.idea/.name View File

@ -0,0 +1 @@
forums_mining.py

+ 586
- 0
Forums/AbyssForum/HTML_Pages/06262023/Description/viewtopicphpt1402.html View File

@ -0,0 +1,586 @@
<html dir="ltr" lang="en-gb"><head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- Chrome, Firefox OS and Opera -->
<meta name="theme-color" content="#094c8a">
<!-- Windows Phone -->
<meta name="msapplication-navbutton-color" content="#094c8a">
<!-- iOS Safari -->
<meta name="apple-mobile-web-app-status-bar-style" content="#094c8a">
<title>Get List For WinPot malware,cutlet maker, Yoda and UPX type ATM jackpotting. - Abyss Forum| forum, drugs,guns,porn,hacking,crypto</title>
<link rel="alternate" type="application/atom+xml" title="Feed - Abyss Forum| forum, drugs,guns,porn,hacking,crypto" href="/app.php/feed"> <link rel="alternate" type="application/atom+xml" title="Feed - New Topics" href="/app.php/feed/topics"> <link rel="alternate" type="application/atom+xml" title="Feed - Forum - Hacked Database" href="/app.php/feed/forum/26"> <link rel="alternate" type="application/atom+xml" title="Feed - Topic - Get List For WinPot malware,cutlet maker, Yoda and UPX type ATM jackpotting." href="/app.php/feed/topic/1402">
<link rel="canonical" href="http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewtopic.php?t=1402">
<!--
phpBB style name: Supernova
Based on style: prosilver (this is the default phpBB3 style)
Original author: Tom Beddard ( http://www.subBlue.com/ )
Modified by: Christian 2.0
Updated by: MannixMD @MannixMD
-->
<link href="./assets/css/font-awesome.min.css?assets_version=22" rel="stylesheet">
<link href="./styles/prosilver/theme/stylesheet.css?assets_version=22" rel="stylesheet">
<link href="./styles/Supernova/theme/supernova.css?assets_version=22" rel="stylesheet">
<link href="./styles/Supernova/theme/en/stylesheet.css?assets_version=22" rel="stylesheet">
<!--[if lte IE 9]>
<link href="./styles/Supernova/theme/tweaks.css?assets_version=22" rel="stylesheet">
<![endif]-->
<meta name="description" content="Get List For WinPot malware,cutlet maker, Yoda and UPX type ATM jackpotting WinPot Malware WinPot malware ATMii Malware Cutler Malware Buy ATM jackpotting virus">
</script></script>
{"@context":"https://schema.org","@type":"DiscussionForumPosting","url":"http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewtopic.php?t=1402","headline":"Get List For WinPot malware,cutlet maker, Yoda and UPX type ATM jackpotting.","description":"Get List For WinPot malware,cutlet maker, Yoda and UPX type ATM jackpotting WinPot Malware WinPot malware ATMii Malware Cutler Malware Buy ATM jackpotting virus","author":{"@type":"Person","name":"darkclonee"},"datePublished":"2023-04-29T22:33:53+00:00","articleSection":"Hacked Database","publisher":{"@type":"Organization","name":"Abyss Forum| forum, drugs,guns,porn,hacking,crypto","url":"http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion"}}
</script>
<link href="./ext/phpbb/pages/styles/prosilver/theme/pages_common.css?assets_version=22" rel="stylesheet" media="screen">
<link rel="stylesheet" href="./ext/danieltj/verifiedprofiles/styles/all/theme/verifiedprofiles.css">
</head>
<body id="phpbb" class="notouch section-viewtopic ltr hasjs">
<div id="page-header">
<div class="headerbar" role="banner">
<div class="inner">
<div id="site-description" class="site-description">
<a id="logo" class="logo" href="http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion" title="Home">
<span class="site_logo"></span>
</a>
<p class="skiplink"><a href="#start_here">Skip to content</a></p>
</div>
<div id="search-box" class="search-box search-header" role="search">
<form action="./search.php" method="get" id="search">
<fieldset>
<input name="keywords" id="keywords" type="search" maxlength="128" title="Search for keywords" class="inputbox search tiny" size="20" value="" placeholder="Search…">
<button class="button button-search" type="submit" title="Search">
<i class="icon fa-search fa-fw" aria-hidden="true"></i><span class="sr-only">Search</span>
</button>
<a href="./search.php" class="button button-search-end" title="Advanced search">
<i class="icon fa-cog fa-fw" aria-hidden="true"></i><span class="sr-only">Advanced search</span>
</a>
</fieldset>
</form>
</div>
</div>
</div>
<div class="navigation-top" role="navigation">
<ul id="nav-main" class="nav-main linklist" role="menubar">
<li id="quick-links" class="quick-links dropdown-container responsive-menu" data-skip-responsive="true">
<a href="#" class="dropdown-trigger dropdown-toggle">
<i class="icon fa-bars fa-fw" aria-hidden="true"></i><span>Quick links</span>
</a>
<div class="dropdown">
<div class="pointer"><div class="pointer-inner"></div></div>
<ul class="dropdown-contents" role="menu">
<li class="small-icon icon-pages icon-page-abyss-directory">
<a href="/app.php/abyss-directory" title="Welcome to abyss directory link collection" role="menuitem">
<span>Abyss Directory</span>
</a>
</li>
<li class="small-icon icon-pages icon-page-about">
<a href="/app.php/about" title="about Abyss Forum" role="menuitem">
<span>about</span>
</a>
</li>
<li class="separator"></li>
<li>
<a href="./search.php?search_id=unanswered" role="menuitem">
<i class="icon fa-file-o fa-fw icon-gray" aria-hidden="true"></i><span>Unanswered topics</span>
</a>
</li>
<li>
<a href="./search.php?search_id=active_topics" role="menuitem">
<i class="icon fa-file-o fa-fw icon-blue" aria-hidden="true"></i><span>Active topics</span>
</a>
</li>
<li class="separator"></li>
<li>
<a href="./search.php" role="menuitem">
<i class="icon fa-search fa-fw" aria-hidden="true"></i><span>Search</span>
</a>
</li>
<li class="separator"></li>
<li class="small-icon icon-pages icon-page-abyss-directory">
<a href="/app.php/abyss-directory" title="Welcome to abyss directory link collection" role="menuitem">
<span>Abyss Directory</span>
</a>
</li>
<li class="small-icon icon-pages icon-page-about">
<a href="/app.php/about" title="about Abyss Forum" role="menuitem">
<span>about</span>
</a>
</li>
</ul>
</div>
</li>
<li class="small-icon icon-pages icon-page-about no-bulletin">
<a href="/app.php/about" title="about Abyss Forum" role="menuitem">
<span>about</span>
</a>
</li>
<li class="small-icon icon-pages icon-page-abyss-directory no-bulletin">
<a href="/app.php/abyss-directory" title="Welcome to abyss directory link collection" role="menuitem">
<span>Abyss Directory</span>
</a>
</li>
<li data-skip-responsive="true">
<a href="/app.php/help/faq" rel="help" title="Frequently Asked Questions" role="menuitem">
<i class="icon fa-question-circle fa-fw" aria-hidden="true"></i><span>FAQ</span>
</a>
</li>
<li class="small-icon icon-pages icon-page-abyss-directory no-bulletin">
<a href="/app.php/abyss-directory" title="Welcome to abyss directory link collection" role="menuitem">
<span>Abyss Directory</span>
</a>
</li>
<li class="small-icon icon-pages icon-page-about no-bulletin">
<a href="/app.php/about" title="about Abyss Forum" role="menuitem">
<span>about</span>
</a>
</li>
<li class="rightside" data-skip-responsive="true">
<a href="./ucp.php?mode=login&amp;redirect=viewtopic.php%3Ft%3D1402" title="Login" accesskey="x" role="menuitem">
<i class="icon fa-power-off fa-fw" aria-hidden="true"></i><span>Login</span>
</a>
</li>
<li class="rightside" data-skip-responsive="true">
<a href="./ucp.php?mode=register" role="menuitem">
<i class="icon fa-pencil-square-o fa-fw" aria-hidden="true"></i><span>Register</span>
</a>
</li>
</ul>
</div> </div>
<div id="wrap" class="wrap">
<a id="top" class="top-anchor" accesskey="t"></a>
<div class="navbar navbar-wrap" role="navigation">
<div class="inner">
<ul id="nav-breadcrumbs" class="nav-breadcrumbs linklist navlinks" role="menubar">
<li data-skip-responsive="" class="small-icon icon-pages icon-page-abyss-directory breadcrumbs" itemscope="" itemtype="https://schema.org/BreadcrumbList">
<span class="crumb" itemtype="https://schema.org/ListItem" itemprop="itemListElement" itemscope=""><a itemprop="item" href="/app.php/abyss-directory" title="Welcome to abyss directory link collection"><span itemprop="name">Abyss Directory</span></a><meta itemprop="position" content="0"></span>
</li>
<li data-skip-responsive="" class="small-icon icon-pages icon-page-about breadcrumbs" itemscope="" itemtype="https://schema.org/BreadcrumbList">
<span class="crumb" itemtype="https://schema.org/ListItem" itemprop="itemListElement" itemscope=""><a itemprop="item" href="/app.php/about" title="about Abyss Forum"><span itemprop="name">about</span></a><meta itemprop="position" content="0"></span>
</li>
<li class="breadcrumbs" itemscope="" itemtype="https://schema.org/BreadcrumbList">
<span class="crumb" itemtype="https://schema.org/ListItem" itemprop="itemListElement" itemscope=""><a itemprop="item" href="http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion" data-navbar-reference="home" title="Home"><i class="icon fa-home fa-fw" aria-hidden="true"></i><span itemprop="name">Home</span></a><meta itemprop="position" content="1"></span>
<span class="crumb" itemtype="https://schema.org/ListItem" itemprop="itemListElement" itemscope=""><a itemprop="item" href="./index.php" accesskey="h" data-navbar-reference="index" title="Board"><span itemprop="name">Board</span></a><meta itemprop="position" content="2"></span>
<span class="crumb" itemtype="https://schema.org/ListItem" itemprop="itemListElement" itemscope="" data-forum-id="3"><a itemprop="item" href="./viewforum.php?f=3" title="Hacking"><span itemprop="name">Hacking</span></a><meta itemprop="position" content="3"></span>
<span class="crumb" itemtype="https://schema.org/ListItem" itemprop="itemListElement" itemscope="" data-forum-id="26"><a itemprop="item" href="./viewforum.php?f=26" title="Hacked Database"><span itemprop="name">Hacked Database</span></a><meta itemprop="position" content="4"></span>
</li>
<li data-skip-responsive="" class="small-icon icon-pages icon-page-abyss-directory breadcrumbs" itemscope="" itemtype="https://schema.org/BreadcrumbList">
<span class="crumb" itemtype="https://schema.org/ListItem" itemprop="itemListElement" itemscope=""><a itemprop="item" href="/app.php/abyss-directory" title="Welcome to abyss directory link collection"><span itemprop="name">Abyss Directory</span></a><meta itemprop="position" content="5"></span>
</li>
<li data-skip-responsive="" class="small-icon icon-pages icon-page-about breadcrumbs" itemscope="" itemtype="https://schema.org/BreadcrumbList">
<span class="crumb" itemtype="https://schema.org/ListItem" itemprop="itemListElement" itemscope=""><a itemprop="item" href="/app.php/about" title="about Abyss Forum"><span itemprop="name">about</span></a><meta itemprop="position" content="6"></span>
</li>
<li class="rightside responsive-search">
<a href="./search.php" title="View the advanced search options" role="menuitem">
<i class="icon fa-search fa-fw" aria-hidden="true"></i><span class="sr-only">Search</span>
</a>
</li>
</ul>
</div>
</div>
<a id="start_here" class="anchor"></a>
<div id="page-body" class="page-body" role="main">
<h2 class="topic-title"><a href="./viewtopic.php?t=1402">Get List For WinPot malware,cutlet maker, Yoda and UPX type ATM jackpotting.</a></h2>
<!-- NOTE: remove the style="display: none" when you want to have the forum description on the topic body -->
<div style="display: none !important;">Buy and sell hacked database's, accounts, logs etc<br></div>
<div class="action-bar bar-top">
<a href="./posting.php?mode=reply&amp;t=1402" class="button sn-btn" title="Post a reply">
<span>Post Reply</span> <i class="icon fa-reply fa-fw" aria-hidden="true"></i>
</a>
<div class="dropdown-container dropdown-button-control topic-tools">
<span title="Topic tools" class="button button-secondary dropdown-trigger dropdown-select sn-btn dropdown-toggle">
<i class="icon fa-wrench fa-fw" aria-hidden="true"></i>
<span class="caret"><i class="icon fa-sort-down fa-fw" aria-hidden="true"></i></span>
</span>
<div class="dropdown">
<div class="pointer"><div class="pointer-inner"></div></div>
<ul class="dropdown-contents">
<li>
<a href="./viewtopic.php?t=1402&amp;view=print" title="Print view" accesskey="p">
<i class="icon fa-print fa-fw" aria-hidden="true"></i><span>Print view</span>
</a>
</li>
</ul>
</div>
</div>
<div class="search-box" role="search">
<form method="get" id="topic-search" action="./search.php">
<fieldset>
<input class="inputbox search tiny" type="search" name="keywords" id="search_keywords" size="20" placeholder="Search this topic…">
<button class="button button-search" type="submit" title="Search">
<i class="icon fa-search fa-fw" aria-hidden="true"></i><span class="sr-only">Search</span>
</button>
<a href="./search.php" class="button button-search-end" title="Advanced search">
<i class="icon fa-cog fa-fw" aria-hidden="true"></i><span class="sr-only">Advanced search</span>
</a>
<input type="hidden" name="t" value="1402">
<input type="hidden" name="sf" value="msgonly">
</fieldset>
</form>
</div>
<div class="pagination">
1 post
• Page <strong>1</strong> of <strong>1</strong>
</div>
</div>
<div id="p1586" class="post has-profile bg2">
<div class="inner">
<dl class="postprofile" id="profile1586">
<dt class="no-profile-rank no-avatar">
<div class="avatar-container">
</div>
<a href="./memberlist.php?mode=viewprofile&amp;u=789" class="username">darkclonee</a> </dt>
<dd class="profile-posts"><strong>Posts:</strong> <a href="./search.php?author_id=789&amp;sr=posts">13</a></dd> <dd class="profile-joined"><strong>Joined:</strong> Sat Apr 29, 2023 10:33 am</dd>
</dl>
<div class="postbody">
<div id="post_content1586">
<h3 class="first">
<a href="./viewtopic.php?p=1586#p1586">Get List For WinPot malware,cutlet maker, Yoda and UPX type ATM jackpotting.</a>
</h3>
<ul class="post-buttons">
<li>
<a href="./posting.php?mode=quote&amp;p=1586" title="Reply with quote" class="button button-icon-only btn-blue">
<i class="icon fa-quote-left fa-fw" aria-hidden="true"></i><span class="sr-only">Quote</span>
</a>
</li>
<li class="responsive-menu hidden dropdown-container"><a href="javascript:void(0);" class="js-responsive-menu-link responsive-menu-link dropdown-toggle"><i class="icon fa-bars fa-fw" aria-hidden="true"></i></a><div class="dropdown"><div class="pointer"><div class="pointer-inner"></div></div><ul class="dropdown-contents"></ul></div></li></ul>
<p class="author">
<a class="unread" href="./viewtopic.php?p=1586#p1586" title="Post">
<i class="icon fa-file fa-fw icon-lightgray icon-md" aria-hidden="true"></i><span class="sr-only">Post</span>
</a>
<span class="responsive-hide">by <strong><a href="./memberlist.php?mode=viewprofile&amp;u=789" class="username">darkclonee</a></strong> » </span><time datetime="2023-04-29T22:33:53+00:00">Sat Apr 29, 2023 10:33 pm</time>
</p>
<div class="content">Get List For WinPot malware,cutlet maker, Yoda and UPX type ATM jackpotting <br>
<br>
WinPot Malware<br>
WinPot malware<br>
ATMii Malware<br>
Cutler Malware<br>
<br>
Buy ATM jackpotting virus. WinPot requires almost no advanced knowledge or professional computer skills. The fastest way to make money.<br>
This software will help you bring down an ATM machine and you would be able to get the cash.<br>
Contact if you are interested very affordable.<br>
<br>
CONTACT<br>
General support wickr me...jmorgan420<br>
Email..... (<a href="mailto:[email protected]">[email protected]</a>)<br>
Telegram.....Bradley_bil7<br>
WhatsApp....+13239891049</div>
</div>
</div>
<div class="back2top">
<a href="#top" class="top" title="Top">
<i class="icon fa-chevron-circle-up fa-fw icon-gray" aria-hidden="true"></i>
<span class="sr-only">Top</span>
</a>
</div>
</div>
</div>
<div class="action-bar bar-bottom">
<a href="./posting.php?mode=reply&amp;t=1402" class="button sn-btn" title="Post a reply">
<span>Post Reply</span> <i class="icon fa-reply fa-fw" aria-hidden="true"></i>
</a>
<div class="dropdown-container dropdown-button-control topic-tools">
<span title="Topic tools" class="button button-secondary dropdown-trigger dropdown-select sn-btn dropdown-toggle">
<i class="icon fa-wrench fa-fw" aria-hidden="true"></i>
<span class="caret"><i class="icon fa-sort-down fa-fw" aria-hidden="true"></i></span>
</span>
<div class="dropdown">
<div class="pointer"><div class="pointer-inner"></div></div>
<ul class="dropdown-contents">
<li>
<a href="./viewtopic.php?t=1402&amp;view=print" title="Print view" accesskey="p">
<i class="icon fa-print fa-fw" aria-hidden="true"></i><span>Print view</span>
</a>
</li>
</ul>
</div>
</div>
<div class="pagination">
1 post
• Page <strong>1</strong> of <strong>1</strong>
</div>
</div>
<div class="action-bar actions-jump">
<p class="jumpbox-return">
<a href="./viewforum.php?f=26" class="left-box arrow-left" accesskey="r">
<i class="icon fa-angle-left fa-fw icon-black" aria-hidden="true"></i><span>Return to “Hacked Database”</span>
</a>
</p>
<div class="jumpbox dropdown-container dropdown-container-right dropdown-up dropdown-left dropdown-button-control" id="jumpbox">
<span title="Jump to" class="button button-secondary dropdown-trigger dropdown-select sn-btn dropdown-toggle">
<span>Jump to</span>
<span class="caret"><i class="icon fa-sort-down fa-fw" aria-hidden="true"></i></span>
</span>
<div class="dropdown">
<div class="pointer"><div class="pointer-inner"></div></div>
<ul class="dropdown-contents">
<li><a href="./viewforum.php?f=1" class="jumpbox-cat-link"> <span> General</span></a></li>
<li><a href="./viewforum.php?f=2" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Welcome to Abyss Forum</span></a></li>
<li><a href="./viewforum.php?f=3" class="jumpbox-cat-link"> <span> Hacking</span></a></li>
<li><a href="./viewforum.php?f=26" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Hacked Database</span></a></li>
<li><a href="./viewforum.php?f=27" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Hacking Ondemand (Hire a Hacker)</span></a></li>
<li><a href="./viewforum.php?f=28" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Hacking Tools(Malware, ransomware,Exploit)</span></a></li>
<li><a href="./viewforum.php?f=29" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Anonymity and Security</span></a></li>
<li><a href="./viewforum.php?f=30" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Carding Forum</span></a></li>
<li><a href="./viewforum.php?f=31" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Sell CC &amp; CVV</span></a></li>
<li><a href="./viewforum.php?f=32" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Social Media Hacking</span></a></li>
<li><a href="./viewforum.php?f=4" class="jumpbox-cat-link"> <span> Ponography</span></a></li>
<li><a href="./viewforum.php?f=14" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Soft Porn</span></a></li>
<li><a href="./viewforum.php?f=15" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Hard Porn</span></a></li>
<li><a href="./viewforum.php?f=5" class="jumpbox-cat-link"> <span> MarketPlaces</span></a></li>
<li><a href="./viewforum.php?f=16" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Drug Stores</span></a></li>
<li><a href="./viewforum.php?f=17" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Gun Shops</span></a></li>
<li><a href="./viewforum.php?f=18" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Documents</span></a></li>
<li><a href="./viewforum.php?f=19" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Banknotes (Fake Bills)</span></a></li>
<li><a href="./viewforum.php?f=20" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Transfers</span></a></li>
<li><a href="./viewforum.php?f=6" class="jumpbox-cat-link"> <span> Money Laundering and crypto currency</span></a></li>
<li><a href="./viewforum.php?f=23" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Cryptocurrency</span></a></li>
<li><a href="./viewforum.php?f=24" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Money Laundering</span></a></li>
<li><a href="./viewforum.php?f=25" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Crypto Mixers</span></a></li>
<li><a href="./viewforum.php?f=37" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Crypto Wallets</span></a></li>
<li><a href="./viewforum.php?f=7" class="jumpbox-forum-link"> <span> Extremist Groups</span></a></li>
<li><a href="./viewforum.php?f=8" class="jumpbox-cat-link"> <span> Onion Links</span></a></li>
<li><a href="./viewforum.php?f=22" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Onion Links</span></a></li>
<li><a href="./viewforum.php?f=10" class="jumpbox-cat-link"> <span> Tutorials</span></a></li>
<li><a href="./viewforum.php?f=11" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; General Tutorials</span></a></li>
<li><a href="./viewforum.php?f=12" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Hacking Tutorials</span></a></li>
<li><a href="./viewforum.php?f=13" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Cracking Tutorials</span></a></li>
<li><a href="./viewforum.php?f=21" class="jumpbox-forum-link"> <span> Politics</span></a></li>
<li><a href="./viewforum.php?f=34" class="jumpbox-forum-link"> <span> Research Chemicals</span></a></li>
<li><a href="./viewforum.php?f=35" class="jumpbox-forum-link"> <span> Recomended</span></a></li>
<li><a href="./viewforum.php?f=36" class="jumpbox-forum-link"> <span> Scamming Bastards</span></a></li>
<li><a href="./viewforum.php?f=38" class="jumpbox-cat-link"> <span> Organ Donation</span></a></li>
<li><a href="./viewforum.php?f=40" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Organ Market</span></a></li>
<li><a href="./viewforum.php?f=39" class="jumpbox-cat-link"> <span> Buy Girls 8-18</span></a></li>
<li><a href="./viewforum.php?f=41" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Human Trafficking</span></a></li>
</ul>
</div>
</div>
</div>
</div>
<div id="page-footer" class="page-footer" role="contentinfo">
<div class="navbar" role="navigation">
<div class="inner">
<ul id="nav-footer" class="nav-footer linklist compact" role="menubar">
<li class="breadcrumbs">
<span class="crumb"><a href="http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion" data-navbar-reference="home" title="Home"><i class="icon fa-home fa-fw" aria-hidden="true"></i><span>Home</span></a></span> <span class="crumb"><a href="./index.php" data-navbar-reference="index" title="Board"><span>Board</span></a></span> </li>
<li class="responsive-menu hidden rightside dropdown-container"><a href="javascript:void(0);" class="js-responsive-menu-link responsive-menu-link dropdown-toggle"><i class="icon fa-bars fa-fw" aria-hidden="true"></i></a><div class="dropdown"><div class="pointer"><div class="pointer-inner"></div></div><ul class="dropdown-contents"></ul></div></li><li class="small-icon icon-pages icon-page- rightside">
<a href="/app.php/about" title="about Abyss Forum" role="menuitem">
<span>about</span>
</a>
</li>
<li class="small-icon icon-pages icon-page- rightside">
<a href="/app.php/abyss-directory" title="Welcome to abyss directory link collection" role="menuitem">
<span>Abyss Directory</span>
</a>
</li>
<li class="rightside">All times are <span title="UTC">UTC</span></li>
<li class="small-icon icon-pages icon-page-abyss-directory rightside">
<a href="/app.php/abyss-directory" title="Welcome to abyss directory link collection" role="menuitem">
<span>Abyss Directory</span>
</a>
</li>
<li class="small-icon icon-pages icon-page-about rightside">
<a href="/app.php/about" title="about Abyss Forum" role="menuitem">
<span>about</span>
</a>
</li>
<li class="rightside">
<a href="./ucp.php?mode=delete_cookies" data-ajax="true" data-refresh="true" role="menuitem">
<i class="icon fa-trash fa-fw" aria-hidden="true"></i><span>Delete cookies</span>
</a>
</li>
<li class="small-icon icon-pages icon-page-abyss-directory rightside">
<a href="/app.php/abyss-directory" title="Welcome to abyss directory link collection" role="menuitem">
<span>Abyss Directory</span>
</a>
</li>
<li class="small-icon icon-pages icon-page-about rightside">
<a href="/app.php/about" title="about Abyss Forum" role="menuitem">
<span>about</span>
</a>
</li>
<li class="small-icon icon-pages icon-page-abyss-directory rightside">
<a href="/app.php/abyss-directory" title="Welcome to abyss directory link collection" role="menuitem">
<span>Abyss Directory</span>
</a>
</li>
<li class="small-icon icon-pages icon-page-about rightside">
<a href="/app.php/about" title="about Abyss Forum" role="menuitem">
<span>about</span>
</a>
</li>
<li class="rightside" data-last-responsive="true">
<a href="./memberlist.php?mode=contactadmin" role="menuitem">
<i class="icon fa-envelope fa-fw" aria-hidden="true"></i><span>Contact us</span>
</a>
</li>
</ul>
</div>
</div>
<div class="copyright">
<strong><span>*</span>Donate Bitcoin<a href="https://mempool.space/address/bc1qsg5q4s4zq937wglfkgfjrp4ek2y5ywm69lkzr5"><!-- The text field -->
<input type="text" value="(BTC)" id="myInput">
<!-- The button used to copy the text -->
<button onclick="myFunction()">bc1qsg5q4s4zq937wglfkgfjrp4ek2y5ywm69lkzr5</button></a></strong>
<br><strong><span>*</span>EMAIL US <a href="mailto:[email protected]">[email protected]</a></strong>
<br><strong><span>*</span>Style version: 1.1.5</strong>
<p class="footer-row">
<span class="footer-copyright">Powered by <a href="https://www.phpbb.com/">phpBB</a>® Forum Software © phpBB Limited</span>
</p>
<p class="footer-row">
<span>phpBB Two Factor Authentication © <a href="https://www.phpbbextensions.io/" target="_blank">paul999</a></span>
</p>
<p class="footer-row" role="menu">
<a class="footer-link" href="./ucp.php?mode=privacy" title="Privacy" role="menuitem">
<span class="footer-link-text">Privacy</span>
</a>
|
<a class="footer-link" href="./ucp.php?mode=terms" title="Terms" role="menuitem">
<span class="footer-link-text">Terms</span>
</a>
</p>
</div>
<div id="darkenwrapper" class="darkenwrapper" data-ajax-error-title="AJAX error" data-ajax-error-text="Something went wrong when processing your request." data-ajax-error-text-abort="User aborted request." data-ajax-error-text-timeout="Your request timed out; please try again." data-ajax-error-text-parsererror="Something went wrong with the request and the server returned an invalid reply.">
<div id="darken" class="darken">&nbsp;</div>
</div>
<div id="phpbb_alert" class="phpbb_alert" data-l-err="Error" data-l-timeout-processing-req="Request timed out.">
<a href="#" class="alert_close">
<i class="icon fa-times-circle fa-fw" aria-hidden="true"></i>
</a>
<h3 class="alert_title">&nbsp;</h3><p class="alert_text"></p>
</div>
<div id="phpbb_confirm" class="phpbb_alert">
<a href="#" class="alert_close">
<i class="icon fa-times-circle fa-fw" aria-hidden="true"></i>
</a>
<div class="alert_text"></div>
</div>
</div>
</div>
<div>
<a id="bottom" class="anchor" accesskey="z"></a>
</div>
</script>
</script>
var vglnk = {
key: 'e4fd14f5d7f2bb6d80b8f8da1354718c',
sub_id: '278e906f85e6a29e5cf5e6781f9210b0'
};
(function(d, t) {
var s = d.createElement(t); s.type = 'text/javascript'; s.async = true;
s.src = '//cdn.viglink.com/api/vglnk.js';
var r = d.getElementsByTagName(t)[0]; r.parentNode.insertBefore(s, r);
}(document, 'script'));
</script>
</script>
</script>
</body></html>

+ 1012
- 0
Forums/AbyssForum/HTML_Pages/06262023/Description/viewtopicphpt849.html
File diff suppressed because it is too large
View File


+ 1468
- 0
Forums/AbyssForum/HTML_Pages/06262023/Listing/httpqyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjidonionviewforumphpf26sid6f7add746810784861a7ec31703a3757.html
File diff suppressed because it is too large
View File


+ 1462
- 0
Forums/AbyssForum/HTML_Pages/06262023/Listing/httpqyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjidonionviewforumphpf26start25.html
File diff suppressed because it is too large
View File


+ 16
- 16
Forums/AbyssForum/crawler_selenium.py View File

@ -24,8 +24,6 @@ from Forums.Initialization.prepare_parser import new_parse
from Forums.AbyssForum.parser import abyssForum_links_parser from Forums.AbyssForum.parser import abyssForum_links_parser
from Forums.Utilities.utilities import cleanHTML from Forums.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1 counter = 1
baseURL = 'http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/' baseURL = 'http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/'
@ -49,6 +47,8 @@ def startCrawling():
# Opens Tor Browser # Opens Tor Browser
def opentor(): def opentor():
from Forums.Initialization.forums_mining import config
global pid global pid
print("Connecting Tor...") print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -91,6 +91,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile' # Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket # to use Tor proxy and socket
def createFFDriver(): def createFFDriver():
from Forums.Initialization.forums_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -143,16 +145,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name # Gets the full path of the page to be saved along with its appropriate file name
def getFullPathName(url): def getFullPathName(url):
from Forums.Initialization.forums_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + getForumName() + "/HTML_Pages")
fileName = getNameFromURL(url) fileName = getNameFromURL(url)
if isDescriptionLink(url): if isDescriptionLink(url):
#..\CryptBB\HTML_Pages\\
fullPath = r'..\AbyssForum\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else: else:
fullPath = r'..\AbyssForum\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath return fullPath
@ -170,19 +170,19 @@ def getInterestedLinks():
links = [] links = []
# Hacked Database # Hacked Database
links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=26&sid=6f7add746810784861a7ec31703a3757')
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=26')
# Hire a Hacker # Hire a Hacker
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=27&sid=6f7add746810784861a7ec31703a3757')
links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=27')
# # Hacking Tools # # Hacking Tools
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=28&sid=6f7add746810784861a7ec31703a3757')
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=28')
# # Carding Forums # # Carding Forums
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=30&sid=6f7add746810784861a7ec31703a3757')
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=30')
# # Social Media Hacking # # Social Media Hacking
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=32&sid=6f7add746810784861a7ec31703a3757')
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=32')
# # Hacking Tutorials # # Hacking Tutorials
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=12&sid=6f7add746810784861a7ec31703a3757')
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=12')
# # Cracking Tutorials # # Cracking Tutorials
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=13&sid=6f7add746810784861a7ec31703a3757')
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=13')
return links return links


+ 31
- 127
Forums/AbyssForum/parser.py View File

@ -202,139 +202,43 @@ def cryptBB_description_parser(soup):
return row return row
# This is the method to parse the Listing Pages (one page with many posts) # This is the method to parse the Listing Pages (one page with many posts)
def onniForums_listing_parser(soup):
board = "-1" # board name (the previous level of the topic in the Forum categorization tree.
# For instance: Security/Malware/Tools to hack Facebook. The board here should be Malware)
nm = 0 # this variable should receive the number of topics
topic = [] # all topics
user = [] # all users of each topic
post = [] # number of posts of each topic
view = [] # number of views of each topic
addDate = [] # when the topic was created (difficult to find)
href = [] # this variable should receive all cleaned urls (we will use this to do the marge between
# Listing and Description pages)
# Finding the board (should be just one)
board = soup.find('span', {"class": "active"}).text
def AbyssForums_listing_parser(soup: BeautifulSoup):
board = "-1" # board name (the previous level of the topic in the Forum categorization tree.
# For instance: Security/Malware/Tools to hack Facebook. The board here should be Malware)
nm = 0 # this variable should receive the number of topics
topic = [] # all topics
user = [] # all users of each topic
post = [] # number of posts of each topic
view = [] # number of views of each topic
addDate = [] # when the topic was created (difficult to find)
href = [] # this variable should receive all cleaned urls (we will use this to do the marge between
# Listing and Description pages)
#finding the board
board = soup.find("title").text
board = cleanString(board.strip()) board = cleanString(board.strip())
# Finding the repeated tag that corresponds to the listing of topics
type_of_posts = soup.find_all("li", {"class": re.compile("row bg\d")} )
for literature in type_of_posts:
title_of_post = literature.find("a", {"class": "topictitle"}).text
topic.append(title_of_post)
author = literature.find("div", {"class": "topic-poster responsive-hide left-box"}).find("a", {"class": "username"}).text
user.append(author)
num_post = literature.find("dd", {"class": "posts"}).text[1:-3]
post.append(num_post)
num_view = literature.find("dd", {"class": "views"}).text[1:-3]
view.append(num_view)
if int(num_post) != 0:
reply = literature.find("dd", {"class": "lastpost"}).find("a", {"class": "username"}).text
user.append(reply)
date_added = literature.find("time").text
addDate.append(date_added)
nm = len(topic)
itopics = soup.find_all('tr',{"class": "inline_row"})
index = 0
for itopic in itopics:
# For each topic found, the structure to get the rest of the information can be of two types. Testing all of them
# to don't miss any topic
# Adding the topic to the topic list
try:
topics = itopic.find('span', {"class": "subject_old"}).find('a').text
except:
topics = itopic.find('span', {"class": "subject_new"}).find('a').text
topics = re.sub("\[\w*\]", '', topics)
topic.append(cleanString(topics))
# Counting how many topics we have found so far
nm = len(topic)
# Adding the url to the list of urls
try:
link = itopic.find('span', {"class": "subject_old"}).find('a').get('href')
except:
link = itopic.find('span',{"class": "subject_new"}).find('a').get('href')
link = cleanLink(link)
href.append(link)
# Finding the author of the topic
ps = itopic.find('div', {"class":"author smalltext"}).find('a').text
author = ps.strip()
user.append(cleanString(author))
# Finding the number of replies
columns = itopic.findChildren('td',recursive=False)
posts = columns[3].text
post.append(cleanString(posts))
# Finding the number of Views
tview = columns[4].text
view.append(cleanString(tview))
# If no information about when the topic was added, just assign "-1" to the variable
#dt = itopic.find('div', {"class": "responsive-hide"}).text.split('»')[1]
#dt = dt.strip()
#date_time_obj = datetime.strptime(dt,'%a %b %d, %Y %I:%M %p')
#addDate.append(date_time_obj)
addDate.append("-1")
index += 1
return organizeTopics("CryptBB", nm, topic, board, view, post, user, addDate, href)
# if len(tag) > 0:
#
# # Finding the topic
#
# tds = tds[0].find(tag[0])
# topics = tds.text
# topics = topics.replace(u"\xbb","")
# topics = topics.strip()
# topic.append(cleanString(topics))
#
# # Counting how many topics we have found so far
#
# nm = len(topic)
#
# # Adding the url to the list of urls
#
# link = tds.findAll('a', href=True)
# link = link[0].get('href')
# link = cleanLink(link)
# href.append(link)
#
# # Finding the author of the topic
#
# ps = itopic.find('td', {"class": tag[1]}).find('p').find('a')
# if ps == None:
# ps = itopic.find('td', {"class": tag[1]}).find('p')
# ps = ps.text.replace("Started by ","")
# else:
# ps = ps.text
# author = ps.strip()
# user.append(cleanString(author))
#
# # Finding the number of replies
#
# statistics = itopic.find('td', {"class": tag[2]})
# statistics = statistics.text
# statistics = statistics.split("Replies")
# posts = statistics[0].strip()
# post.append(cleanString(posts))
#
# # Finding the number of Views
#
# views = statistics[1]
# views = views.replace("Views","")
# views = views.strip()
# view.append(cleanString(views))
#
# # As no information about when the topic was added, just assign "-1" to the variable
#
# addDate.append("-1")
#return organizeTopics("TheMajesticGarden", nm, topic, board, view, post, user, addDate, href)
#need to change this method
def abyssForum_links_parser(soup): def abyssForum_links_parser(soup):
# Returning all links that should be visited by the Crawler # Returning all links that should be visited by the Crawler


+ 19
- 20
Forums/Altenens/crawler_selenium.py View File

@ -24,8 +24,6 @@ from Forums.Initialization.prepare_parser import new_parse
from Forums.Altenens.parser import altenens_links_parser from Forums.Altenens.parser import altenens_links_parser
from Forums.Utilities.utilities import cleanHTML from Forums.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1 counter = 1
baseURL = 'https://altenens.is/' baseURL = 'https://altenens.is/'
@ -49,6 +47,8 @@ def startCrawling():
# Opens Tor Browser # Opens Tor Browser
def opentor(): def opentor():
from Forums.Initialization.forums_mining import config
global pid global pid
print("Connecting Tor...") print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -61,18 +61,16 @@ def opentor():
# Login using premade account credentials and do login captcha manually # Login using premade account credentials and do login captcha manually
def login(driver): def login(driver):
#click login button #click login button
login = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div[1]/div/div/div/div[1]/a[1]').\
get_attribute('href')
driver.get(login)
# login.click()
# #entering username and password into input boxes
# usernameBox = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div[4]/div/div/div[3]/div/div/div/form/div[1]/div/dl[1]/dd')
# #Username here
# usernameBox.send_keys('mylittlepony45')#sends string to the username box
# passwordBox = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div[4]/div/div/div[3]/div/div/div/form/div[1]/div/dl[2]/dd/div/div')
# #Password here
# passwordBox.send_keys('johnnyTest@18')# sends string to passwordBox
login = driver.find_element(by=By.XPATH, value='//*[@id="top"]/div[1]/div/div/div/div[1]/a[1]')
login.click()
#entering username and password into input boxes
usernameBox = driver.find_element(by=By.XPATH, value='//*[@id="_xfUid-1-1688066635"]')
#Username here
usernameBox.send_keys('mylittlepony45')#sends string to the username box
passwordBox = driver.find_element(by=By.XPATH, value='//*[@id="_xfUid-2-1688066635"]')
#Password here
passwordBox.send_keys('johnnyTest@18')# sends string to passwordBox
input("Press ENTER when CAPTCHA is completed\n") input("Press ENTER when CAPTCHA is completed\n")
@ -108,6 +106,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile' # Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket # to use Tor proxy and socket
def createFFDriver(): def createFFDriver():
from Forums.Initialization.forums_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -160,15 +160,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name # Gets the full path of the page to be saved along with its appropriate file name
def getFullPathName(url): def getFullPathName(url):
from Forums.Initialization.forums_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + getForumName() + "/HTML_Pages")
fileName = getNameFromURL(url) fileName = getNameFromURL(url)
if isDescriptionLink(url): if isDescriptionLink(url):
fullPath = r'..\\Altenens\\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else: else:
fullPath = r'..\\Altenens\\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath return fullPath


+ 28
- 0
Forums/CryptBB/HTML_Pages/06262023/Description/Read/showthreadphptid16584.html
File diff suppressed because it is too large
View File


+ 28
- 0
Forums/CryptBB/HTML_Pages/06262023/Description/Read/showthreadphptid2628.html
File diff suppressed because it is too large
View File


+ 8
- 0
Forums/CryptBB/HTML_Pages/06262023/Listing/httpcryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebidonionforumdisplayphpfid86.html
File diff suppressed because it is too large
View File


+ 8
- 0
Forums/CryptBB/HTML_Pages/06262023/Listing/httpcryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebidonionforumdisplayphpfid86page2.html
File diff suppressed because it is too large
View File


+ 22
- 19
Forums/CryptBB/crawler_selenium.py View File

@ -17,37 +17,36 @@ from PIL import Image
import urllib.parse as urlparse import urllib.parse as urlparse
import os, re, time import os, re, time
import subprocess import subprocess
import configparser
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from Forums.Initialization.prepare_parser import new_parse from Forums.Initialization.prepare_parser import new_parse
from Forums.CryptBB.parser import cryptBB_links_parser from Forums.CryptBB.parser import cryptBB_links_parser
from Forums.Utilities.utilities import cleanHTML from Forums.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1 counter = 1
baseURL = 'http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/' baseURL = 'http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/'
# Opens Tor Browser, crawls the website # Opens Tor Browser, crawls the website
def startCrawling(): def startCrawling():
opentor()
# opentor()
forumName = getForumName() forumName = getForumName()
driver = getAccess()
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
# driver = getAccess()
#
# if driver != 'down':
# try:
# login(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
new_parse(forumName, baseURL, False) new_parse(forumName, baseURL, False)
# Opens Tor Browser # Opens Tor Browser
def opentor(): def opentor():
from Forums.Initialization.forums_mining import config
global pid global pid
print("Connecting Tor...") print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -133,6 +132,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile' # Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket # to use Tor proxy and socket
def createFFDriver(): def createFFDriver():
from Forums.Initialization.forums_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -186,12 +187,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name # Gets the full path of the page to be saved along with its appropriate file name
def getFullPathName(url): def getFullPathName(url):
from Forums.Initialization.forums_mining import CURRENT_DATE
from Forums.Initialization.forums_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + getForumName() + "/HTML_Pages")
fileName = getNameFromURL(url) fileName = getNameFromURL(url)
if isDescriptionLink(url): if isDescriptionLink(url):
fullPath = r'..\\CryptBB\\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else: else:
fullPath = r'..\\CryptBB\\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath return fullPath
@ -264,7 +267,7 @@ def crawlForum(driver):
driver.refresh() driver.refresh()
savePage(driver.page_source, item) savePage(driver.page_source, item)
driver.back() driver.back()
'''
#variable to check if there is a next page for the topic #variable to check if there is a next page for the topic
has_next_topic_page = True has_next_topic_page = True
counter = 1 counter = 1
@ -287,16 +290,16 @@ def crawlForum(driver):
if item == "": if item == "":
raise NoSuchElementException raise NoSuchElementException
has_next_topic_page = False
else: else:
counter += 1 counter += 1
except NoSuchElementException: except NoSuchElementException:
has_next_topic_page = False has_next_topic_page = False
# end of loop # end of loop
for i in range(counter): for i in range(counter):
driver.back() driver.back()
'''
# comment out # comment out
break break


+ 2
- 2
Forums/DB_Connection/db_connection.py View File

@ -9,8 +9,8 @@ def connectDataBase():
try: try:
config = configparser.ConfigParser()
config.read('../../setup.ini')
from Forums.Initialization.forums_mining import config
ip = config.get('PostgreSQL', 'ip') ip = config.get('PostgreSQL', 'ip')
username = config.get('PostgreSQL', 'username') username = config.get('PostgreSQL', 'username')
password = config.get('PostgreSQL', 'password') password = config.get('PostgreSQL', 'password')


+ 9
- 9
Forums/HiddenAnswers/crawler_selenium.py View File

@ -24,8 +24,6 @@ from Forums.Initialization.prepare_parser import new_parse
from Forums.HiddenAnswers.parser import hiddenanswers_links_parser from Forums.HiddenAnswers.parser import hiddenanswers_links_parser
from Forums.Utilities.utilities import cleanHTML from Forums.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1 counter = 1
baseURL = 'http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/' baseURL = 'http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/'
@ -49,6 +47,8 @@ def startCrawling():
# Opens Tor Browser # Opens Tor Browser
def opentor(): def opentor():
from Forums.Initialization.forums_mining import config
global pid global pid
print("Connecting Tor...") print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -91,6 +91,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile' # Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket # to use Tor proxy and socket
def createFFDriver(): def createFFDriver():
from Forums.Initialization.forums_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -143,16 +145,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name # Gets the full path of the page to be saved along with its appropriate file name
def getFullPathName(url): def getFullPathName(url):
from Forums.Initialization.forums_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + getForumName() + "/HTML_Pages")
fileName = getNameFromURL(url) fileName = getNameFromURL(url)
if isDescriptionLink(url): if isDescriptionLink(url):
#..\CryptBB\HTML_Pages\\
fullPath = r'..\HiddenAnswers\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else: else:
fullPath = r'..\HiddenAnswers\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath return fullPath


+ 1
- 0
Forums/Initialization/forumsList.txt View File

@ -1 +1,2 @@
OnniForums OnniForums
CryptBB

+ 8
- 1
Forums/Initialization/forums_mining.py View File

@ -13,12 +13,18 @@ from Forums.AbyssForum.crawler_selenium import crawler as crawlerAbyssForum
from Forums.Altenens.crawler_selenium import crawler as crawlerAltenensForum from Forums.Altenens.crawler_selenium import crawler as crawlerAltenensForum
from Forums.HiddenAnswers.crawler_selenium import crawler as crawlerHiddenAnswers from Forums.HiddenAnswers.crawler_selenium import crawler as crawlerHiddenAnswers
<<<<<<< HEAD
=======
import configparser
>>>>>>> 98de158ee6f14b35b2a5ac61427d7de36b4ee442
import time import time
config = configparser.ConfigParser()
config.read('../../setup.ini')
CURRENT_DATE = str("%02d" % date.today().month) + str("%02d" % date.today().day) + str("%04d" % date.today().year) CURRENT_DATE = str("%02d" % date.today().month) + str("%02d" % date.today().day) + str("%04d" % date.today().year)
@ -37,7 +43,8 @@ def createDirectory(forum):
if forum == 'Reddits': if forum == 'Reddits':
pagesMainDir = '../' + forum pagesMainDir = '../' + forum
else: else:
pagesMainDir = '../' + forum + "/HTML_Pages"
# pagesMainDir = '../' + forum + "/HTML_Pages"
pagesMainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + forum + "/HTML_Pages")
if not os.path.isdir(pagesMainDir): if not os.path.isdir(pagesMainDir):
os.makedirs(pagesMainDir) os.makedirs(pagesMainDir)


+ 53
- 3
Forums/Initialization/geckodriver.log View File

@ -6224,8 +6224,13 @@ unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109 destructor()@TargetList.jsm:109
stop()@CDP.jsm:104 stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138 close()@RemoteAgent.jsm:138
<<<<<<< HEAD
1687895546413 geckodriver INFO Listening on 127.0.0.1:52237 1687895546413 geckodriver INFO Listening on 127.0.0.1:52237
1687895550932 mozrunner::runner INFO Running command: "C:\\Users\\\\Helium\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "52238" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofileiOR21Q" 1687895550932 mozrunner::runner INFO Running command: "C:\\Users\\\\Helium\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "52238" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofileiOR21Q"
=======
1687896430885 geckodriver INFO Listening on 127.0.0.1:50135
1687896434527 mozrunner::runner INFO Running command: "C:\\Users\\calsyslab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" ... "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofilenQCzgp"
>>>>>>> 98de158ee6f14b35b2a5ac61427d7de36b4ee442
console.log: "TorSettings: loadFromPrefs()" console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()" console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state" console.log: "TorConnect: Entering Initial state"
@ -6234,7 +6239,7 @@ console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'" console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'" console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change" console.log: "TorSettings: Observed profile-after-change"
1687895551675 Marionette INFO Marionette enabled
1687896435185 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]" console.log: "TorConnect: Will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid" console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
@ -6242,19 +6247,64 @@ JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't fin
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory. JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
<<<<<<< HEAD
DevTools listening on ws://localhost:52238/devtools/browser/ad1dc524-5cad-4983-9dd6-c7f6f3d5caee DevTools listening on ws://localhost:52238/devtools/browser/ad1dc524-5cad-4983-9dd6-c7f6f3d5caee
1687895553974 Marionette INFO Listening on port 52243 1687895553974 Marionette INFO Listening on port 52243
1687895554561 RemoteAgent WARN TLS certificate errors will be ignored for this session 1687895554561 RemoteAgent WARN TLS certificate errors will be ignored for this session
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofileiOR21Q\thumbnails) because it does not exist JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofileiOR21Q\thumbnails) because it does not exist
1687895804567 Marionette INFO Stopped listening on port 52243 1687895804567 Marionette INFO Stopped listening on port 52243
=======
DevTools listening on ws://localhost:50136/devtools/browser/773adaec-44e1-4b13-9fac-c38bfb170221
1687896436579 Marionette INFO Listening on port 50142
1687896436612 RemoteAgent WARN TLS certificate errors will be ignored for this session
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\calsyslab\AppData\Local\Temp\rust_mozprofilenQCzgp\thumbnails) because it does not exist
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/, line 2: ReferenceError: $ is not defined
1687896481968 Marionette INFO Stopped listening on port 50142
>>>>>>> 98de158ee6f14b35b2a5ac61427d7de36b4ee442
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver] JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function !!! error running onStopped callback: TypeError: callback is not a function
JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first. JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofileiOR21Q\thumbnails) because it does not exist
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\calsyslab\AppData\Local\Temp\rust_mozprofilenQCzgp\thumbnails) because it does not exist
###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost ###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
1687895804907 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
1687896482482 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1687897304511 geckodriver INFO Listening on 127.0.0.1:50201
1687897308111 mozrunner::runner INFO Running command: "C:\\Users\\calsyslab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" ... "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofile2TNTj7"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
console.log: "TorConnect: Observed profile-after-change"
console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change"
1687897308686 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:50202/devtools/browser/c30256b0-c71f-40da-a95f-bb1313b3e35e
1687897310328 Marionette INFO Listening on port 50208
1687897310788 RemoteAgent WARN TLS certificate errors will be ignored for this session
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/, line 2: ReferenceError: $ is not defined
1687897315273 Marionette INFO Stopped listening on port 50208
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
JavaScript error: resource://gre/modules/PageThumbs.jsm, line 709: AbortError: IOUtils.profileBeforeChange getter: IOUtils: profileBeforeChange phase has already finished
1687897315776 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70 unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37 unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109 destructor()@TargetList.jsm:109


+ 55
- 21
Forums/Initialization/prepare_parser.py View File

@ -2,7 +2,7 @@ __author__ = 'DarkWeb'
import codecs import codecs
import glob import glob
import os
import os, re
import shutil import shutil
from Forums.DB_Connection.db_connection import * from Forums.DB_Connection.db_connection import *
from Forums.BestCardingWorld.parser import * from Forums.BestCardingWorld.parser import *
@ -80,7 +80,7 @@ def persist_data(url, row, cur):
#calls the different parser methods here depending on the type of html page #calls the different parser methods here depending on the type of html page
def new_parse(forum, url, createLog): def new_parse(forum, url, createLog):
from Forums.Initialization.forums_mining import CURRENT_DATE
from Forums.Initialization.forums_mining import config, CURRENT_DATE
print("Parsing The " + forum + " Forum and conduct data classification to store the information in the database.") print("Parsing The " + forum + " Forum and conduct data classification to store the information in the database.")
@ -97,7 +97,8 @@ def new_parse(forum, url, createLog):
lines = [] # listing pages lines = [] # listing pages
lns = [] # description pages lns = [] # description pages
detPage = {}
detPage = {} # first pages
other = {} # other pages
# Creating the log file for each Forum # Creating the log file for each Forum
if createLog: if createLog:
@ -109,12 +110,14 @@ def new_parse(forum, url, createLog):
" in the _Logs folder to read files from this Forum of this date again.") " in the _Logs folder to read files from this Forum of this date again.")
raise SystemExit raise SystemExit
mainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + forum + "/HTML_Pages")
# Reading the Listing Html Pages # Reading the Listing Html Pages
for fileListing in glob.glob(os.path.join("..\\" + forum + "\\HTML_Pages\\" + CURRENT_DATE + "\\Listing", '*.html')):
for fileListing in glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Listing", '*.html')):
lines.append(fileListing) lines.append(fileListing)
# Reading the Description Html Pages # Reading the Description Html Pages
for fileDescription in glob.glob(os.path.join("..\\" + forum + "\\HTML_Pages\\" + CURRENT_DATE + "\\Description" ,'*.html')):
for fileDescription in glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Description", '*.html')):
lns.append(fileDescription) lns.append(fileDescription)
# Parsing the Description Pages and put the tag's content into a dictionary (Hash table) # Parsing the Description Pages and put the tag's content into a dictionary (Hash table)
@ -153,8 +156,17 @@ def new_parse(forum, url, createLog):
# key = u"Top:" + rmm[0].upper().strip() + u" User:" + rmm[2][0].upper().strip() # key = u"Top:" + rmm[0].upper().strip() + u" User:" + rmm[2][0].upper().strip()
key = u"Url:" + os.path.basename(line2).replace(".html", "") key = u"Url:" + os.path.basename(line2).replace(".html", "")
# save file address with description record in memory
detPage[key] = {'rmm': rmm, 'filename': os.path.basename(line2)}
# check if page or page exists at the end of a string followed by a series of numbers
#if yes add to other if no add to first page dictionary
# save descritions into record in memory
check = re.compile(r'(?<=Page|page)[0-9]*')
if check.search(key):
# print(key, 'is an other page\n')
other[key] = {'rmm': rmm, 'filename': os.path.basename(line2)}
else:
# print(key, 'is a first page\n')
detPage[key] = {'rmm': rmm, 'files': [os.path.basename(line2)]}
except: except:
@ -163,6 +175,27 @@ def new_parse(forum, url, createLog):
if createLog: if createLog:
logFile.write(str(nError) + ". There was a problem to parse the file " + line2 + " in the Description section.\n") logFile.write(str(nError) + ". There was a problem to parse the file " + line2 + " in the Description section.\n")
# goes through keys from detPage and other, checks if the keys match.
# if yes adds other[key] values to detPage w/o overwritting
for key in detPage.keys():
for k in list(other.keys()):
checkkey = str(key[4:])
checkk = str(k[4:])
if checkkey in checkk:
detPage[key]['rmm'][1].extend(other[k]['rmm'][1])
detPage[key]['rmm'][2].extend(other[k]['rmm'][2])
detPage[key]['rmm'][3].extend(other[k]['rmm'][3])
detPage[key]['rmm'][4].extend(other[k]['rmm'][4])
detPage[key]['rmm'][5].extend(other[k]['rmm'][5])
detPage[key]['rmm'][6].extend(other[k]['rmm'][6])
detPage[key]['rmm'][7].extend(other[k]['rmm'][7])
detPage[key]['rmm'][8].extend(other[k]['rmm'][8])
detPage[key]['files'].append(other[k]['filename'])
other.pop(k)
# Parsing the Listing Pages and put the tag's content into a list # Parsing the Listing Pages and put the tag's content into a list
for index, line1 in enumerate(lines): for index, line1 in enumerate(lines):
@ -259,20 +292,21 @@ def new_parse(forum, url, createLog):
if not persistError: if not persistError:
# move description files of completed folder # move description files of completed folder
source = line2.replace(os.path.basename(line2), "") + detPage[key]['filename']
destination = line2.replace(os.path.basename(line2), "") + r'Read/'
try:
shutil.move(source, destination)
num_persisted_moved += 1
except:
print("There was a problem to move the file " + detPage[key]['filename'] + " in the Description section!")
nError += 1
if createLog:
logFile.write(
str(nError) + ". There was a problem to move the file " + detPage[key]['filename'] + " in the Description section!.\n")
moveError = True
for filename in detPage[key]['files']:
source = line2.replace(os.path.basename(line2), "") + filename
destination = line2.replace(os.path.basename(line2), "") + r'Read/'
try:
shutil.move(source, destination)
num_persisted_moved += 1
except:
print("There was a problem to move the file " + filename + " in the Description section!")
nError += 1
if createLog:
logFile.write(
str(nError) + ". There was a problem to move the file " + filename + " in the Description section!.\n")
moveError = True
# if the associated description page is not read or not parsed # if the associated description page is not read or not parsed
else: else:


+ 15
- 1
Forums/OnniForums/HTML_Pages/06272023/Listing/httponnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qdonionForumHackingCrackingtutorials.html
File diff suppressed because it is too large
View File


+ 14
- 16
Forums/OnniForums/crawler_selenium.py View File

@ -25,8 +25,6 @@ from Forums.Initialization.prepare_parser import new_parse
from Forums.OnniForums.parser import onniForums_links_parser from Forums.OnniForums.parser import onniForums_links_parser
from Forums.Utilities.utilities import cleanHTML from Forums.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1 counter = 1
baseURL = 'http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/' baseURL = 'http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/'
@ -50,6 +48,8 @@ def startCrawling():
# Opens Tor Browser # Opens Tor Browser
def opentor(): def opentor():
from Forums.Initialization.forums_mining import config
global pid global pid
print("Connecting Tor...") print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -109,6 +109,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile' # Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket # to use Tor proxy and socket
def createFFDriver(): def createFFDriver():
from Forums.Initialization.forums_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -163,16 +165,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name # Gets the full path of the page to be saved along with its appropriate file name
def getFullPathName(url): def getFullPathName(url):
from Forums.Initialization.forums_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + getForumName() + "/HTML_Pages")
fileName = getNameFromURL(url) fileName = getNameFromURL(url)
if isDescriptionLink(url): if isDescriptionLink(url):
#..\CryptBB\HTML_Pages\\
fullPath = r'..\OnniForums\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else: else:
fullPath = r'..\OnniForums\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath return fullPath
@ -189,10 +189,10 @@ def getNameFromURL(url):
def getInterestedLinks(): def getInterestedLinks():
links = [] links = []
# Hacking & Cracking tutorials
links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Hacking-Cracking-tutorials')
# # Hacking & Cracking tutorials
# links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Hacking-Cracking-tutorials')
# Hacking & Cracking questions # Hacking & Cracking questions
# links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Hacking-Cracking-questions')
links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Hacking-Cracking-questions')
# # Exploit PoCs # # Exploit PoCs
# links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Exploit-PoCs') # links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Exploit-PoCs')
# # Cracked software # # Cracked software
@ -279,15 +279,13 @@ def crawlForum(driver):
for i in range(counter): for i in range(counter):
driver.back() driver.back()
# comment out, one topic per page # comment out, one topic per page
break break
# comment out, go through all pages # comment out, go through all pages
if count == 1: if count == 1:
count = 0
break
count = 0
break
try: try:
temp = driver.find_element(by=By.XPATH, value= temp = driver.find_element(by=By.XPATH, value=


+ 9
- 5
MarketPlaces/AnonymousMarketplace/crawler_selenium.py View File

@ -25,8 +25,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.AnonymousMarketplace.parser import anonymous_links_parser from MarketPlaces.AnonymousMarketplace.parser import anonymous_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1 counter = 1
baseURL = 'http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion/' baseURL = 'http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion/'
@ -52,6 +50,8 @@ def startCrawling():
# Opens Tor Browser # Opens Tor Browser
#prompts for ENTER input to continue #prompts for ENTER input to continue
def opentor(): def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid global pid
print("Connecting Tor...") print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -90,6 +90,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile' # Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket # to use Tor proxy and socket
def createFFDriver(): def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -154,12 +156,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name # Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site #@param: raw url as crawler crawls through every site
def getFullPathName(url): def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url) fileName = getNameFromURL(url)
if isDescriptionLink(url): if isDescriptionLink(url):
fullPath = r'..\AnonymousMarketplace\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else: else:
fullPath = r'..\AnonymousMarketplace\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath return fullPath


+ 9
- 5
MarketPlaces/Apocalypse/crawler_selenium.py View File

@ -25,8 +25,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.Apocalypse.parser import apocalypse_links_parser from MarketPlaces.Apocalypse.parser import apocalypse_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1 counter = 1
baseURL = 'http://apocam5hnoqskkmhr325nivjuh5phbmmggadxgcjabzzirap5iklkxad.onion/' baseURL = 'http://apocam5hnoqskkmhr325nivjuh5phbmmggadxgcjabzzirap5iklkxad.onion/'
@ -52,6 +50,8 @@ def startCrawling():
# Opens Tor Browser # Opens Tor Browser
#prompts for ENTER input to continue #prompts for ENTER input to continue
def opentor(): def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid global pid
print("Connecting Tor...") print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -90,6 +90,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile' # Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket # to use Tor proxy and socket
def createFFDriver(): def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -170,12 +172,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name # Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site #@param: raw url as crawler crawls through every site
def getFullPathName(url): def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url) fileName = getNameFromURL(url)
if isDescriptionLink(url): if isDescriptionLink(url):
fullPath = r'..\Apocalypse\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else: else:
fullPath = r'..\Apocalypse\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath return fullPath


+ 9
- 5
MarketPlaces/CityMarket/crawler_selenium.py View File

@ -26,8 +26,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.CityMarket.parser import city_links_parser from MarketPlaces.CityMarket.parser import city_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1 counter = 1
baseURL = 'http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/' baseURL = 'http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/'
@ -53,6 +51,8 @@ def startCrawling():
# Opens Tor Browser # Opens Tor Browser
#prompts for ENTER input to continue #prompts for ENTER input to continue
def opentor(): def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid global pid
print("Connecting Tor...") print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -91,6 +91,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile' # Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket # to use Tor proxy and socket
def createFFDriver(): def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -167,12 +169,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name # Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site #@param: raw url as crawler crawls through every site
def getFullPathName(url): def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url) fileName = getNameFromURL(url)
if isDescriptionLink(url): if isDescriptionLink(url):
fullPath = r'..\CityMarket\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else: else:
fullPath = r'..\CityMarket\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath return fullPath


+ 10
- 5
MarketPlaces/CypherMarketplace/crawler_selenium.py View File

@ -2,6 +2,7 @@ __author__ = 'Helium'
''' '''
CypherMarketplace Forum Crawler (Selenium) CypherMarketplace Forum Crawler (Selenium)
crawler done
''' '''
from selenium import webdriver from selenium import webdriver
@ -24,8 +25,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.CypherMarketplace.parser import cyphermarketplace_links_parser from MarketPlaces.CypherMarketplace.parser import cyphermarketplace_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1 counter = 1
baseURL = 'http://6c5qa2ke2esh6ake6u6yoxjungz2czbbl7hqxl75v5k37frtzhxuk7ad.onion/' baseURL = 'http://6c5qa2ke2esh6ake6u6yoxjungz2czbbl7hqxl75v5k37frtzhxuk7ad.onion/'
@ -51,6 +50,8 @@ def startCrawling():
# Opens Tor Browser # Opens Tor Browser
#prompts for ENTER input to continue #prompts for ENTER input to continue
def opentor(): def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid global pid
print("Connecting Tor...") print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -89,6 +90,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile' # Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket # to use Tor proxy and socket
def createFFDriver(): def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -167,12 +170,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name # Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site #@param: raw url as crawler crawls through every site
def getFullPathName(url): def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url) fileName = getNameFromURL(url)
if isDescriptionLink(url): if isDescriptionLink(url):
fullPath = r'..\CypherMarketplace\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else: else:
fullPath = r'..\CypherMarketplace\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath return fullPath


+ 2
- 2
MarketPlaces/DB_Connection/db_connection.py View File

@ -9,8 +9,8 @@ def connectDataBase():
try: try:
config = configparser.ConfigParser()
config.read('../../setup.ini')
from MarketPlaces.Initialization.markets_mining import config
ip = config.get('PostgreSQL', 'ip') ip = config.get('PostgreSQL', 'ip')
username = config.get('PostgreSQL', 'username') username = config.get('PostgreSQL', 'username')
password = config.get('PostgreSQL', 'password') password = config.get('PostgreSQL', 'password')


+ 14
- 14
MarketPlaces/DarkFox/crawler_selenium.py View File

@ -42,16 +42,17 @@ def startCrawling():
print(driver.current_url, e) print(driver.current_url, e)
closetor(driver) closetor(driver)
new_parse(mktName, False)
new_parse(mktName, baseURL, False)
# Opens Tor Browser # Opens Tor Browser
#prompts for ENTER input to continue #prompts for ENTER input to continue
def opentor(): def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid global pid
print("Connecting Tor...") print("Connecting Tor...")
path = open('../../path.txt').readline().strip()
pro = subprocess.Popen(path)
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid pid = pro.pid
time.sleep(7.5) time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n') input('Tor Connected. Press ENTER to continue\n')
@ -93,12 +94,11 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile' # Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket # to use Tor proxy and socket
def createFFDriver(): def createFFDriver():
file = open('../../path.txt', 'r')
lines = file.readlines()
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(lines[0].strip())
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(lines[1].strip())
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
# ff_prof.set_preference("places.history.enabled", False) # ff_prof.set_preference("places.history.enabled", False)
# ff_prof.set_preference("privacy.clearOnShutdown.offlineApps", True) # ff_prof.set_preference("privacy.clearOnShutdown.offlineApps", True)
# ff_prof.set_preference("privacy.clearOnShutdown.passwords", True) # ff_prof.set_preference("privacy.clearOnShutdown.passwords", True)
@ -120,10 +120,11 @@ def createFFDriver():
ff_prof.set_preference("javascript.enabled", False) ff_prof.set_preference("javascript.enabled", False)
ff_prof.update_preferences() ff_prof.update_preferences()
service = Service(lines[2].strip())
service = Service(config.get('TOR', 'geckodriver_path'))
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service) driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
return driver return driver
@ -185,15 +186,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name # Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site #@param: raw url as crawler crawls through every site
def getFullPathName(url): def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url) fileName = getNameFromURL(url)
if isDescriptionLink(url): if isDescriptionLink(url):
fullPath = r'..\DarkFox\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else: else:
fullPath = r'..\DarkFox\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath return fullPath


+ 22
- 17
MarketPlaces/DarkMatter/crawler_selenium.py View File

@ -2,7 +2,8 @@ __author__ = 'Helium'
''' '''
DarkMatter Marketplace Crawler (Selenium) DarkMatter Marketplace Crawler (Selenium)
this is a small marketplace so next page links are not coded in
website has connection issues
not working still trying to debug
''' '''
from selenium import webdriver from selenium import webdriver
@ -25,8 +26,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.DarkMatter.parser import darkmatter_links_parser from MarketPlaces.DarkMatter.parser import darkmatter_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1 counter = 1
baseURL = 'http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/' baseURL = 'http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/'
@ -52,6 +51,8 @@ def startCrawling():
# Opens Tor Browser # Opens Tor Browser
#prompts for ENTER input to continue #prompts for ENTER input to continue
def opentor(): def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid global pid
print("Connecting Tor...") print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -90,8 +91,11 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile' # Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket # to use Tor proxy and socket
def createFFDriver(): def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
ff_prof.set_preference("places.history.enabled", False) ff_prof.set_preference("places.history.enabled", False)
ff_prof.set_preference("privacy.clearOnShutdown.offlineApps", True) ff_prof.set_preference("privacy.clearOnShutdown.offlineApps", True)
@ -153,12 +157,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name # Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site #@param: raw url as crawler crawls through every site
def getFullPathName(url): def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url) fileName = getNameFromURL(url)
if isDescriptionLink(url): if isDescriptionLink(url):
fullPath = r'..\DarkMatter\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else: else:
fullPath = r'..\DarkMatter\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath return fullPath
@ -180,14 +186,12 @@ def getNameFromURL(url):
def getInterestedLinks(): def getInterestedLinks():
links = [] links = []
# fraud software
links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=76')
# other
links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=133')
# # hacking
# digital
links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=73')
# # hack guides
# links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=94') # links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=94')
# # carding
# links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=118')
# # services
# links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=117')
# # software/malware # # software/malware
# links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=121') # links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=121')
@ -220,6 +224,7 @@ def crawlForum(driver):
has_next_page = True has_next_page = True
while has_next_page: while has_next_page:
list = productPages(html) list = productPages(html)
for item in list: for item in list:
itemURL = urlparse.urljoin(baseURL, str(item)) itemURL = urlparse.urljoin(baseURL, str(item))
try: try:
@ -238,9 +243,9 @@ def crawlForum(driver):
break break
try: try:
temp = driver.find_element(by=By.XPATH, value=
'/html/body/table[1]/tbody/tr/td/form/div/div[2]/table[2]/tbody/tr')
link = temp.find_element(by=By.CLASS_NAME, value='button page-num').get_attribute('href')
nav = driver.find_element(by=By.XPATH, value='/html/body/table[1]/tbody/tr/td/form/div/div[2]/table[2]')
a = nav.find_element(by=By.LINK_TEXT, value=">")
link = a.get_attribute('href')
if link == "": if link == "":
raise NoSuchElementException raise NoSuchElementException
try: try:
@ -268,7 +273,7 @@ def crawlForum(driver):
#@param: url of any url crawled #@param: url of any url crawled
#return: true if is a description page, false if not #return: true if is a description page, false if not
def isDescriptionLink(url): def isDescriptionLink(url):
if 'product/' in url and '/products/?category' not in url:
if 'products/' in url and '/products/?category' not in url:
return True return True
return False return False


+ 2
- 2
MarketPlaces/DarkMatter/parser.py View File

@ -281,10 +281,10 @@ def darkmatter_links_parser(soup):
# Returning all links that should be visited by the Crawler # Returning all links that should be visited by the Crawler
href = [] href = []
listing = soup.findAll('td', {"class": "lefted"})
listing = soup.findAll('td', {"class": "lefted", 'colspan': '2'})
for a in listing: for a in listing:
bae = a.find('a', {"class": "lg bold"}, href=True)
bae = a.find('a', href=True)
link = bae['href'] link = bae['href']
href.append(link) href.append(link)

+ 9
- 5
MarketPlaces/DarkTor/crawler_selenium.py View File

@ -24,8 +24,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.DarkTor.parser import darktor_links_parser from MarketPlaces.DarkTor.parser import darktor_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1 counter = 1
baseURL = 'http://zuauw53dukqdmll5p3fld26ns2gepcyfmbofobjczdni6ecmkoitnfid.onion/' baseURL = 'http://zuauw53dukqdmll5p3fld26ns2gepcyfmbofobjczdni6ecmkoitnfid.onion/'
@ -51,6 +49,8 @@ def startCrawling():
# Opens Tor Browser # Opens Tor Browser
#prompts for ENTER input to continue #prompts for ENTER input to continue
def opentor(): def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid global pid
print("Connecting Tor...") print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -89,6 +89,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile' # Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket # to use Tor proxy and socket
def createFFDriver(): def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -153,12 +155,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name # Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site #@param: raw url as crawler crawls through every site
def getFullPathName(url): def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url) fileName = getNameFromURL(url)
if isDescriptionLink(url): if isDescriptionLink(url):
fullPath = r'..\DarkTor\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else: else:
fullPath = r'..\DarkTor\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath return fullPath


+ 8
- 5
MarketPlaces/DigitalThriftShop/crawler_selenium.py View File

@ -24,8 +24,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.DigitalThriftShop.parser import digitalthriftshop_links_parser from MarketPlaces.DigitalThriftShop.parser import digitalthriftshop_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1 counter = 1
baseURL = 'http://kw4zlnfhxje7top26u57iosg55i7dzuljjcyswo2clgc3mdliviswwyd.onion/' baseURL = 'http://kw4zlnfhxje7top26u57iosg55i7dzuljjcyswo2clgc3mdliviswwyd.onion/'
@ -89,6 +87,9 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile' # Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket # to use Tor proxy and socket
def createFFDriver(): def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -153,12 +154,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name # Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site #@param: raw url as crawler crawls through every site
def getFullPathName(url): def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url) fileName = getNameFromURL(url)
if isDescriptionLink(url): if isDescriptionLink(url):
fullPath = r'..\DigitalThriftShop\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else: else:
fullPath = r'..\DigitalThriftShop\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath return fullPath


+ 1066
- 17
MarketPlaces/Initialization/geckodriver.log
File diff suppressed because it is too large
View File


+ 5
- 3
MarketPlaces/Initialization/markets_mining.py View File

@ -21,10 +21,11 @@ from MarketPlaces.CityMarket.crawler_selenium import crawler as crawlerCityMarke
from MarketPlaces.DarkMatter.crawler_selenium import crawler as crawlerDarkMatter from MarketPlaces.DarkMatter.crawler_selenium import crawler as crawlerDarkMatter
from MarketPlaces.M00nkeyMarket.crawler_selenium import crawler as crawlerM00nkeyMarket from MarketPlaces.M00nkeyMarket.crawler_selenium import crawler as crawlerM00nkeyMarket
import configparser
import time import time
config = configparser.ConfigParser()
config.read('../../setup.ini')
CURRENT_DATE = str("%02d" % date.today().month) + str("%02d" % date.today().day) + str("%04d" % date.today().year) CURRENT_DATE = str("%02d" % date.today().month) + str("%02d" % date.today().day) + str("%04d" % date.today().year)
@ -40,7 +41,8 @@ def getMarkets():
def createDirectory(mkt): def createDirectory(mkt):
# Package should already be there, holding crawler and parser # Package should already be there, holding crawler and parser
pagesDir = '../' + mkt + '/HTML_Pages'
# pagesDir = '../' + mkt + '/HTML_Pages'
pagesDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + mkt + "/HTML_Pages")
if not os.path.isdir(pagesDir): if not os.path.isdir(pagesDir):
os.makedirs(pagesDir) os.makedirs(pagesDir)


+ 5
- 3
MarketPlaces/Initialization/prepare_parser.py View File

@ -71,7 +71,7 @@ def persist_data(url, row, cur):
def new_parse(marketPlace, url, createLog): def new_parse(marketPlace, url, createLog):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
print("Parsing the " + marketPlace + " marketplace and conduct data classification to store the information in the database.") print("Parsing the " + marketPlace + " marketplace and conduct data classification to store the information in the database.")
@ -100,12 +100,14 @@ def new_parse(marketPlace, url, createLog):
" in the _Logs folder to read files from this Market Place of this date again.") " in the _Logs folder to read files from this Market Place of this date again.")
raise SystemExit raise SystemExit
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + marketPlace + "/HTML_Pages")
# Reading the Listing Html Pages # Reading the Listing Html Pages
for fileListing in glob.glob(os.path.join("..\\" + marketPlace + "\\HTML_Pages\\" + CURRENT_DATE + "\\Listing", '*.html')):
for fileListing in glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Listing", '*.html')):
lines.append(fileListing) lines.append(fileListing)
# Reading the Description Html Pages # Reading the Description Html Pages
for fileDescription in glob.glob(os.path.join("..\\" + marketPlace + "\\HTML_Pages\\" + CURRENT_DATE + "\\Description", '*.html')):
for fileDescription in glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Description", '*.html')):
lns.append(fileDescription) lns.append(fileDescription)
# Parsing the Description Pages and put the tag's content into a dictionary (Hash table) # Parsing the Description Pages and put the tag's content into a dictionary (Hash table)


+ 9
- 5
MarketPlaces/LionMarketplace/crawler_selenium.py View File

@ -24,8 +24,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.LionMarketplace.parser import lionmarketplace_links_parser from MarketPlaces.LionMarketplace.parser import lionmarketplace_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1 counter = 1
baseURL = 'http://lionznqc2hg2wsp5vgruqait4cpknihwlje6hkjyi52lcl5ivyf7bcad.onion/' baseURL = 'http://lionznqc2hg2wsp5vgruqait4cpknihwlje6hkjyi52lcl5ivyf7bcad.onion/'
@ -51,6 +49,8 @@ def startCrawling():
# Opens Tor Browser # Opens Tor Browser
#prompts for ENTER input to continue #prompts for ENTER input to continue
def opentor(): def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid global pid
print("Connecting Tor...") print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -89,6 +89,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile' # Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket # to use Tor proxy and socket
def createFFDriver(): def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -154,12 +156,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name # Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site #@param: raw url as crawler crawls through every site
def getFullPathName(url): def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url) fileName = getNameFromURL(url)
if isDescriptionLink(url): if isDescriptionLink(url):
fullPath = r'..\LionMarketplace\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else: else:
fullPath = r'..\LionMarketplace\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath return fullPath


+ 9
- 5
MarketPlaces/M00nkeyMarket/crawler_selenium.py View File

@ -25,8 +25,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.M00nkeyMarket.parser import m00nkey_links_parser from MarketPlaces.M00nkeyMarket.parser import m00nkey_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1 counter = 1
baseURL = 'http://moonkey4f2mkcp6hpackeea356puiry27h3dz3hzbt3adbmsk4gs7wyd.onion/' baseURL = 'http://moonkey4f2mkcp6hpackeea356puiry27h3dz3hzbt3adbmsk4gs7wyd.onion/'
@ -52,6 +50,8 @@ def startCrawling():
# Opens Tor Browser # Opens Tor Browser
#prompts for ENTER input to continue #prompts for ENTER input to continue
def opentor(): def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid global pid
print("Connecting Tor...") print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -90,6 +90,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile' # Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket # to use Tor proxy and socket
def createFFDriver(): def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -170,12 +172,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name # Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site #@param: raw url as crawler crawls through every site
def getFullPathName(url): def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url) fileName = getNameFromURL(url)
if isDescriptionLink(url): if isDescriptionLink(url):
fullPath = r'..\M00nkeyMarket\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else: else:
fullPath = r'..\M00nkeyMarket\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath return fullPath


+ 9
- 5
MarketPlaces/MikesGrandStore/crawler_selenium.py View File

@ -24,8 +24,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.MikesGrandStore.parser import mikesgrandstore_links_parser from MarketPlaces.MikesGrandStore.parser import mikesgrandstore_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1 counter = 1
baseURL = 'http://4yx2akutmkhwfgzlpdxiah7cknurw6vlddlq24fxa3r3ebophwgpvhyd.onion/' baseURL = 'http://4yx2akutmkhwfgzlpdxiah7cknurw6vlddlq24fxa3r3ebophwgpvhyd.onion/'
@ -51,6 +49,8 @@ def startCrawling():
# Opens Tor Browser # Opens Tor Browser
#prompts for ENTER input to continue #prompts for ENTER input to continue
def opentor(): def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid global pid
print("Connecting Tor...") print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -89,6 +89,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile' # Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket # to use Tor proxy and socket
def createFFDriver(): def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -153,12 +155,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name # Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site #@param: raw url as crawler crawls through every site
def getFullPathName(url): def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url) fileName = getNameFromURL(url)
if isDescriptionLink(url): if isDescriptionLink(url):
fullPath = r'..\MikesGrandStore\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else: else:
fullPath = r'..\MikesGrandStore\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath return fullPath


+ 21
- 17
MarketPlaces/ThiefWorld/crawler_selenium.py View File

@ -24,8 +24,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.ThiefWorld.parser import thiefworld_links_parser from MarketPlaces.ThiefWorld.parser import thiefworld_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1 counter = 1
baseURL = 'http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion/' baseURL = 'http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion/'
@ -33,24 +31,26 @@ baseURL = 'http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor # Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later #acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling(): def startCrawling():
opentor()
# mktName = getMKTName()
driver = getAccess()
# opentor()
mktName = getMKTName()
# driver = getAccess()
#
# if driver != 'down':
# try:
# login(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
# new_parse(forumName, baseURL, False)
new_parse(mktName, baseURL, False)
# Opens Tor Browser # Opens Tor Browser
#prompts for ENTER input to continue #prompts for ENTER input to continue
def opentor(): def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid global pid
print("Connecting Tor...") print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -89,6 +89,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile' # Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket # to use Tor proxy and socket
def createFFDriver(): def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -161,12 +163,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name # Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site #@param: raw url as crawler crawls through every site
def getFullPathName(url): def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url) fileName = getNameFromURL(url)
if isDescriptionLink(url): if isDescriptionLink(url):
fullPath = r'..\ThiefWorld\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else: else:
fullPath = r'..\ThiefWorld\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath return fullPath


+ 9
- 5
MarketPlaces/Tor2door/crawler_selenium.py View File

@ -23,8 +23,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.Tor2door.parser import tor2door_links_parser from MarketPlaces.Tor2door.parser import tor2door_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1 counter = 1
baseURL = 'http://yzrrne3pveltulbavydr2kiashvlnysdwclwmklo6cyjuqpxi7ku4xqd.onion' baseURL = 'http://yzrrne3pveltulbavydr2kiashvlnysdwclwmklo6cyjuqpxi7ku4xqd.onion'
@ -48,6 +46,8 @@ def startCrawling():
# Opens Tor Browser # Opens Tor Browser
def opentor(): def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid global pid
print("Connecting Tor...") print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -131,6 +131,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile' # Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket # to use Tor proxy and socket
def createFFDriver(): def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -184,12 +186,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name # Gets the full path of the page to be saved along with its appropriate file name
def getFullPathName(url): def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url) fileName = getNameFromURL(url)
if isDescriptionLink(url): if isDescriptionLink(url):
fullPath = r'..\Tor2door\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else: else:
fullPath = r'..\Tor2door\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath return fullPath


+ 9
- 8
MarketPlaces/TorBay/crawler_selenium.py View File

@ -25,8 +25,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.TorBay.parser import torbay_links_parser from MarketPlaces.TorBay.parser import torbay_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1 counter = 1
baseURL = 'http://torbay3253zck4ym5cbowwvrbfjjzruzthrx3np5y6owvifrnhy5ybid.onion/' baseURL = 'http://torbay3253zck4ym5cbowwvrbfjjzruzthrx3np5y6owvifrnhy5ybid.onion/'
@ -52,6 +50,8 @@ def startCrawling():
# Opens Tor Browser # Opens Tor Browser
#prompts for ENTER input to continue #prompts for ENTER input to continue
def opentor(): def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid global pid
print("Connecting Tor...") print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -90,6 +90,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile' # Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket # to use Tor proxy and socket
def createFFDriver(): def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -154,15 +156,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name # Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site #@param: raw url as crawler crawls through every site
def getFullPathName(url): def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url) fileName = getNameFromURL(url)
if isDescriptionLink(url): if isDescriptionLink(url):
fullPath = r'..\TorBay\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else: else:
fullPath = r'..\TorBay\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath return fullPath


+ 9
- 5
MarketPlaces/TorMarket/crawler_selenium.py View File

@ -24,8 +24,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.TorMarket.parser import tormarket_links_parser from MarketPlaces.TorMarket.parser import tormarket_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1 counter = 1
baseURL = 'http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/' baseURL = 'http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/'
@ -51,6 +49,8 @@ def startCrawling():
# Opens Tor Browser # Opens Tor Browser
#prompts for ENTER input to continue #prompts for ENTER input to continue
def opentor(): def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid global pid
print("Connecting Tor...") print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path')) pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -89,6 +89,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile' # Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket # to use Tor proxy and socket
def createFFDriver(): def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path')) ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path')) ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -153,12 +155,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name # Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site #@param: raw url as crawler crawls through every site
def getFullPathName(url): def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url) fileName = getNameFromURL(url)
if isDescriptionLink(url): if isDescriptionLink(url):
fullPath = r'..\TorMarket\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else: else:
fullPath = r'..\TorMarket\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath return fullPath


+ 3
- 2
setup.ini View File

@ -1,11 +1,12 @@
[TOR] [TOR]
firefox_binary_path = C:\\Users\\minhkhoitran\\Desktop\\Tor Browser\\Browser\\firefox.exe firefox_binary_path = C:\\Users\\minhkhoitran\\Desktop\\Tor Browser\\Browser\\firefox.exe
firefox_profile_path = C:\\Users\\minhkhoitran\\Desktop\\Tor Browser\\Browser\\TorBrowser\\Data\\Browser\\profile.default firefox_profile_path = C:\\Users\\minhkhoitran\\Desktop\\Tor Browser\\Browser\\TorBrowser\\Data\\Browser\\profile.default
geckodriver_path = C:\\NSF-REU\\dw_pipeline_test\\selenium\\geckodriver.exe geckodriver_path = C:\\NSF-REU\\dw_pipeline_test\\selenium\\geckodriver.exe
[Project] [Project]
project_directory = C:\\NSF-REU\\dw_pipeline_test
shared_folder = \\VBoxSvr\\VM_Files_(shared)
project_directory = C:\Users\Helium\\PycharmProjects\dw_pipeline_test
shared_folder = \\VBoxSvr\Shared
[PostgreSQL] [PostgreSQL]
ip = localhost ip = localhost


Loading…
Cancel
Save