
Updates from virtual machine

main · Khoi · 1 year ago
commit 3784e8d533
43 changed files with 6095 additions and 375 deletions
  1. +1 -0  .idea/.gitignore
  2. +1 -0  .idea/.name
  3. +586 -0  Forums/AbyssForum/HTML_Pages/06262023/Description/viewtopicphpt1402.html
  4. +1012 -0  Forums/AbyssForum/HTML_Pages/06262023/Description/viewtopicphpt849.html
  5. +1468 -0  Forums/AbyssForum/HTML_Pages/06262023/Listing/httpqyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjidonionviewforumphpf26sid6f7add746810784861a7ec31703a3757.html
  6. +1462 -0  Forums/AbyssForum/HTML_Pages/06262023/Listing/httpqyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjidonionviewforumphpf26start25.html
  7. +16 -16  Forums/AbyssForum/crawler_selenium.py
  8. +31 -127  Forums/AbyssForum/parser.py
  9. +19 -20  Forums/Altenens/crawler_selenium.py
  10. +28 -0  Forums/CryptBB/HTML_Pages/06262023/Description/Read/showthreadphptid16584.html
  11. +28 -0  Forums/CryptBB/HTML_Pages/06262023/Description/Read/showthreadphptid2628.html
  12. +8 -0  Forums/CryptBB/HTML_Pages/06262023/Listing/httpcryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebidonionforumdisplayphpfid86.html
  13. +8 -0  Forums/CryptBB/HTML_Pages/06262023/Listing/httpcryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebidonionforumdisplayphpfid86page2.html
  14. +22 -19  Forums/CryptBB/crawler_selenium.py
  15. +2 -2  Forums/DB_Connection/db_connection.py
  16. +9 -9  Forums/HiddenAnswers/crawler_selenium.py
  17. +1 -0  Forums/Initialization/forumsList.txt
  18. +8 -1  Forums/Initialization/forums_mining.py
  19. +53 -3  Forums/Initialization/geckodriver.log
  20. +55 -21  Forums/Initialization/prepare_parser.py
  21. +15 -1  Forums/OnniForums/HTML_Pages/06272023/Listing/httponnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qdonionForumHackingCrackingtutorials.html
  22. +14 -16  Forums/OnniForums/crawler_selenium.py
  23. +9 -5  MarketPlaces/AnonymousMarketplace/crawler_selenium.py
  24. +9 -5  MarketPlaces/Apocalypse/crawler_selenium.py
  25. +9 -5  MarketPlaces/CityMarket/crawler_selenium.py
  26. +10 -5  MarketPlaces/CypherMarketplace/crawler_selenium.py
  27. +2 -2  MarketPlaces/DB_Connection/db_connection.py
  28. +14 -14  MarketPlaces/DarkFox/crawler_selenium.py
  29. +22 -17  MarketPlaces/DarkMatter/crawler_selenium.py
  30. +2 -2  MarketPlaces/DarkMatter/parser.py
  31. +9 -5  MarketPlaces/DarkTor/crawler_selenium.py
  32. +8 -5  MarketPlaces/DigitalThriftShop/crawler_selenium.py
  33. +1066 -17  MarketPlaces/Initialization/geckodriver.log
  34. +5 -3  MarketPlaces/Initialization/markets_mining.py
  35. +5 -3  MarketPlaces/Initialization/prepare_parser.py
  36. +9 -5  MarketPlaces/LionMarketplace/crawler_selenium.py
  37. +9 -5  MarketPlaces/M00nkeyMarket/crawler_selenium.py
  38. +9 -5  MarketPlaces/MikesGrandStore/crawler_selenium.py
  39. +21 -17  MarketPlaces/ThiefWorld/crawler_selenium.py
  40. +9 -5  MarketPlaces/Tor2door/crawler_selenium.py
  41. +9 -8  MarketPlaces/TorBay/crawler_selenium.py
  42. +9 -5  MarketPlaces/TorMarket/crawler_selenium.py
  43. +3 -2  setup.ini

.idea/.gitignore  (+1, -0)

@ -2,6 +2,7 @@
/shelf/
/workspace.xml
/selenium/geckodriver.exe
setup.ini
*.html
*.log
*.png

.idea/.name  (+1, -0)

@ -0,0 +1 @@
forums_mining.py

Forums/AbyssForum/HTML_Pages/06262023/Description/viewtopicphpt1402.html  (+586, -0)

@ -0,0 +1,586 @@
<html dir="ltr" lang="en-gb"><head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- Chrome, Firefox OS and Opera -->
<meta name="theme-color" content="#094c8a">
<!-- Windows Phone -->
<meta name="msapplication-navbutton-color" content="#094c8a">
<!-- iOS Safari -->
<meta name="apple-mobile-web-app-status-bar-style" content="#094c8a">
<title>Get List For WinPot malware,cutlet maker, Yoda and UPX type ATM jackpotting. - Abyss Forum| forum, drugs,guns,porn,hacking,crypto</title>
<link rel="alternate" type="application/atom+xml" title="Feed - Abyss Forum| forum, drugs,guns,porn,hacking,crypto" href="/app.php/feed"> <link rel="alternate" type="application/atom+xml" title="Feed - New Topics" href="/app.php/feed/topics"> <link rel="alternate" type="application/atom+xml" title="Feed - Forum - Hacked Database" href="/app.php/feed/forum/26"> <link rel="alternate" type="application/atom+xml" title="Feed - Topic - Get List For WinPot malware,cutlet maker, Yoda and UPX type ATM jackpotting." href="/app.php/feed/topic/1402">
<link rel="canonical" href="http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewtopic.php?t=1402">
<!--
phpBB style name: Supernova
Based on style: prosilver (this is the default phpBB3 style)
Original author: Tom Beddard ( http://www.subBlue.com/ )
Modified by: Christian 2.0
Updated by: MannixMD @MannixMD
-->
<link href="./assets/css/font-awesome.min.css?assets_version=22" rel="stylesheet">
<link href="./styles/prosilver/theme/stylesheet.css?assets_version=22" rel="stylesheet">
<link href="./styles/Supernova/theme/supernova.css?assets_version=22" rel="stylesheet">
<link href="./styles/Supernova/theme/en/stylesheet.css?assets_version=22" rel="stylesheet">
<!--[if lte IE 9]>
<link href="./styles/Supernova/theme/tweaks.css?assets_version=22" rel="stylesheet">
<![endif]-->
<meta name="description" content="Get List For WinPot malware,cutlet maker, Yoda and UPX type ATM jackpotting WinPot Malware WinPot malware ATMii Malware Cutler Malware Buy ATM jackpotting virus">
<script type="application/ld+json">
{"@context":"https://schema.org","@type":"DiscussionForumPosting","url":"http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewtopic.php?t=1402","headline":"Get List For WinPot malware,cutlet maker, Yoda and UPX type ATM jackpotting.","description":"Get List For WinPot malware,cutlet maker, Yoda and UPX type ATM jackpotting WinPot Malware WinPot malware ATMii Malware Cutler Malware Buy ATM jackpotting virus","author":{"@type":"Person","name":"darkclonee"},"datePublished":"2023-04-29T22:33:53+00:00","articleSection":"Hacked Database","publisher":{"@type":"Organization","name":"Abyss Forum| forum, drugs,guns,porn,hacking,crypto","url":"http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion"}}
</script>
<link href="./ext/phpbb/pages/styles/prosilver/theme/pages_common.css?assets_version=22" rel="stylesheet" media="screen">
<link rel="stylesheet" href="./ext/danieltj/verifiedprofiles/styles/all/theme/verifiedprofiles.css">
</head>
<body id="phpbb" class="notouch section-viewtopic ltr hasjs">
<div id="page-header">
<div class="headerbar" role="banner">
<div class="inner">
<div id="site-description" class="site-description">
<a id="logo" class="logo" href="http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion" title="Home">
<span class="site_logo"></span>
</a>
<p class="skiplink"><a href="#start_here">Skip to content</a></p>
</div>
<div id="search-box" class="search-box search-header" role="search">
<form action="./search.php" method="get" id="search">
<fieldset>
<input name="keywords" id="keywords" type="search" maxlength="128" title="Search for keywords" class="inputbox search tiny" size="20" value="" placeholder="Search…">
<button class="button button-search" type="submit" title="Search">
<i class="icon fa-search fa-fw" aria-hidden="true"></i><span class="sr-only">Search</span>
</button>
<a href="./search.php" class="button button-search-end" title="Advanced search">
<i class="icon fa-cog fa-fw" aria-hidden="true"></i><span class="sr-only">Advanced search</span>
</a>
</fieldset>
</form>
</div>
</div>
</div>
<div class="navigation-top" role="navigation">
<ul id="nav-main" class="nav-main linklist" role="menubar">
<li id="quick-links" class="quick-links dropdown-container responsive-menu" data-skip-responsive="true">
<a href="#" class="dropdown-trigger dropdown-toggle">
<i class="icon fa-bars fa-fw" aria-hidden="true"></i><span>Quick links</span>
</a>
<div class="dropdown">
<div class="pointer"><div class="pointer-inner"></div></div>
<ul class="dropdown-contents" role="menu">
<li class="small-icon icon-pages icon-page-abyss-directory">
<a href="/app.php/abyss-directory" title="Welcome to abyss directory link collection" role="menuitem">
<span>Abyss Directory</span>
</a>
</li>
<li class="small-icon icon-pages icon-page-about">
<a href="/app.php/about" title="about Abyss Forum" role="menuitem">
<span>about</span>
</a>
</li>
<li class="separator"></li>
<li>
<a href="./search.php?search_id=unanswered" role="menuitem">
<i class="icon fa-file-o fa-fw icon-gray" aria-hidden="true"></i><span>Unanswered topics</span>
</a>
</li>
<li>
<a href="./search.php?search_id=active_topics" role="menuitem">
<i class="icon fa-file-o fa-fw icon-blue" aria-hidden="true"></i><span>Active topics</span>
</a>
</li>
<li class="separator"></li>
<li>
<a href="./search.php" role="menuitem">
<i class="icon fa-search fa-fw" aria-hidden="true"></i><span>Search</span>
</a>
</li>
<li class="separator"></li>
<li class="small-icon icon-pages icon-page-abyss-directory">
<a href="/app.php/abyss-directory" title="Welcome to abyss directory link collection" role="menuitem">
<span>Abyss Directory</span>
</a>
</li>
<li class="small-icon icon-pages icon-page-about">
<a href="/app.php/about" title="about Abyss Forum" role="menuitem">
<span>about</span>
</a>
</li>
</ul>
</div>
</li>
<li class="small-icon icon-pages icon-page-about no-bulletin">
<a href="/app.php/about" title="about Abyss Forum" role="menuitem">
<span>about</span>
</a>
</li>
<li class="small-icon icon-pages icon-page-abyss-directory no-bulletin">
<a href="/app.php/abyss-directory" title="Welcome to abyss directory link collection" role="menuitem">
<span>Abyss Directory</span>
</a>
</li>
<li data-skip-responsive="true">
<a href="/app.php/help/faq" rel="help" title="Frequently Asked Questions" role="menuitem">
<i class="icon fa-question-circle fa-fw" aria-hidden="true"></i><span>FAQ</span>
</a>
</li>
<li class="small-icon icon-pages icon-page-abyss-directory no-bulletin">
<a href="/app.php/abyss-directory" title="Welcome to abyss directory link collection" role="menuitem">
<span>Abyss Directory</span>
</a>
</li>
<li class="small-icon icon-pages icon-page-about no-bulletin">
<a href="/app.php/about" title="about Abyss Forum" role="menuitem">
<span>about</span>
</a>
</li>
<li class="rightside" data-skip-responsive="true">
<a href="./ucp.php?mode=login&amp;redirect=viewtopic.php%3Ft%3D1402" title="Login" accesskey="x" role="menuitem">
<i class="icon fa-power-off fa-fw" aria-hidden="true"></i><span>Login</span>
</a>
</li>
<li class="rightside" data-skip-responsive="true">
<a href="./ucp.php?mode=register" role="menuitem">
<i class="icon fa-pencil-square-o fa-fw" aria-hidden="true"></i><span>Register</span>
</a>
</li>
</ul>
</div> </div>
<div id="wrap" class="wrap">
<a id="top" class="top-anchor" accesskey="t"></a>
<div class="navbar navbar-wrap" role="navigation">
<div class="inner">
<ul id="nav-breadcrumbs" class="nav-breadcrumbs linklist navlinks" role="menubar">
<li data-skip-responsive="" class="small-icon icon-pages icon-page-abyss-directory breadcrumbs" itemscope="" itemtype="https://schema.org/BreadcrumbList">
<span class="crumb" itemtype="https://schema.org/ListItem" itemprop="itemListElement" itemscope=""><a itemprop="item" href="/app.php/abyss-directory" title="Welcome to abyss directory link collection"><span itemprop="name">Abyss Directory</span></a><meta itemprop="position" content="0"></span>
</li>
<li data-skip-responsive="" class="small-icon icon-pages icon-page-about breadcrumbs" itemscope="" itemtype="https://schema.org/BreadcrumbList">
<span class="crumb" itemtype="https://schema.org/ListItem" itemprop="itemListElement" itemscope=""><a itemprop="item" href="/app.php/about" title="about Abyss Forum"><span itemprop="name">about</span></a><meta itemprop="position" content="0"></span>
</li>
<li class="breadcrumbs" itemscope="" itemtype="https://schema.org/BreadcrumbList">
<span class="crumb" itemtype="https://schema.org/ListItem" itemprop="itemListElement" itemscope=""><a itemprop="item" href="http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion" data-navbar-reference="home" title="Home"><i class="icon fa-home fa-fw" aria-hidden="true"></i><span itemprop="name">Home</span></a><meta itemprop="position" content="1"></span>
<span class="crumb" itemtype="https://schema.org/ListItem" itemprop="itemListElement" itemscope=""><a itemprop="item" href="./index.php" accesskey="h" data-navbar-reference="index" title="Board"><span itemprop="name">Board</span></a><meta itemprop="position" content="2"></span>
<span class="crumb" itemtype="https://schema.org/ListItem" itemprop="itemListElement" itemscope="" data-forum-id="3"><a itemprop="item" href="./viewforum.php?f=3" title="Hacking"><span itemprop="name">Hacking</span></a><meta itemprop="position" content="3"></span>
<span class="crumb" itemtype="https://schema.org/ListItem" itemprop="itemListElement" itemscope="" data-forum-id="26"><a itemprop="item" href="./viewforum.php?f=26" title="Hacked Database"><span itemprop="name">Hacked Database</span></a><meta itemprop="position" content="4"></span>
</li>
<li data-skip-responsive="" class="small-icon icon-pages icon-page-abyss-directory breadcrumbs" itemscope="" itemtype="https://schema.org/BreadcrumbList">
<span class="crumb" itemtype="https://schema.org/ListItem" itemprop="itemListElement" itemscope=""><a itemprop="item" href="/app.php/abyss-directory" title="Welcome to abyss directory link collection"><span itemprop="name">Abyss Directory</span></a><meta itemprop="position" content="5"></span>
</li>
<li data-skip-responsive="" class="small-icon icon-pages icon-page-about breadcrumbs" itemscope="" itemtype="https://schema.org/BreadcrumbList">
<span class="crumb" itemtype="https://schema.org/ListItem" itemprop="itemListElement" itemscope=""><a itemprop="item" href="/app.php/about" title="about Abyss Forum"><span itemprop="name">about</span></a><meta itemprop="position" content="6"></span>
</li>
<li class="rightside responsive-search">
<a href="./search.php" title="View the advanced search options" role="menuitem">
<i class="icon fa-search fa-fw" aria-hidden="true"></i><span class="sr-only">Search</span>
</a>
</li>
</ul>
</div>
</div>
<a id="start_here" class="anchor"></a>
<div id="page-body" class="page-body" role="main">
<h2 class="topic-title"><a href="./viewtopic.php?t=1402">Get List For WinPot malware,cutlet maker, Yoda and UPX type ATM jackpotting.</a></h2>
<!-- NOTE: remove the style="display: none" when you want to have the forum description on the topic body -->
<div style="display: none !important;">Buy and sell hacked database's, accounts, logs etc<br></div>
<div class="action-bar bar-top">
<a href="./posting.php?mode=reply&amp;t=1402" class="button sn-btn" title="Post a reply">
<span>Post Reply</span> <i class="icon fa-reply fa-fw" aria-hidden="true"></i>
</a>
<div class="dropdown-container dropdown-button-control topic-tools">
<span title="Topic tools" class="button button-secondary dropdown-trigger dropdown-select sn-btn dropdown-toggle">
<i class="icon fa-wrench fa-fw" aria-hidden="true"></i>
<span class="caret"><i class="icon fa-sort-down fa-fw" aria-hidden="true"></i></span>
</span>
<div class="dropdown">
<div class="pointer"><div class="pointer-inner"></div></div>
<ul class="dropdown-contents">
<li>
<a href="./viewtopic.php?t=1402&amp;view=print" title="Print view" accesskey="p">
<i class="icon fa-print fa-fw" aria-hidden="true"></i><span>Print view</span>
</a>
</li>
</ul>
</div>
</div>
<div class="search-box" role="search">
<form method="get" id="topic-search" action="./search.php">
<fieldset>
<input class="inputbox search tiny" type="search" name="keywords" id="search_keywords" size="20" placeholder="Search this topic…">
<button class="button button-search" type="submit" title="Search">
<i class="icon fa-search fa-fw" aria-hidden="true"></i><span class="sr-only">Search</span>
</button>
<a href="./search.php" class="button button-search-end" title="Advanced search">
<i class="icon fa-cog fa-fw" aria-hidden="true"></i><span class="sr-only">Advanced search</span>
</a>
<input type="hidden" name="t" value="1402">
<input type="hidden" name="sf" value="msgonly">
</fieldset>
</form>
</div>
<div class="pagination">
1 post
• Page <strong>1</strong> of <strong>1</strong>
</div>
</div>
<div id="p1586" class="post has-profile bg2">
<div class="inner">
<dl class="postprofile" id="profile1586">
<dt class="no-profile-rank no-avatar">
<div class="avatar-container">
</div>
<a href="./memberlist.php?mode=viewprofile&amp;u=789" class="username">darkclonee</a> </dt>
<dd class="profile-posts"><strong>Posts:</strong> <a href="./search.php?author_id=789&amp;sr=posts">13</a></dd> <dd class="profile-joined"><strong>Joined:</strong> Sat Apr 29, 2023 10:33 am</dd>
</dl>
<div class="postbody">
<div id="post_content1586">
<h3 class="first">
<a href="./viewtopic.php?p=1586#p1586">Get List For WinPot malware,cutlet maker, Yoda and UPX type ATM jackpotting.</a>
</h3>
<ul class="post-buttons">
<li>
<a href="./posting.php?mode=quote&amp;p=1586" title="Reply with quote" class="button button-icon-only btn-blue">
<i class="icon fa-quote-left fa-fw" aria-hidden="true"></i><span class="sr-only">Quote</span>
</a>
</li>
<li class="responsive-menu hidden dropdown-container"><a href="javascript:void(0);" class="js-responsive-menu-link responsive-menu-link dropdown-toggle"><i class="icon fa-bars fa-fw" aria-hidden="true"></i></a><div class="dropdown"><div class="pointer"><div class="pointer-inner"></div></div><ul class="dropdown-contents"></ul></div></li></ul>
<p class="author">
<a class="unread" href="./viewtopic.php?p=1586#p1586" title="Post">
<i class="icon fa-file fa-fw icon-lightgray icon-md" aria-hidden="true"></i><span class="sr-only">Post</span>
</a>
<span class="responsive-hide">by <strong><a href="./memberlist.php?mode=viewprofile&amp;u=789" class="username">darkclonee</a></strong> » </span><time datetime="2023-04-29T22:33:53+00:00">Sat Apr 29, 2023 10:33 pm</time>
</p>
<div class="content">Get List For WinPot malware,cutlet maker, Yoda and UPX type ATM jackpotting <br>
<br>
WinPot Malware<br>
WinPot malware<br>
ATMii Malware<br>
Cutler Malware<br>
<br>
Buy ATM jackpotting virus. WinPot requires almost no advanced knowledge or professional computer skills. The fastest way to make money.<br>
This software will help you bring down an ATM machine and you would be able to get the cash.<br>
Contact if you are interested very affordable.<br>
<br>
CONTACT<br>
General support wickr me...jmorgan420<br>
Email..... (<a href="mailto:[email protected]">[email protected]</a>)<br>
Telegram.....Bradley_bil7<br>
WhatsApp....+13239891049</div>
</div>
</div>
<div class="back2top">
<a href="#top" class="top" title="Top">
<i class="icon fa-chevron-circle-up fa-fw icon-gray" aria-hidden="true"></i>
<span class="sr-only">Top</span>
</a>
</div>
</div>
</div>
<div class="action-bar bar-bottom">
<a href="./posting.php?mode=reply&amp;t=1402" class="button sn-btn" title="Post a reply">
<span>Post Reply</span> <i class="icon fa-reply fa-fw" aria-hidden="true"></i>
</a>
<div class="dropdown-container dropdown-button-control topic-tools">
<span title="Topic tools" class="button button-secondary dropdown-trigger dropdown-select sn-btn dropdown-toggle">
<i class="icon fa-wrench fa-fw" aria-hidden="true"></i>
<span class="caret"><i class="icon fa-sort-down fa-fw" aria-hidden="true"></i></span>
</span>
<div class="dropdown">
<div class="pointer"><div class="pointer-inner"></div></div>
<ul class="dropdown-contents">
<li>
<a href="./viewtopic.php?t=1402&amp;view=print" title="Print view" accesskey="p">
<i class="icon fa-print fa-fw" aria-hidden="true"></i><span>Print view</span>
</a>
</li>
</ul>
</div>
</div>
<div class="pagination">
1 post
• Page <strong>1</strong> of <strong>1</strong>
</div>
</div>
<div class="action-bar actions-jump">
<p class="jumpbox-return">
<a href="./viewforum.php?f=26" class="left-box arrow-left" accesskey="r">
<i class="icon fa-angle-left fa-fw icon-black" aria-hidden="true"></i><span>Return to “Hacked Database”</span>
</a>
</p>
<div class="jumpbox dropdown-container dropdown-container-right dropdown-up dropdown-left dropdown-button-control" id="jumpbox">
<span title="Jump to" class="button button-secondary dropdown-trigger dropdown-select sn-btn dropdown-toggle">
<span>Jump to</span>
<span class="caret"><i class="icon fa-sort-down fa-fw" aria-hidden="true"></i></span>
</span>
<div class="dropdown">
<div class="pointer"><div class="pointer-inner"></div></div>
<ul class="dropdown-contents">
<li><a href="./viewforum.php?f=1" class="jumpbox-cat-link"> <span> General</span></a></li>
<li><a href="./viewforum.php?f=2" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Welcome to Abyss Forum</span></a></li>
<li><a href="./viewforum.php?f=3" class="jumpbox-cat-link"> <span> Hacking</span></a></li>
<li><a href="./viewforum.php?f=26" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Hacked Database</span></a></li>
<li><a href="./viewforum.php?f=27" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Hacking Ondemand (Hire a Hacker)</span></a></li>
<li><a href="./viewforum.php?f=28" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Hacking Tools(Malware, ransomware,Exploit)</span></a></li>
<li><a href="./viewforum.php?f=29" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Anonymity and Security</span></a></li>
<li><a href="./viewforum.php?f=30" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Carding Forum</span></a></li>
<li><a href="./viewforum.php?f=31" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Sell CC &amp; CVV</span></a></li>
<li><a href="./viewforum.php?f=32" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Social Media Hacking</span></a></li>
<li><a href="./viewforum.php?f=4" class="jumpbox-cat-link"> <span> Ponography</span></a></li>
<li><a href="./viewforum.php?f=14" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Soft Porn</span></a></li>
<li><a href="./viewforum.php?f=15" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Hard Porn</span></a></li>
<li><a href="./viewforum.php?f=5" class="jumpbox-cat-link"> <span> MarketPlaces</span></a></li>
<li><a href="./viewforum.php?f=16" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Drug Stores</span></a></li>
<li><a href="./viewforum.php?f=17" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Gun Shops</span></a></li>
<li><a href="./viewforum.php?f=18" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Documents</span></a></li>
<li><a href="./viewforum.php?f=19" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Banknotes (Fake Bills)</span></a></li>
<li><a href="./viewforum.php?f=20" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Transfers</span></a></li>
<li><a href="./viewforum.php?f=6" class="jumpbox-cat-link"> <span> Money Laundering and crypto currency</span></a></li>
<li><a href="./viewforum.php?f=23" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Cryptocurrency</span></a></li>
<li><a href="./viewforum.php?f=24" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Money Laundering</span></a></li>
<li><a href="./viewforum.php?f=25" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Crypto Mixers</span></a></li>
<li><a href="./viewforum.php?f=37" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Crypto Wallets</span></a></li>
<li><a href="./viewforum.php?f=7" class="jumpbox-forum-link"> <span> Extremist Groups</span></a></li>
<li><a href="./viewforum.php?f=8" class="jumpbox-cat-link"> <span> Onion Links</span></a></li>
<li><a href="./viewforum.php?f=22" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Onion Links</span></a></li>
<li><a href="./viewforum.php?f=10" class="jumpbox-cat-link"> <span> Tutorials</span></a></li>
<li><a href="./viewforum.php?f=11" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; General Tutorials</span></a></li>
<li><a href="./viewforum.php?f=12" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Hacking Tutorials</span></a></li>
<li><a href="./viewforum.php?f=13" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Cracking Tutorials</span></a></li>
<li><a href="./viewforum.php?f=21" class="jumpbox-forum-link"> <span> Politics</span></a></li>
<li><a href="./viewforum.php?f=34" class="jumpbox-forum-link"> <span> Research Chemicals</span></a></li>
<li><a href="./viewforum.php?f=35" class="jumpbox-forum-link"> <span> Recomended</span></a></li>
<li><a href="./viewforum.php?f=36" class="jumpbox-forum-link"> <span> Scamming Bastards</span></a></li>
<li><a href="./viewforum.php?f=38" class="jumpbox-cat-link"> <span> Organ Donation</span></a></li>
<li><a href="./viewforum.php?f=40" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Organ Market</span></a></li>
<li><a href="./viewforum.php?f=39" class="jumpbox-cat-link"> <span> Buy Girls 8-18</span></a></li>
<li><a href="./viewforum.php?f=41" class="jumpbox-sub-link"><span class="spacer"></span> <span>&nbsp; Human Trafficking</span></a></li>
</ul>
</div>
</div>
</div>
</div>
<div id="page-footer" class="page-footer" role="contentinfo">
<div class="navbar" role="navigation">
<div class="inner">
<ul id="nav-footer" class="nav-footer linklist compact" role="menubar">
<li class="breadcrumbs">
<span class="crumb"><a href="http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion" data-navbar-reference="home" title="Home"><i class="icon fa-home fa-fw" aria-hidden="true"></i><span>Home</span></a></span> <span class="crumb"><a href="./index.php" data-navbar-reference="index" title="Board"><span>Board</span></a></span> </li>
<li class="responsive-menu hidden rightside dropdown-container"><a href="javascript:void(0);" class="js-responsive-menu-link responsive-menu-link dropdown-toggle"><i class="icon fa-bars fa-fw" aria-hidden="true"></i></a><div class="dropdown"><div class="pointer"><div class="pointer-inner"></div></div><ul class="dropdown-contents"></ul></div></li><li class="small-icon icon-pages icon-page- rightside">
<a href="/app.php/about" title="about Abyss Forum" role="menuitem">
<span>about</span>
</a>
</li>
<li class="small-icon icon-pages icon-page- rightside">
<a href="/app.php/abyss-directory" title="Welcome to abyss directory link collection" role="menuitem">
<span>Abyss Directory</span>
</a>
</li>
<li class="rightside">All times are <span title="UTC">UTC</span></li>
<li class="small-icon icon-pages icon-page-abyss-directory rightside">
<a href="/app.php/abyss-directory" title="Welcome to abyss directory link collection" role="menuitem">
<span>Abyss Directory</span>
</a>
</li>
<li class="small-icon icon-pages icon-page-about rightside">
<a href="/app.php/about" title="about Abyss Forum" role="menuitem">
<span>about</span>
</a>
</li>
<li class="rightside">
<a href="./ucp.php?mode=delete_cookies" data-ajax="true" data-refresh="true" role="menuitem">
<i class="icon fa-trash fa-fw" aria-hidden="true"></i><span>Delete cookies</span>
</a>
</li>
<li class="small-icon icon-pages icon-page-abyss-directory rightside">
<a href="/app.php/abyss-directory" title="Welcome to abyss directory link collection" role="menuitem">
<span>Abyss Directory</span>
</a>
</li>
<li class="small-icon icon-pages icon-page-about rightside">
<a href="/app.php/about" title="about Abyss Forum" role="menuitem">
<span>about</span>
</a>
</li>
<li class="small-icon icon-pages icon-page-abyss-directory rightside">
<a href="/app.php/abyss-directory" title="Welcome to abyss directory link collection" role="menuitem">
<span>Abyss Directory</span>
</a>
</li>
<li class="small-icon icon-pages icon-page-about rightside">
<a href="/app.php/about" title="about Abyss Forum" role="menuitem">
<span>about</span>
</a>
</li>
<li class="rightside" data-last-responsive="true">
<a href="./memberlist.php?mode=contactadmin" role="menuitem">
<i class="icon fa-envelope fa-fw" aria-hidden="true"></i><span>Contact us</span>
</a>
</li>
</ul>
</div>
</div>
<div class="copyright">
<strong><span>*</span>Donate Bitcoin<a href="https://mempool.space/address/bc1qsg5q4s4zq937wglfkgfjrp4ek2y5ywm69lkzr5"><!-- The text field -->
<input type="text" value="(BTC)" id="myInput">
<!-- The button used to copy the text -->
<button onclick="myFunction()">bc1qsg5q4s4zq937wglfkgfjrp4ek2y5ywm69lkzr5</button></a></strong>
<br><strong><span>*</span>EMAIL US <a href="mailto:[email protected]">[email protected]</a></strong>
<br><strong><span>*</span>Style version: 1.1.5</strong>
<p class="footer-row">
<span class="footer-copyright">Powered by <a href="https://www.phpbb.com/">phpBB</a>® Forum Software © phpBB Limited</span>
</p>
<p class="footer-row">
<span>phpBB Two Factor Authentication © <a href="https://www.phpbbextensions.io/" target="_blank">paul999</a></span>
</p>
<p class="footer-row" role="menu">
<a class="footer-link" href="./ucp.php?mode=privacy" title="Privacy" role="menuitem">
<span class="footer-link-text">Privacy</span>
</a>
|
<a class="footer-link" href="./ucp.php?mode=terms" title="Terms" role="menuitem">
<span class="footer-link-text">Terms</span>
</a>
</p>
</div>
<div id="darkenwrapper" class="darkenwrapper" data-ajax-error-title="AJAX error" data-ajax-error-text="Something went wrong when processing your request." data-ajax-error-text-abort="User aborted request." data-ajax-error-text-timeout="Your request timed out; please try again." data-ajax-error-text-parsererror="Something went wrong with the request and the server returned an invalid reply.">
<div id="darken" class="darken">&nbsp;</div>
</div>
<div id="phpbb_alert" class="phpbb_alert" data-l-err="Error" data-l-timeout-processing-req="Request timed out.">
<a href="#" class="alert_close">
<i class="icon fa-times-circle fa-fw" aria-hidden="true"></i>
</a>
<h3 class="alert_title">&nbsp;</h3><p class="alert_text"></p>
</div>
<div id="phpbb_confirm" class="phpbb_alert">
<a href="#" class="alert_close">
<i class="icon fa-times-circle fa-fw" aria-hidden="true"></i>
</a>
<div class="alert_text"></div>
</div>
</div>
</div>
<div>
<a id="bottom" class="anchor" accesskey="z"></a>
</div>
<script type="text/javascript">
var vglnk = {
key: 'e4fd14f5d7f2bb6d80b8f8da1354718c',
sub_id: '278e906f85e6a29e5cf5e6781f9210b0'
};
(function(d, t) {
var s = d.createElement(t); s.type = 'text/javascript'; s.async = true;
s.src = '//cdn.viglink.com/api/vglnk.js';
var r = d.getElementsByTagName(t)[0]; r.parentNode.insertBefore(s, r);
}(document, 'script'));
</script>
</body></html>

Forums/AbyssForum/HTML_Pages/06262023/Description/viewtopicphpt849.html  (+1012, -0)
File diff suppressed because it is too large


Forums/AbyssForum/HTML_Pages/06262023/Listing/httpqyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjidonionviewforumphpf26sid6f7add746810784861a7ec31703a3757.html  (+1468, -0)
File diff suppressed because it is too large


Forums/AbyssForum/HTML_Pages/06262023/Listing/httpqyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjidonionviewforumphpf26start25.html  (+1462, -0)
File diff suppressed because it is too large


Forums/AbyssForum/crawler_selenium.py  (+16, -16)

@ -24,8 +24,6 @@ from Forums.Initialization.prepare_parser import new_parse
from Forums.AbyssForum.parser import abyssForum_links_parser
from Forums.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1
baseURL = 'http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/'
@ -49,6 +47,8 @@ def startCrawling():
# Opens Tor Browser
def opentor():
from Forums.Initialization.forums_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -91,6 +91,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
from Forums.Initialization.forums_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -143,16 +145,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name
def getFullPathName(url):
from Forums.Initialization.forums_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + getForumName() + "/HTML_Pages")
fileName = getNameFromURL(url)
if isDescriptionLink(url):
#..\CryptBB\HTML_Pages\\
fullPath = r'..\AbyssForum\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else:
fullPath = r'..\AbyssForum\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath
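This hunk, repeated in the other crawler diffs below, replaces the per-forum hard-coded relative paths built from date.today() with a single base directory taken from setup.ini's [Project] shared_folder plus the CURRENT_DATE stamp owned by forums_mining.py. A minimal, platform-neutral sketch of the resulting logic; the config keys and date format come from this commit, while the helper name and signature are illustrative only:

# Sketch only: simplified, platform-neutral version of the committed getFullPathName logic.
# 'config' and 'current_date' are the objects the crawlers now import from forums_mining.
import os

def build_full_path(config, current_date, forum_name, file_name, is_description):
    # e.g. <shared_folder>/Forums/AbyssForum/HTML_Pages/06262023/Listing/<file>.html
    main_dir = os.path.join(config.get('Project', 'shared_folder'),
                            'Forums', forum_name, 'HTML_Pages')
    sub_dir = 'Description' if is_description else 'Listing'
    return os.path.join(main_dir, current_date, sub_dir, file_name + '.html')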
@ -170,19 +170,19 @@ def getInterestedLinks():
links = []
# Hacked Database
links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=26&sid=6f7add746810784861a7ec31703a3757')
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=26')
# Hire a Hacker
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=27&sid=6f7add746810784861a7ec31703a3757')
links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=27')
# # Hacking Tools
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=28&sid=6f7add746810784861a7ec31703a3757')
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=28')
# # Carding Forums
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=30&sid=6f7add746810784861a7ec31703a3757')
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=30')
# # Social Media Hacking
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=32&sid=6f7add746810784861a7ec31703a3757')
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=32')
# # Hacking Tutorials
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=12&sid=6f7add746810784861a7ec31703a3757')
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=12')
# # Cracking Tutorials
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=13&sid=6f7add746810784861a7ec31703a3757')
# links.append('http://qyvjopwdgjq52ehsx6paonv2ophy3p4ivfkul4svcaw6qxlzsaboyjid.onion/viewforum.php?f=13')
return links


Forums/AbyssForum/parser.py  (+31, -127)

@ -202,139 +202,43 @@ def cryptBB_description_parser(soup):
return row
# This is the method to parse the Listing Pages (one page with many posts)
def onniForums_listing_parser(soup):
board = "-1" # board name (the previous level of the topic in the Forum categorization tree.
# For instance: Security/Malware/Tools to hack Facebook. The board here should be Malware)
nm = 0 # this variable should receive the number of topics
topic = [] # all topics
user = [] # all users of each topic
post = [] # number of posts of each topic
view = [] # number of views of each topic
addDate = [] # when the topic was created (difficult to find)
href = [] # this variable should receive all cleaned urls (we will use this to do the merge between
# Listing and Description pages)
# Finding the board (should be just one)
board = soup.find('span', {"class": "active"}).text
def AbyssForums_listing_parser(soup: BeautifulSoup):
board = "-1" # board name (the previous level of the topic in the Forum categorization tree.
# For instance: Security/Malware/Tools to hack Facebook. The board here should be Malware)
nm = 0 # this variable should receive the number of topics
topic = [] # all topics
user = [] # all users of each topic
post = [] # number of posts of each topic
view = [] # number of views of each topic
addDate = [] # when the topic was created (difficult to find)
href = [] # this variable should receive all cleaned urls (we will use this to do the merge between
# Listing and Description pages)
#finding the board
board = soup.find("title").text
board = cleanString(board.strip())
# Finding the repeated tag that corresponds to the listing of topics
type_of_posts = soup.find_all("li", {"class": re.compile("row bg\d")} )
for literature in type_of_posts:
title_of_post = literature.find("a", {"class": "topictitle"}).text
topic.append(title_of_post)
author = literature.find("div", {"class": "topic-poster responsive-hide left-box"}).find("a", {"class": "username"}).text
user.append(author)
num_post = literature.find("dd", {"class": "posts"}).text[1:-3]
post.append(num_post)
num_view = literature.find("dd", {"class": "views"}).text[1:-3]
view.append(num_view)
if int(num_post) != 0:
reply = literature.find("dd", {"class": "lastpost"}).find("a", {"class": "username"}).text
user.append(reply)
date_added = literature.find("time").text
addDate.append(date_added)
nm = len(topic)
itopics = soup.find_all('tr',{"class": "inline_row"})
index = 0
for itopic in itopics:
# For each topic found, the structure to get the rest of the information can be of two types. Testing all of them
# so we don't miss any topic
# Adding the topic to the topic list
try:
topics = itopic.find('span', {"class": "subject_old"}).find('a').text
except:
topics = itopic.find('span', {"class": "subject_new"}).find('a').text
topics = re.sub("\[\w*\]", '', topics)
topic.append(cleanString(topics))
# Counting how many topics we have found so far
nm = len(topic)
# Adding the url to the list of urls
try:
link = itopic.find('span', {"class": "subject_old"}).find('a').get('href')
except:
link = itopic.find('span',{"class": "subject_new"}).find('a').get('href')
link = cleanLink(link)
href.append(link)
# Finding the author of the topic
ps = itopic.find('div', {"class":"author smalltext"}).find('a').text
author = ps.strip()
user.append(cleanString(author))
# Finding the number of replies
columns = itopic.findChildren('td',recursive=False)
posts = columns[3].text
post.append(cleanString(posts))
# Finding the number of Views
tview = columns[4].text
view.append(cleanString(tview))
# If no information about when the topic was added, just assign "-1" to the variable
#dt = itopic.find('div', {"class": "responsive-hide"}).text.split('»')[1]
#dt = dt.strip()
#date_time_obj = datetime.strptime(dt,'%a %b %d, %Y %I:%M %p')
#addDate.append(date_time_obj)
addDate.append("-1")
index += 1
return organizeTopics("CryptBB", nm, topic, board, view, post, user, addDate, href)
# if len(tag) > 0:
#
# # Finding the topic
#
# tds = tds[0].find(tag[0])
# topics = tds.text
# topics = topics.replace(u"\xbb","")
# topics = topics.strip()
# topic.append(cleanString(topics))
#
# # Counting how many topics we have found so far
#
# nm = len(topic)
#
# # Adding the url to the list of urls
#
# link = tds.findAll('a', href=True)
# link = link[0].get('href')
# link = cleanLink(link)
# href.append(link)
#
# # Finding the author of the topic
#
# ps = itopic.find('td', {"class": tag[1]}).find('p').find('a')
# if ps == None:
# ps = itopic.find('td', {"class": tag[1]}).find('p')
# ps = ps.text.replace("Started by ","")
# else:
# ps = ps.text
# author = ps.strip()
# user.append(cleanString(author))
#
# # Finding the number of replies
#
# statistics = itopic.find('td', {"class": tag[2]})
# statistics = statistics.text
# statistics = statistics.split("Replies")
# posts = statistics[0].strip()
# post.append(cleanString(posts))
#
# # Finding the number of Views
#
# views = statistics[1]
# views = views.replace("Views","")
# views = views.strip()
# view.append(cleanString(views))
#
# # As no information about when the topic was added, just assign "-1" to the variable
#
# addDate.append("-1")
#return organizeTopics("TheMajesticGarden", nm, topic, board, view, post, user, addDate, href)
#need to change this method
def abyssForum_links_parser(soup):
# Returning all links that should be visited by the Crawler
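For reference, the new listing parser above walks phpBB topic rows (li elements whose class matches "row bg\d") and pulls the title, author, reply count, view count and post date using the selectors shown in the diff. A self-contained sketch of that extraction, assuming the same phpBB/prosilver markup as the saved listing pages in this commit; the function name and return shape are illustrative, not the committed API:

# Illustrative sketch, not the committed parser: extract topic rows from a saved
# phpBB listing page using the selectors referenced in the diff above.
import re
from bs4 import BeautifulSoup

def parse_phpbb_listing(html):
    soup = BeautifulSoup(html, 'html.parser')
    board = soup.find('title').text.strip()
    topics = []
    for row in soup.find_all('li', {'class': re.compile(r'row bg\d')}):
        poster = row.find('div', {'class': 'topic-poster responsive-hide left-box'})
        author = poster.find('a', {'class': 'username'}) if poster else None
        added = row.find('time')
        topics.append({
            'board': board,
            'title': row.find('a', {'class': 'topictitle'}).text.strip(),
            'author': author.text.strip() if author else '-1',
            'posts': row.find('dd', {'class': 'posts'}).text.strip(),   # raw text, e.g. "3 Replies"
            'views': row.find('dd', {'class': 'views'}).text.strip(),   # raw text, e.g. "120 Views"
            'added': added.text.strip() if added else '-1',
        })
    return topics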


Forums/Altenens/crawler_selenium.py  (+19, -20)

@ -24,8 +24,6 @@ from Forums.Initialization.prepare_parser import new_parse
from Forums.Altenens.parser import altenens_links_parser
from Forums.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1
baseURL = 'https://altenens.is/'
@ -49,6 +47,8 @@ def startCrawling():
# Opens Tor Browser
def opentor():
from Forums.Initialization.forums_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -61,18 +61,16 @@ def opentor():
# Login using premade account credentials and do login captcha manually
def login(driver):
#click login button
login = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div[1]/div/div/div/div[1]/a[1]').\
get_attribute('href')
driver.get(login)
# login.click()
# #entering username and password into input boxes
# usernameBox = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div[4]/div/div/div[3]/div/div/div/form/div[1]/div/dl[1]/dd')
# #Username here
# usernameBox.send_keys('mylittlepony45')#sends string to the username box
# passwordBox = driver.find_element(by=By.XPATH, value='/html/body/div[1]/div[4]/div/div/div[3]/div/div/div/form/div[1]/div/dl[2]/dd/div/div')
# #Password here
# passwordBox.send_keys('johnnyTest@18')# sends string to passwordBox
login = driver.find_element(by=By.XPATH, value='//*[@id="top"]/div[1]/div/div/div/div[1]/a[1]')
login.click()
#entering username and password into input boxes
usernameBox = driver.find_element(by=By.XPATH, value='//*[@id="_xfUid-1-1688066635"]')
#Username here
usernameBox.send_keys('mylittlepony45')#sends string to the username box
passwordBox = driver.find_element(by=By.XPATH, value='//*[@id="_xfUid-2-1688066635"]')
#Password here
passwordBox.send_keys('johnnyTest@18')# sends string to passwordBox
input("Press ENTER when CAPTCHA is completed\n")
@ -108,6 +106,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
from Forums.Initialization.forums_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -160,15 +160,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name
def getFullPathName(url):
from Forums.Initialization.forums_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + getForumName() + "/HTML_Pages")
fileName = getNameFromURL(url)
if isDescriptionLink(url):
fullPath = r'..\\Altenens\\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else:
fullPath = r'..\\Altenens\\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath


Forums/CryptBB/HTML_Pages/06262023/Description/Read/showthreadphptid16584.html  (+28, -0)
File diff suppressed because it is too large


Forums/CryptBB/HTML_Pages/06262023/Description/Read/showthreadphptid2628.html  (+28, -0)
File diff suppressed because it is too large


Forums/CryptBB/HTML_Pages/06262023/Listing/httpcryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebidonionforumdisplayphpfid86.html  (+8, -0)
File diff suppressed because it is too large


Forums/CryptBB/HTML_Pages/06262023/Listing/httpcryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebidonionforumdisplayphpfid86page2.html  (+8, -0)
File diff suppressed because it is too large


Forums/CryptBB/crawler_selenium.py  (+22, -19)

@ -17,37 +17,36 @@ from PIL import Image
import urllib.parse as urlparse
import os, re, time
import subprocess
import configparser
from bs4 import BeautifulSoup
from Forums.Initialization.prepare_parser import new_parse
from Forums.CryptBB.parser import cryptBB_links_parser
from Forums.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1
baseURL = 'http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/'
# Opens Tor Browser, crawls the website
def startCrawling():
opentor()
# opentor()
forumName = getForumName()
driver = getAccess()
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
# driver = getAccess()
#
# if driver != 'down':
# try:
# login(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
new_parse(forumName, baseURL, False)
# Opens Tor Browser
def opentor():
from Forums.Initialization.forums_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -133,6 +132,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
from Forums.Initialization.forums_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -186,12 +187,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name
def getFullPathName(url):
from Forums.Initialization.forums_mining import CURRENT_DATE
from Forums.Initialization.forums_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + getForumName() + "/HTML_Pages")
fileName = getNameFromURL(url)
if isDescriptionLink(url):
fullPath = r'..\\CryptBB\\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else:
fullPath = r'..\\CryptBB\\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath
@ -264,7 +267,7 @@ def crawlForum(driver):
driver.refresh()
savePage(driver.page_source, item)
driver.back()
'''
#variable to check if there is a next page for the topic
has_next_topic_page = True
counter = 1
@ -287,16 +290,16 @@ def crawlForum(driver):
if item == "":
raise NoSuchElementException
has_next_topic_page = False
else:
counter += 1
except NoSuchElementException:
has_next_topic_page = False
# end of loop
for i in range(counter):
driver.back()
'''
# comment out
break
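The triple-quoted block in this hunk is the per-topic pagination pattern used across these crawlers: keep following the topic's "next" link while counting pages, then call driver.back() once per page visited so the driver lands back on the listing it came from. A condensed sketch of that control flow; the "next"-link locator and savePage hook are placeholders, since each forum's crawlForum uses its own elements:

# Condensed sketch of the topic-pagination pattern (locator is a placeholder;
# each forum's crawlForum uses its own "next" element and savePage helper).
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

def crawl_topic_pages(driver, save_page):
    pages_visited = 1   # the topic's first page was already opened from the listing
    while True:
        save_page(driver.page_source, driver.current_url)
        try:
            nav = driver.find_element(by=By.LINK_TEXT, value='Next')  # placeholder locator
            href = nav.get_attribute('href')
            if not href:
                raise NoSuchElementException('no next page')
            driver.get(href)
            pages_visited += 1
        except NoSuchElementException:
            break
    # back out the same number of steps so the driver returns to the listing
    for _ in range(pages_visited):
        driver.back()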


Forums/DB_Connection/db_connection.py  (+2, -2)

@ -9,8 +9,8 @@ def connectDataBase():
try:
config = configparser.ConfigParser()
config.read('../../setup.ini')
from Forums.Initialization.forums_mining import config
ip = config.get('PostgreSQL', 'ip')
username = config.get('PostgreSQL', 'username')
password = config.get('PostgreSQL', 'password')
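Both DB_Connection modules now reuse the already-parsed config from the corresponding *_mining module instead of re-reading setup.ini with a fresh ConfigParser. A sketch of the adjusted helper; psycopg2 is assumed here as the PostgreSQL driver, and the 'database' key name is illustrative (only ip, username and password appear in this hunk):

# Sketch of the config-driven connection helper (psycopg2 assumed; the
# 'database' key is illustrative, the other keys appear in the hunk above).
import psycopg2

def connect_database(config):
    ip = config.get('PostgreSQL', 'ip')
    username = config.get('PostgreSQL', 'username')
    password = config.get('PostgreSQL', 'password')
    database = config.get('PostgreSQL', 'database')
    return psycopg2.connect(host=ip, user=username, password=password, dbname=database)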


Forums/HiddenAnswers/crawler_selenium.py  (+9, -9)

@ -24,8 +24,6 @@ from Forums.Initialization.prepare_parser import new_parse
from Forums.HiddenAnswers.parser import hiddenanswers_links_parser
from Forums.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1
baseURL = 'http://7eoz4h2nvw4zlr7gvlbutinqqpm546f5egswax54az6lt2u7e3t6d7yd.onion/'
@ -49,6 +47,8 @@ def startCrawling():
# Opens Tor Browser
def opentor():
from Forums.Initialization.forums_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -91,6 +91,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
from Forums.Initialization.forums_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -143,16 +145,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name
def getFullPathName(url):
from Forums.Initialization.forums_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + getForumName() + "/HTML_Pages")
fileName = getNameFromURL(url)
if isDescriptionLink(url):
#..\CryptBB\HTML_Pages\\
fullPath = r'..\HiddenAnswers\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else:
fullPath = r'..\HiddenAnswers\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath


Forums/Initialization/forumsList.txt  (+1, -0)

@ -1 +1,2 @@
OnniForums
CryptBB
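forumsList.txt now enables CryptBB alongside OnniForums. forums_mining.py presumably reads this file to decide which forum crawlers to launch on a given run; a hypothetical sketch of that dispatch, reusing crawler imports visible in the next diff (the dispatch table itself is an assumption, not shown in this commit):

# Hypothetical dispatch sketch: map each name in forumsList.txt to its crawler.
from Forums.AbyssForum.crawler_selenium import crawler as crawlerAbyssForum
from Forums.HiddenAnswers.crawler_selenium import crawler as crawlerHiddenAnswers
# ...one import per supported forum, as in forums_mining.py

CRAWLERS = {
    'AbyssForum': crawlerAbyssForum,
    'HiddenAnswers': crawlerHiddenAnswers,
    # 'OnniForums' and 'CryptBB' entries would follow the same pattern
}

def run_listed_forums(path='forumsList.txt'):
    with open(path) as f:
        for line in f:
            forum = line.strip()
            if forum in CRAWLERS:
                CRAWLERS[forum]()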

Forums/Initialization/forums_mining.py  (+8, -1)

@ -13,12 +13,18 @@ from Forums.AbyssForum.crawler_selenium import crawler as crawlerAbyssForum
from Forums.Altenens.crawler_selenium import crawler as crawlerAltenensForum
from Forums.HiddenAnswers.crawler_selenium import crawler as crawlerHiddenAnswers
<<<<<<< HEAD
=======
import configparser
>>>>>>> 98de158ee6f14b35b2a5ac61427d7de36b4ee442
import time
config = configparser.ConfigParser()
config.read('../../setup.ini')
CURRENT_DATE = str("%02d" % date.today().month) + str("%02d" % date.today().day) + str("%04d" % date.today().year)
@ -37,7 +43,8 @@ def createDirectory(forum):
if forum == 'Reddits':
pagesMainDir = '../' + forum
else:
pagesMainDir = '../' + forum + "/HTML_Pages"
# pagesMainDir = '../' + forum + "/HTML_Pages"
pagesMainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + forum + "/HTML_Pages")
if not os.path.isdir(pagesMainDir):
os.makedirs(pagesMainDir)
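Once the leftover merge-conflict markers in this hunk are resolved, forums_mining.py becomes the single owner of the parsed setup.ini (config) and of the CURRENT_DATE stamp; the crawlers import both lazily inside their functions, which sidesteps an import cycle since forums_mining itself imports every crawler module. A consolidated sketch of what the module now defines (Reddits special case omitted):

# Consolidated sketch of the shared state defined in forums_mining.py and
# imported lazily by the per-forum crawlers (Reddits special case omitted).
import configparser
import os
from datetime import date

config = configparser.ConfigParser()
config.read('../../setup.ini')

CURRENT_DATE = str("%02d" % date.today().month) + str("%02d" % date.today().day) + \
               str("%04d" % date.today().year)

def createDirectory(forum):
    # HTML pages now land under the configurable shared folder instead of '../<forum>'
    pagesMainDir = os.path.join(config.get('Project', 'shared_folder'),
                                "Forums/" + forum + "/HTML_Pages")
    if not os.path.isdir(pagesMainDir):
        os.makedirs(pagesMainDir)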


Forums/Initialization/geckodriver.log  (+53, -3)

@ -6224,8 +6224,13 @@ unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
<<<<<<< HEAD
1687895546413 geckodriver INFO Listening on 127.0.0.1:52237
1687895550932 mozrunner::runner INFO Running command: "C:\\Users\\\\Helium\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" "52238" "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\Helium\\AppData\\Local\\Temp\\rust_mozprofileiOR21Q"
=======
1687896430885 geckodriver INFO Listening on 127.0.0.1:50135
1687896434527 mozrunner::runner INFO Running command: "C:\\Users\\calsyslab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" ... "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofilenQCzgp"
>>>>>>> 98de158ee6f14b35b2a5ac61427d7de36b4ee442
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
@ -6234,7 +6239,7 @@ console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change"
1687895551675 Marionette INFO Marionette enabled
1687896435185 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
@ -6242,19 +6247,64 @@ JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't fin
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
<<<<<<< HEAD
DevTools listening on ws://localhost:52238/devtools/browser/ad1dc524-5cad-4983-9dd6-c7f6f3d5caee
1687895553974 Marionette INFO Listening on port 52243
1687895554561 RemoteAgent WARN TLS certificate errors will be ignored for this session
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofileiOR21Q\thumbnails) because it does not exist
1687895804567 Marionette INFO Stopped listening on port 52243
=======
DevTools listening on ws://localhost:50136/devtools/browser/773adaec-44e1-4b13-9fac-c38bfb170221
1687896436579 Marionette INFO Listening on port 50142
1687896436612 RemoteAgent WARN TLS certificate errors will be ignored for this session
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\calsyslab\AppData\Local\Temp\rust_mozprofilenQCzgp\thumbnails) because it does not exist
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/, line 2: ReferenceError: $ is not defined
1687896481968 Marionette INFO Stopped listening on port 50142
>>>>>>> 98de158ee6f14b35b2a5ac61427d7de36b4ee442
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\Helium\AppData\Local\Temp\rust_mozprofileiOR21Q\thumbnails) because it does not exist
JavaScript error: resource://gre/modules/PromiseWorker.jsm, line 106: Error: Could not get children of file(C:\Users\calsyslab\AppData\Local\Temp\rust_mozprofilenQCzgp\thumbnails) because it does not exist
###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
###!!! [Parent][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
1687895804907 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
1687896482482 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109
stop()@CDP.jsm:104
close()@RemoteAgent.jsm:138
1687897304511 geckodriver INFO Listening on 127.0.0.1:50201
1687897308111 mozrunner::runner INFO Running command: "C:\\Users\\calsyslab\\Desktop\\Tor Browser\\Browser\\firefox.exe" "--marionette" "--remote-debugging-port" ... "--remote-allow-hosts" "localhost" "-no-remote" "-profile" "C:\\Users\\CALSYS~1\\AppData\\Local\\Temp\\rust_mozprofile2TNTj7"
console.log: "TorSettings: loadFromPrefs()"
console.log: "TorConnect: init()"
console.log: "TorConnect: Entering Initial state"
console.log: "TorConnect: Observed profile-after-change"
console.log: "TorConnect: Observing topic 'TorProcessExited'"
console.log: "TorConnect: Observing topic 'TorLogHasWarnOrErr'"
console.log: "TorConnect: Observing topic 'torsettings:ready'"
console.log: "TorSettings: Observed profile-after-change"
1687897308686 Marionette INFO Marionette enabled
console.log: "TorConnect: Will load after bootstrap => [about:blank]"
console.error: "Could not load engine [email protected]: Error: Extension is invalid"
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XULStore.jsm, line 66: Error: Can't find profile directory.
JavaScript error: resource://gre/modules/XPCOMUtils.jsm, line 161: TypeError: Cc[aContract] is undefined
DevTools listening on ws://localhost:50202/devtools/browser/c30256b0-c71f-40da-a95f-bb1313b3e35e
1687897310328 Marionette INFO Listening on port 50208
1687897310788 RemoteAgent WARN TLS certificate errors will be ignored for this session
JavaScript error: http://cryptbbtg65gibadeeo2awe3j7s6evg7eklserehqr4w4e2bis5tebid.onion/, line 2: ReferenceError: $ is not defined
1687897315273 Marionette INFO Stopped listening on port 50208
JavaScript error: resource:///modules/Interactions.jsm, line 209: NS_ERROR_FAILURE: Component returned failure code: 0x80004005 (NS_ERROR_FAILURE) [nsIUserIdleService.removeIdleObserver]
!!! error running onStopped callback: TypeError: callback is not a function
JavaScript error: resource:///modules/sessionstore/SessionFile.jsm, line 375: Error: _initWorker called too early! Please read the session file from disk first.
JavaScript error: resource://gre/modules/PageThumbs.jsm, line 709: AbortError: IOUtils.profileBeforeChange getter: IOUtils: profileBeforeChange phase has already finished
1687897315776 RemoteAgent ERROR unable to stop listener: [Exception... "Component returned failure code: 0x8000ffff (NS_ERROR_UNEXPECTED) [nsIWindowMediator.getEnumerator]" nsresult: "0x8000ffff (NS_ERROR_UNEXPECTED)" location: "JS frame :: chrome://remote/content/cdp/observers/TargetObserver.jsm :: stop :: line 64" data: no] Stack trace: stop()@TargetObserver.jsm:64
unwatchForTabs()@TargetList.jsm:70
unwatchForTargets()@TargetList.jsm:37
destructor()@TargetList.jsm:109


+ 55
- 21
Forums/Initialization/prepare_parser.py View File

@ -2,7 +2,7 @@ __author__ = 'DarkWeb'
import codecs
import glob
import os
import os, re
import shutil
from Forums.DB_Connection.db_connection import *
from Forums.BestCardingWorld.parser import *
@ -80,7 +80,7 @@ def persist_data(url, row, cur):
#calls the different parser methods here depending on the type of html page
def new_parse(forum, url, createLog):
from Forums.Initialization.forums_mining import CURRENT_DATE
from Forums.Initialization.forums_mining import config, CURRENT_DATE
print("Parsing The " + forum + " Forum and conduct data classification to store the information in the database.")
@ -97,7 +97,8 @@ def new_parse(forum, url, createLog):
lines = [] # listing pages
lns = [] # description pages
detPage = {}
detPage = {} # first pages
other = {} # other pages
# Creating the log file for each Forum
if createLog:
@ -109,12 +110,14 @@ def new_parse(forum, url, createLog):
" in the _Logs folder to read files from this Forum of this date again.")
raise SystemExit
mainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + forum + "/HTML_Pages")
# Reading the Listing Html Pages
for fileListing in glob.glob(os.path.join("..\\" + forum + "\\HTML_Pages\\" + CURRENT_DATE + "\\Listing", '*.html')):
for fileListing in glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Listing", '*.html')):
lines.append(fileListing)
# Reading the Description Html Pages
for fileDescription in glob.glob(os.path.join("..\\" + forum + "\\HTML_Pages\\" + CURRENT_DATE + "\\Description" ,'*.html')):
for fileDescription in glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Description", '*.html')):
lns.append(fileDescription)
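
    The two glob changes above amount to reading the saved pages out of the configured shared folder instead of a path relative to the parser. A minimal sketch (the function name is illustrative; the Windows-style separators follow the diff):

    import glob
    import os

    def list_html_pages(mainDir, current_date):
        # Collect the Listing and Description pages saved for one crawl date
        listing = glob.glob(os.path.join(mainDir, current_date + "\\Listing", '*.html'))
        description = glob.glob(os.path.join(mainDir, current_date + "\\Description", '*.html'))
        return listing, description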
# Parsing the Description Pages and put the tag's content into a dictionary (Hash table)
@ -153,8 +156,17 @@ def new_parse(forum, url, createLog):
# key = u"Top:" + rmm[0].upper().strip() + u" User:" + rmm[2][0].upper().strip()
key = u"Url:" + os.path.basename(line2).replace(".html", "")
# save file address with description record in memory
detPage[key] = {'rmm': rmm, 'filename': os.path.basename(line2)}
# check whether the key contains "Page"/"page" followed by a series of numbers
# if it does, add it to 'other' (continuation pages); otherwise add it to the first-page dictionary
# save descriptions into a record in memory
check = re.compile(r'(?<=Page|page)[0-9]*')
if check.search(key):
# print(key, 'is an other page\n')
other[key] = {'rmm': rmm, 'filename': os.path.basename(line2)}
else:
# print(key, 'is a first page\n')
detPage[key] = {'rmm': rmm, 'files': [os.path.basename(line2)]}
except:
@ -163,6 +175,27 @@ def new_parse(forum, url, createLog):
if createLog:
logFile.write(str(nError) + ". There was a problem to parse the file " + line2 + " in the Description section.\n")
# go through the keys of detPage and other and check whether they match
# if they do, append other[k]'s values to detPage[key] without overwriting
for key in detPage.keys():
for k in list(other.keys()):
checkkey = str(key[4:])
checkk = str(k[4:])
if checkkey in checkk:
detPage[key]['rmm'][1].extend(other[k]['rmm'][1])
detPage[key]['rmm'][2].extend(other[k]['rmm'][2])
detPage[key]['rmm'][3].extend(other[k]['rmm'][3])
detPage[key]['rmm'][4].extend(other[k]['rmm'][4])
detPage[key]['rmm'][5].extend(other[k]['rmm'][5])
detPage[key]['rmm'][6].extend(other[k]['rmm'][6])
detPage[key]['rmm'][7].extend(other[k]['rmm'][7])
detPage[key]['rmm'][8].extend(other[k]['rmm'][8])
detPage[key]['files'].append(other[k]['filename'])
other.pop(k)
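
    A self-contained sketch of the new first-page/continuation-page handling shown above (the rmm indices, key format, and regex follow the diff; the helper names are illustrative):

    import os, re

    # continuation pages contain "page"/"Page" followed by digits, e.g. "...page2"
    PAGE_RE = re.compile(r'(?<=Page|page)[0-9]*')

    def split_pages(parsed):
        # parsed maps a description-file path to its rmm record
        first, other = {}, {}
        for path, rmm in parsed.items():
            key = u"Url:" + os.path.basename(path).replace(".html", "")
            if PAGE_RE.search(key):
                other[key] = {'rmm': rmm, 'filename': os.path.basename(path)}
            else:
                first[key] = {'rmm': rmm, 'files': [os.path.basename(path)]}
        return first, other

    def merge_pages(first, other):
        # Fold continuation pages into their first page without overwriting it
        for key in first.keys():
            for k in list(other.keys()):
                if str(key[4:]) in str(k[4:]):      # same thread URL prefix
                    for i in range(1, 9):           # rmm[1]..rmm[8] are per-post lists
                        first[key]['rmm'][i].extend(other[k]['rmm'][i])
                    first[key]['files'].append(other[k]['filename'])
                    other.pop(k)
        return first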
# Parsing the Listing Pages and put the tag's content into a list
for index, line1 in enumerate(lines):
@ -259,20 +292,21 @@ def new_parse(forum, url, createLog):
if not persistError:
# move description files of completed folder
source = line2.replace(os.path.basename(line2), "") + detPage[key]['filename']
destination = line2.replace(os.path.basename(line2), "") + r'Read/'
try:
shutil.move(source, destination)
num_persisted_moved += 1
except:
print("There was a problem to move the file " + detPage[key]['filename'] + " in the Description section!")
nError += 1
if createLog:
logFile.write(
str(nError) + ". There was a problem to move the file " + detPage[key]['filename'] + " in the Description section!.\n")
moveError = True
for filename in detPage[key]['files']:
source = line2.replace(os.path.basename(line2), "") + filename
destination = line2.replace(os.path.basename(line2), "") + r'Read/'
try:
shutil.move(source, destination)
num_persisted_moved += 1
except:
print("There was a problem to move the file " + filename + " in the Description section!")
nError += 1
if createLog:
logFile.write(
str(nError) + ". There was a problem to move the file " + filename + " in the Description section!.\n")
moveError = True
# if the associated description page is not read or not parsed
else:
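
    The move step now iterates over every filename recorded for the thread rather than a single file. Roughly, as a hedged sketch with the logging simplified:

    import os, shutil

    def move_read_files(line2, det_entry, logFile=None):
        # Move all description pages of a persisted record into the Read/ subfolder
        moved, errors = 0, 0
        folder = line2.replace(os.path.basename(line2), "")
        for filename in det_entry['files']:
            source = folder + filename
            destination = folder + r'Read/'
            try:
                shutil.move(source, destination)
                moved += 1
            except Exception:
                errors += 1
                if logFile:
                    logFile.write("Could not move " + filename + " to Read/.\n")
        return moved, errors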


+ 15
- 1
Forums/OnniForums/HTML_Pages/06272023/Listing/httponnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qdonionForumHackingCrackingtutorials.html
File diff suppressed because it is too large
View File


+ 14
- 16
Forums/OnniForums/crawler_selenium.py View File

@ -25,8 +25,6 @@ from Forums.Initialization.prepare_parser import new_parse
from Forums.OnniForums.parser import onniForums_links_parser
from Forums.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1
baseURL = 'http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/'
@ -50,6 +48,8 @@ def startCrawling():
# Opens Tor Browser
def opentor():
from Forums.Initialization.forums_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -109,6 +109,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
from Forums.Initialization.forums_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -163,16 +165,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name
def getFullPathName(url):
from Forums.Initialization.forums_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "Forums/" + getForumName() + "/HTML_Pages")
fileName = getNameFromURL(url)
if isDescriptionLink(url):
#..\CryptBB\HTML_Pages\\
fullPath = r'..\OnniForums\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else:
fullPath = r'..\OnniForums\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath
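
    The same refactor repeats in every crawler below, so one illustrative sketch of the path scheme is enough (the parameter names here are not from the code):

    import os

    def build_full_path(shared_folder, site_kind, site_name, current_date, file_name, is_description):
        # Layout: <shared_folder>/<Forums|MarketPlaces>/<site>/HTML_Pages/<date>/<Listing|Description>/<name>.html
        main_dir = os.path.join(shared_folder, site_kind + "/" + site_name + "/HTML_Pages")
        sub_dir = "Description" if is_description else "Listing"
        return os.path.join(main_dir, current_date, sub_dir, file_name + '.html')

    Compared with the old hard-coded r'..\OnniForums\HTML_Pages\\...' strings, every crawler now writes into the shared folder configured in setup.ini.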
@ -189,10 +189,10 @@ def getNameFromURL(url):
def getInterestedLinks():
links = []
# Hacking & Cracking tutorials
links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Hacking-Cracking-tutorials')
# # Hacking & Cracking tutorials
# links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Hacking-Cracking-tutorials')
# Hacking & Cracking questions
# links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Hacking-Cracking-questions')
links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Hacking-Cracking-questions')
# # Exploit PoCs
# links.append('http://onnii6niq53gv3rvjpi7z5axkasurk2x5w5lwliep4qyeb2azagxn4qd.onion/Forum-Exploit-PoCs')
# # Cracked software
@ -279,15 +279,13 @@ def crawlForum(driver):
for i in range(counter):
driver.back()
# comment out, one topic per page
break
# comment out, go through all pages
if count == 1:
count = 0
break
count = 0
break
try:
temp = driver.find_element(by=By.XPATH, value=


+ 9
- 5
MarketPlaces/AnonymousMarketplace/crawler_selenium.py View File

@ -25,8 +25,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.AnonymousMarketplace.parser import anonymous_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1
baseURL = 'http://3fqr7fgjaslhgmeiin5e2ky6ra5xkiafyzg7i36sfcehv3jvpgydteqd.onion/'
@ -52,6 +50,8 @@ def startCrawling():
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -90,6 +90,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -154,12 +156,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site
def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url)
if isDescriptionLink(url):
fullPath = r'..\AnonymousMarketplace\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else:
fullPath = r'..\AnonymousMarketplace\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath


+ 9
- 5
MarketPlaces/Apocalypse/crawler_selenium.py View File

@ -25,8 +25,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.Apocalypse.parser import apocalypse_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1
baseURL = 'http://apocam5hnoqskkmhr325nivjuh5phbmmggadxgcjabzzirap5iklkxad.onion/'
@ -52,6 +50,8 @@ def startCrawling():
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -90,6 +90,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -170,12 +172,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site
def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url)
if isDescriptionLink(url):
fullPath = r'..\Apocalypse\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else:
fullPath = r'..\Apocalypse\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath


+ 9
- 5
MarketPlaces/CityMarket/crawler_selenium.py View File

@ -26,8 +26,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.CityMarket.parser import city_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1
baseURL = 'http://wsptlnuoo3johqzcdlwuj5zcwfh2dwmswz6hahqctuxttvxpanypmwad.onion/'
@ -53,6 +51,8 @@ def startCrawling():
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -91,6 +91,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -167,12 +169,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site
def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url)
if isDescriptionLink(url):
fullPath = r'..\CityMarket\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else:
fullPath = r'..\CityMarket\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath


+ 10
- 5
MarketPlaces/CypherMarketplace/crawler_selenium.py View File

@ -2,6 +2,7 @@ __author__ = 'Helium'
'''
CypherMarketplace Forum Crawler (Selenium)
crawler done
'''
from selenium import webdriver
@ -24,8 +25,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.CypherMarketplace.parser import cyphermarketplace_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1
baseURL = 'http://6c5qa2ke2esh6ake6u6yoxjungz2czbbl7hqxl75v5k37frtzhxuk7ad.onion/'
@ -51,6 +50,8 @@ def startCrawling():
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -89,6 +90,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -167,12 +170,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site
def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url)
if isDescriptionLink(url):
fullPath = r'..\CypherMarketplace\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else:
fullPath = r'..\CypherMarketplace\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath


+ 2
- 2
MarketPlaces/DB_Connection/db_connection.py View File

@ -9,8 +9,8 @@ def connectDataBase():
try:
config = configparser.ConfigParser()
config.read('../../setup.ini')
from MarketPlaces.Initialization.markets_mining import config
ip = config.get('PostgreSQL', 'ip')
username = config.get('PostgreSQL', 'username')
password = config.get('PostgreSQL', 'password')
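
    connectDataBase() now pulls its settings from the shared config object. A minimal sketch of the idea, assuming psycopg2 as the driver and a placeholder database name, since neither appears in this hunk:

    import psycopg2

    def connect_database(config):
        # Credentials come from the [PostgreSQL] section of setup.ini
        ip = config.get('PostgreSQL', 'ip')
        username = config.get('PostgreSQL', 'username')
        password = config.get('PostgreSQL', 'password')
        # 'darkweb_markets' is a placeholder; the real dbname is not part of this diff
        return psycopg2.connect(host=ip, user=username, password=password, dbname='darkweb_markets')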


+ 14
- 14
MarketPlaces/DarkFox/crawler_selenium.py View File

@ -42,16 +42,17 @@ def startCrawling():
print(driver.current_url, e)
closetor(driver)
new_parse(mktName, False)
new_parse(mktName, baseURL, False)
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
path = open('../../path.txt').readline().strip()
pro = subprocess.Popen(path)
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
pid = pro.pid
time.sleep(7.5)
input('Tor Connected. Press ENTER to continue\n')
@ -93,12 +94,11 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
file = open('../../path.txt', 'r')
lines = file.readlines()
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(lines[0].strip())
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(lines[1].strip())
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
# ff_prof.set_preference("places.history.enabled", False)
# ff_prof.set_preference("privacy.clearOnShutdown.offlineApps", True)
# ff_prof.set_preference("privacy.clearOnShutdown.passwords", True)
@ -120,10 +120,11 @@ def createFFDriver():
ff_prof.set_preference("javascript.enabled", False)
ff_prof.update_preferences()
service = Service(lines[2].strip())
service = Service(config.get('TOR', 'geckodriver_path'))
driver = webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)
return driver
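
    The DarkFox change drops path.txt entirely; condensed, the config-driven driver setup looks like this (the imports assume the same Selenium FirefoxBinary/FirefoxProfile API the crawlers already use):

    import configparser
    from selenium import webdriver
    from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
    from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
    from selenium.webdriver.firefox.service import Service

    def create_tor_driver(ini_path='setup.ini'):
        # Build the Tor-Browser-backed Firefox driver from setup.ini instead of path.txt
        config = configparser.ConfigParser()
        config.read(ini_path)
        ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
        ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
        ff_prof.set_preference("javascript.enabled", False)   # as in the diff
        ff_prof.update_preferences()
        service = Service(config.get('TOR', 'geckodriver_path'))
        return webdriver.Firefox(firefox_binary=ff_binary, firefox_profile=ff_prof, service=service)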
@ -185,15 +186,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site
def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url)
if isDescriptionLink(url):
fullPath = r'..\DarkFox\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else:
fullPath = r'..\DarkFox\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath


+ 22
- 17
MarketPlaces/DarkMatter/crawler_selenium.py View File

@ -2,7 +2,8 @@ __author__ = 'Helium'
'''
DarkMatter Marketplace Crawler (Selenium)
this is a small marketplace so next page links are not coded in
website has connection issues
not working yet; still debugging
'''
from selenium import webdriver
@ -25,8 +26,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.DarkMatter.parser import darkmatter_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1
baseURL = 'http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/'
@ -52,6 +51,8 @@ def startCrawling():
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -90,8 +91,11 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
ff_prof.set_preference("places.history.enabled", False)
ff_prof.set_preference("privacy.clearOnShutdown.offlineApps", True)
@ -153,12 +157,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site
def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url)
if isDescriptionLink(url):
fullPath = r'..\DarkMatter\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else:
fullPath = r'..\DarkMatter\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath
@ -180,14 +186,12 @@ def getNameFromURL(url):
def getInterestedLinks():
links = []
# fraud software
links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=76')
# other
links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=133')
# # hacking
# digital
links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=73')
# # hack guides
# links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=94')
# # carding
# links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=118')
# # services
# links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=117')
# # software/malware
# links.append('http://darkmat3kdxestusl437urshpsravq7oqb7t3m36u2l62vnmmldzdmid.onion/market/products/?category=121')
@ -220,6 +224,7 @@ def crawlForum(driver):
has_next_page = True
while has_next_page:
list = productPages(html)
for item in list:
itemURL = urlparse.urljoin(baseURL, str(item))
try:
@ -238,9 +243,9 @@ def crawlForum(driver):
break
try:
temp = driver.find_element(by=By.XPATH, value=
'/html/body/table[1]/tbody/tr/td/form/div/div[2]/table[2]/tbody/tr')
link = temp.find_element(by=By.CLASS_NAME, value='button page-num').get_attribute('href')
nav = driver.find_element(by=By.XPATH, value='/html/body/table[1]/tbody/tr/td/form/div/div[2]/table[2]')
a = nav.find_element(by=By.LINK_TEXT, value=">")
link = a.get_attribute('href')
if link == "":
raise NoSuchElementException
try:
@ -268,7 +273,7 @@ def crawlForum(driver):
#@param: url of any url crawled
#return: true if is a description page, false if not
def isDescriptionLink(url):
if 'product/' in url and '/products/?category' not in url:
if 'products/' in url and '/products/?category' not in url:
return True
return False
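
    Taken together, the DarkMatter navigation changes reduce to the following sketch (the XPath and the URL check are copied from the diff; the function wrapper is illustrative):

    from selenium.webdriver.common.by import By
    from selenium.common.exceptions import NoSuchElementException

    def next_page_link(driver):
        # The pagination bar is the second table inside the form; ">" is the next-page anchor
        nav = driver.find_element(by=By.XPATH,
                                  value='/html/body/table[1]/tbody/tr/td/form/div/div[2]/table[2]')
        a = nav.find_element(by=By.LINK_TEXT, value=">")
        link = a.get_attribute('href')
        if link == "":
            raise NoSuchElementException
        return link

    def isDescriptionLink(url):
        # Product pages contain 'products/', while category listings contain '/products/?category'
        return 'products/' in url and '/products/?category' not in url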


+ 2
- 2
MarketPlaces/DarkMatter/parser.py View File

@ -281,10 +281,10 @@ def darkmatter_links_parser(soup):
# Returning all links that should be visited by the Crawler
href = []
listing = soup.findAll('td', {"class": "lefted"})
listing = soup.findAll('td', {"class": "lefted", 'colspan': '2'})
for a in listing:
bae = a.find('a', {"class": "lg bold"}, href=True)
bae = a.find('a', href=True)
link = bae['href']
href.append(link)
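
    The tightened selector in isolation; a minimal sketch with a defensive check added that the original parser does not have:

    from bs4 import BeautifulSoup

    def darkmatter_links_parser(soup):
        # soup is a BeautifulSoup document of a listing page;
        # product links live in two-column cells marked class="lefted" colspan="2"
        href = []
        for cell in soup.findAll('td', {"class": "lefted", 'colspan': '2'}):
            anchor = cell.find('a', href=True)
            if anchor is not None:
                href.append(anchor['href'])
        return href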

+ 9
- 5
MarketPlaces/DarkTor/crawler_selenium.py View File

@ -24,8 +24,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.DarkTor.parser import darktor_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1
baseURL = 'http://zuauw53dukqdmll5p3fld26ns2gepcyfmbofobjczdni6ecmkoitnfid.onion/'
@ -51,6 +49,8 @@ def startCrawling():
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -89,6 +89,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -153,12 +155,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site
def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url)
if isDescriptionLink(url):
fullPath = r'..\DarkTor\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else:
fullPath = r'..\DarkTor\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath


+ 8
- 5
MarketPlaces/DigitalThriftShop/crawler_selenium.py View File

@ -24,8 +24,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.DigitalThriftShop.parser import digitalthriftshop_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1
baseURL = 'http://kw4zlnfhxje7top26u57iosg55i7dzuljjcyswo2clgc3mdliviswwyd.onion/'
@ -89,6 +87,9 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -153,12 +154,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site
def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url)
if isDescriptionLink(url):
fullPath = r'..\DigitalThriftShop\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else:
fullPath = r'..\DigitalThriftShop\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath


+ 1066
- 17
MarketPlaces/Initialization/geckodriver.log
File diff suppressed because it is too large
View File


+ 5
- 3
MarketPlaces/Initialization/markets_mining.py View File

@ -21,10 +21,11 @@ from MarketPlaces.CityMarket.crawler_selenium import crawler as crawlerCityMarke
from MarketPlaces.DarkMatter.crawler_selenium import crawler as crawlerDarkMatter
from MarketPlaces.M00nkeyMarket.crawler_selenium import crawler as crawlerM00nkeyMarket
import configparser
import time
config = configparser.ConfigParser()
config.read('../../setup.ini')
CURRENT_DATE = str("%02d" % date.today().month) + str("%02d" % date.today().day) + str("%04d" % date.today().year)
@ -40,7 +41,8 @@ def getMarkets():
def createDirectory(mkt):
# Package should already be there, holding crawler and parser
pagesDir = '../' + mkt + '/HTML_Pages'
# pagesDir = '../' + mkt + '/HTML_Pages'
pagesDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + mkt + "/HTML_Pages")
if not os.path.isdir(pagesDir):
os.makedirs(pagesDir)


+ 5
- 3
MarketPlaces/Initialization/prepare_parser.py View File

@ -71,7 +71,7 @@ def persist_data(url, row, cur):
def new_parse(marketPlace, url, createLog):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
print("Parsing the " + marketPlace + " marketplace and conduct data classification to store the information in the database.")
@ -100,12 +100,14 @@ def new_parse(marketPlace, url, createLog):
" in the _Logs folder to read files from this Market Place of this date again.")
raise SystemExit
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + marketPlace + "/HTML_Pages")
# Reading the Listing Html Pages
for fileListing in glob.glob(os.path.join("..\\" + marketPlace + "\\HTML_Pages\\" + CURRENT_DATE + "\\Listing", '*.html')):
for fileListing in glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Listing", '*.html')):
lines.append(fileListing)
# Reading the Description Html Pages
for fileDescription in glob.glob(os.path.join("..\\" + marketPlace + "\\HTML_Pages\\" + CURRENT_DATE + "\\Description", '*.html')):
for fileDescription in glob.glob(os.path.join(mainDir, CURRENT_DATE + "\\Description", '*.html')):
lns.append(fileDescription)
# Parsing the Description Pages and put the tag's content into a dictionary (Hash table)


+ 9
- 5
MarketPlaces/LionMarketplace/crawler_selenium.py View File

@ -24,8 +24,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.LionMarketplace.parser import lionmarketplace_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1
baseURL = 'http://lionznqc2hg2wsp5vgruqait4cpknihwlje6hkjyi52lcl5ivyf7bcad.onion/'
@ -51,6 +49,8 @@ def startCrawling():
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -89,6 +89,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -154,12 +156,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site
def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url)
if isDescriptionLink(url):
fullPath = r'..\LionMarketplace\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else:
fullPath = r'..\LionMarketplace\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath


+ 9
- 5
MarketPlaces/M00nkeyMarket/crawler_selenium.py View File

@ -25,8 +25,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.M00nkeyMarket.parser import m00nkey_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1
baseURL = 'http://moonkey4f2mkcp6hpackeea356puiry27h3dz3hzbt3adbmsk4gs7wyd.onion/'
@ -52,6 +50,8 @@ def startCrawling():
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -90,6 +90,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -170,12 +172,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site
def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url)
if isDescriptionLink(url):
fullPath = r'..\M00nkeyMarket\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else:
fullPath = r'..\M00nkeyMarket\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath


+ 9
- 5
MarketPlaces/MikesGrandStore/crawler_selenium.py View File

@ -24,8 +24,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.MikesGrandStore.parser import mikesgrandstore_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1
baseURL = 'http://4yx2akutmkhwfgzlpdxiah7cknurw6vlddlq24fxa3r3ebophwgpvhyd.onion/'
@ -51,6 +49,8 @@ def startCrawling():
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -89,6 +89,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -153,12 +155,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site
def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url)
if isDescriptionLink(url):
fullPath = r'..\MikesGrandStore\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else:
fullPath = r'..\MikesGrandStore\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath


+ 21
- 17
MarketPlaces/ThiefWorld/crawler_selenium.py View File

@ -24,8 +24,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.ThiefWorld.parser import thiefworld_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1
baseURL = 'http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion/'
@ -33,24 +31,26 @@ baseURL = 'http://qsw7iurcrdwyml5kg4oxbmtqrcnpxiag3iumdarefzeunnyc2dnyljad.onion
# Opens Tor Browser, crawls the website, then parses, then closes tor
#acts like the main method for the crawler, another function at the end of this code calls this function later
def startCrawling():
opentor()
# mktName = getMKTName()
driver = getAccess()
# opentor()
mktName = getMKTName()
# driver = getAccess()
#
# if driver != 'down':
# try:
# login(driver)
# crawlForum(driver)
# except Exception as e:
# print(driver.current_url, e)
# closetor(driver)
if driver != 'down':
try:
login(driver)
crawlForum(driver)
except Exception as e:
print(driver.current_url, e)
closetor(driver)
# new_parse(forumName, baseURL, False)
new_parse(mktName, baseURL, False)
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -89,6 +89,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -161,12 +163,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site
def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url)
if isDescriptionLink(url):
fullPath = r'..\ThiefWorld\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else:
fullPath = r'..\ThiefWorld\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath


+ 9
- 5
MarketPlaces/Tor2door/crawler_selenium.py View File

@ -23,8 +23,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.Tor2door.parser import tor2door_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1
baseURL = 'http://yzrrne3pveltulbavydr2kiashvlnysdwclwmklo6cyjuqpxi7ku4xqd.onion'
@ -48,6 +46,8 @@ def startCrawling():
# Opens Tor Browser
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -131,6 +131,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -184,12 +186,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name
def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url)
if isDescriptionLink(url):
fullPath = r'..\Tor2door\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else:
fullPath = r'..\Tor2door\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath


+ 9
- 8
MarketPlaces/TorBay/crawler_selenium.py View File

@ -25,8 +25,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.TorBay.parser import torbay_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1
baseURL = 'http://torbay3253zck4ym5cbowwvrbfjjzruzthrx3np5y6owvifrnhy5ybid.onion/'
@ -52,6 +50,8 @@ def startCrawling():
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -90,6 +90,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -154,15 +156,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site
def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url)
if isDescriptionLink(url):
fullPath = r'..\TorBay\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else:
fullPath = r'..\TorBay\HTML_Pages\\' + str(
"%02d" % date.today().month) + str("%02d" % date.today().day) + str(
"%04d" % date.today().year) + r'\\' + r'Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath


+ 9
- 5
MarketPlaces/TorMarket/crawler_selenium.py View File

@ -24,8 +24,6 @@ from MarketPlaces.Initialization.prepare_parser import new_parse
from MarketPlaces.TorMarket.parser import tormarket_links_parser
from MarketPlaces.Utilities.utilities import cleanHTML
config = configparser.ConfigParser()
config.read('../../setup.ini')
counter = 1
baseURL = 'http://22222253ebafysmwyrl4uxfcs2xm6k7zb4xyse2csne73atvxu53gfad.onion/'
@ -51,6 +49,8 @@ def startCrawling():
# Opens Tor Browser
#prompts for ENTER input to continue
def opentor():
from MarketPlaces.Initialization.markets_mining import config
global pid
print("Connecting Tor...")
pro = subprocess.Popen(config.get('TOR', 'firefox_binary_path'))
@ -89,6 +89,8 @@ def closetor(driver):
# Creates FireFox 'driver' and configure its 'Profile'
# to use Tor proxy and socket
def createFFDriver():
from MarketPlaces.Initialization.markets_mining import config
ff_binary = FirefoxBinary(config.get('TOR', 'firefox_binary_path'))
ff_prof = FirefoxProfile(config.get('TOR', 'firefox_profile_path'))
@ -153,12 +155,14 @@ def savePage(page, url):
# Gets the full path of the page to be saved along with its appropriate file name
#@param: raw url as crawler crawls through every site
def getFullPathName(url):
from MarketPlaces.Initialization.markets_mining import CURRENT_DATE
from MarketPlaces.Initialization.markets_mining import config, CURRENT_DATE
mainDir = os.path.join(config.get('Project', 'shared_folder'), "MarketPlaces/" + getMKTName() + "/HTML_Pages")
fileName = getNameFromURL(url)
if isDescriptionLink(url):
fullPath = r'..\TorMarket\HTML_Pages\\' + CURRENT_DATE + r'\\Description\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Description\\' + fileName + '.html')
else:
fullPath = r'..\TorMarket\HTML_Pages\\' + CURRENT_DATE + r'\\Listing\\' + fileName + '.html'
fullPath = os.path.join(mainDir, CURRENT_DATE + r'\\Listing\\' + fileName + '.html')
return fullPath


+ 3
- 2
setup.ini View File

@ -1,11 +1,12 @@
[TOR]
firefox_binary_path = C:\\Users\\minhkhoitran\\Desktop\\Tor Browser\\Browser\\firefox.exe
firefox_profile_path = C:\\Users\\minhkhoitran\\Desktop\\Tor Browser\\Browser\\TorBrowser\\Data\\Browser\\profile.default
geckodriver_path = C:\\NSF-REU\\dw_pipeline_test\\selenium\\geckodriver.exe
[Project]
project_directory = C:\\NSF-REU\\dw_pipeline_test
shared_folder = \\VBoxSvr\\VM_Files_(shared)
project_directory = C:\Users\Helium\\PycharmProjects\dw_pipeline_test
shared_folder = \\VBoxSvr\Shared
[PostgreSQL]
ip = localhost

