moved Vortex back to test

1 year ago · 07e71ec2ea
--- a/MarketPlaces/Vortex/crawler_selenium.py
+++ b/MarketPlaces/Vortex/crawler_selenium.py
@@ -26,19 +26,19 @@ from MarketPlaces.Vortex.parser import vortex_links_parser
 from MarketPlaces.Utilities.utilities import cleanHTML
 counter = 1
-baseURL = 'http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/login'
+baseURL = 'http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/'
 def startCrawling():
    mktName = getMKTName()
-    # driver = getAccess()
+    driver = getAccess()
-    #
+    if driver != 'down':
-    # if driver != 'down':
+        try:
-    #     try:
+            login(driver)
-    #         login(driver)
+            crawlForum(driver)
-    #         crawlForum(driver)
+        except Exception as e:
-    #     except Exception as e:
+            print(driver.current_url, e)
-    #         print(driver.current_url, e)
+        closeDriver(driver)
    #     closeDriver(driver)
    new_parse(mktName, baseURL, True)
@@ -137,15 +137,14 @@ def login(driver):
    input("Press ENTER when captcha is solved")
-    try:
+    # try:
-        agree_button = driver.find_element(by=By.NAME, value='login')
+    #     agree_button = driver.find_element(by=By.NAME, value='login')
-        agree_button.click()
+    #     agree_button.click()
-    except Exception as e:
+    # except Exception as e:
-        print('Problem with clicking login button', e)
+    #     print('Problem with clicking login button', e)
-
+    #
-    WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
+    # WebDriverWait(driver, 100).until(EC.visibility_of_element_located(
-        (By.XPATH, '//*[@id="main"]')))
+    #     (By.XPATH, '//*[@id="main"]')))
 def savePage(driver, page, url):
@@ -186,11 +185,11 @@ def getInterestedLinks():
    links = []
    # security and hacking
-    # links.append('http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/home?cat=Security+%26+Hacking')
+    links.append('http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/home?cat=Security+%26+Hacking')
    # fraud
    links.append('http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/home?cat=Fraud')
    # malware, nothing here for now
-    # links.append('http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/home?cat=Cracked+softwares%26comma%3B+Botnets+%26+Malware')
+    links.append('http://mq7ozbnrqdjc6cof3yakegs44kmo6vl3ajcyzdeya3zjtmi65jtmwqid.onion/home?cat=Cracked+softwares%26comma%3B+Botnets+%26+Malware')
    return links
@@ -228,12 +227,12 @@ def crawlForum(driver):
                    savePage(driver, driver.page_source, item)
                    driver.back()
-                    # comment out
+                #     # comment out
-                    break
+                #     break
                #
-                # comment out
+                # # comment out
-                if count == 1:
+                # if count == 1:
-                    break
+                #     break
                try:
                    temp = driver.find_element(by=By.XPATH, value = '//*[@id="main"]')
--- a/MarketPlaces/Vortex/parser.py
+++ b/MarketPlaces/Vortex/parser.py
@@ -142,10 +142,10 @@ def vortex_listing_parser(soup):
    href = []  # 22 Product_Links y
    temp = soup.find('main', {'id': 'main'}).find('section', {'id':'page_container'})
-    listings = temp.findAll('div', {"class": "product-card col-sm-6 col-md-3 col-xl-4 mb-5"})
+    listings = temp.findAll('div', {"class": "product-card col-sm-6 col-md-3 col-xl-4 mb-0"})
-    cat = soup.find('section', {'class': 'row px-md-4 mx-0 mb-3'}).find('ol').find_all('li')
+    # cat = soup.find('section', {'class': 'row px-md-4 mx-0 my-3'}).find('ol').find_all('li')
-    cat = cat[1].find('a').text
+    # cat = cat[1].find('a').text
    # Populating the Number of Products
    nm = len(listings)
@@ -203,9 +203,9 @@ def vortex_listing_parser(soup):
            MSValue = me
        MS.append(MSValue)
-        # Finding the category - check
+        # # Finding the category - check
-        category_text = cleanString(cat).strip()
+        # category_text = cleanString(cat).strip()
-        category.append(category_text)
+        # category.append(category_text)
        # Finding the hrefs - check
        description_link = listing.find('h4').find('a')['href']
@@ -278,7 +278,7 @@ def vortex_links_parser(soup):
    # Returning all links that should be visited by the Crawler
    href = []
-    listings = soup.find('main').findAll('div', {"class": "product-card col-sm-6 col-md-3 col-xl-4 mb-5"})
+    listings = soup.find('main').findAll('div', {"class": "product-card col-sm-6 col-md-3 col-xl-4 mb-0"})
    for listing in listings:
        # Adding the url to the list of urls