__author__ = 'DarkWeb'

import re

# Here, we are importing the auxiliary functions to clean or convert data
from Forums.Utilities.utilities import *

# Here, we are importing BeautifulSoup to search through the HTML tree
from bs4 import BeautifulSoup


# This is the method to parse the Description Pages (one page to each topic in the Listing Pages)
#parses description pages, so takes html pages of description pages using soup object, and parses it for info it needs
#stores info it needs in different lists, these lists are returned after being organized
#@param: soup object looking at html page of description page
#return: 'row' that contains a variety of lists that each hold info on the description page
def darknetarmy_description_parser(soup):
    """Parse one description (thread) page into per-post field lists.

    The helpers ``cleanString``, ``datetime`` and ``timedelta`` are not
    imported explicitly in this module; presumably they come from the
    wildcard import of ``Forums.Utilities.utilities`` (confirm there).

    :param soup: BeautifulSoup object built from the HTML of a description page
    :return: tuple ``(topic, user, status, reputation, interest, sign, post,
        feedback, addDate, image_user, image_post)``; ``topic`` is a string,
        every other element is a list with one entry per post. Fields that
        are missing or not scraped for this forum hold the string ``'-1'``.
    :raises AttributeError: if the page lacks the title, the per-post
        ``message-inner`` wrapper or the author element.
    """
    # Fields to be parsed

    user = []  # 1 all users of each post
    status = []  # 2 all user's authority in each post such as (adm, member, dangerous)
    reputation = []  # 3 all users's karma in each post (usually found as a number)
    interest = []  # 4 all user's interest in each post
    sign = []  # 5 all user's signature in each post (usually a standard message after the content of the post)
    post = []  # 6 all messages of each post
    feedback = []  # 7 all feedbacks of each user (this was found in just one Forum and with a number format)
    addDate = []  # 8 all dates of each post
    image_user = []  # 9 all user avatars of each post
    image_post = []  # 10 all first images of each post

    # Finding the topic (should be just one coming from the Listing Page)  # 0 topic name
    topic = soup.find("h1", {"class": "p-title-value"}).text
    topic = topic.replace(",", "").replace("\n", "")
    topic = cleanString(topic.strip())

    # Finding the repeated tag that corresponds to the listing of posts
    posts = soup.findAll('article', class_=re.compile("message message--post js-post js-inlineModContainer.*"))

    # Compiled once; the pattern is the same for every post.
    attribution_re = re.compile(r'message-attribution-main listInline.*')

    # For each message (post), get all the fields we are interested to:
    for ipost in posts:

        # First level of the post markup; every other lookup is relative to it
        post_wrapper = ipost.find('div', {"class": "message-inner"})

        # Finding the author (user) of the post
        author = post_wrapper.find('div', {'class': 'message-userName'}).find('h4').text
        user.append(cleanString(author))  # Remember to clean the problematic characters

        # Finding the status of the author
        try:
            membergroup = post_wrapper.find('h5', {'class': 'userTitle message-userTitle'}).text
        except AttributeError:
            membergroup = '-1'
        status.append(cleanString(membergroup))

        # Reputation: the third <dl> of the user-extras block. A missing block
        # or a shorter list now yields '-1' instead of aborting the whole page.
        try:
            extras = post_wrapper.find('div', {'class': 'message-userExtras'}).find_all('dl')
            rep = _description_expand_count(extras[2].find('dd').text)
        except (AttributeError, IndexError):
            rep = '-1'
        reputation.append(rep)

        # Fields not available for this forum
        interest.append('-1')
        sign.append('-1')
        feedback.append('-1')
        image_post.append('-1')

        # Message body, falling back to the generic content container. The
        # fallback is looked up by its *class* (the previous code searched for
        # an attribute literally named "content", which could never match).
        body = post_wrapper.find('article', {'class': 'message-body js-selectToQuote'})
        if body is None:
            body = post_wrapper.find('div', {'class': 'message-content js-messageContent'})
        message = cleanString(body.text.strip()) if body is not None else '-1'
        post.append(message)

        # Date of the post
        try:
            raw_time = post_wrapper.find('ul', class_=attribution_re).find('time').text
        except AttributeError:
            addDate.append('-1')
        else:
            addDate.append(_description_resolve_date(raw_time))

        # Avatar of the author. The class is given without a trailing space:
        # BeautifulSoup compares against whitespace-split class tokens, so
        # 'message-avatar ' could never match any element.
        try:
            image = post_wrapper.find('div', {'class': 'message-avatar'}).find('img').get('src').split('base64,')[-1]
        except AttributeError:
            image = '-1'
        image_user.append(image)

    # Populate the final variable (this should be a list with all fields scraped)
    row = (topic, user, status, reputation, interest, sign, post, feedback, addDate, image_user, image_post)

    # Sending the results
    return row


def _description_expand_count(text):
    """Expand an abbreviated counter ('2K', '1.5k', '3M') into plain digits.

    Text that is not a number followed by a K/M suffix is returned stripped
    and with thousands separators removed, otherwise unchanged.
    """
    text = text.strip().replace(',', '')
    match = re.fullmatch(r'(\d+(?:\.\d+)?)\s*([KkMm])', text)
    if match is None:
        return text
    multiplier = 1000 if match.group(2) in 'Kk' else 1000000
    # Multiply numerically so that '1.5K' becomes '1500' rather than '1.5000'.
    return str(int(round(float(match.group(1)) * multiplier)))


def _description_resolve_date(raw):
    """Resolve a relative post date ('Today at ...', 'Monday at ...') to a date.

    Returns a ``datetime`` for "today" (as the previous code did), a
    ``'%Y-%m-%d'`` string for "yesterday" or a weekday name, and the cleaned
    input text for anything else (e.g. an absolute date).
    NOTE(review): relative dates are resolved against the moment of parsing,
    not the moment the page was crawled.
    """
    text = raw.replace(',', '').strip()
    lowered = text.lower()
    now = datetime.today()
    if 'today' in lowered:
        return now
    if 'yesterday' in lowered:
        return (now - timedelta(days=1)).strftime('%Y-%m-%d')
    # Ordered so that the index equals datetime.weekday() (Monday == 0).
    weekdays = ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')
    for index, name in enumerate(weekdays):
        if name in text:
            # A weekday name refers to its most recent *past* occurrence;
            # a same-day match is read as one week ago since the current
            # day would presumably be rendered as "Today".
            days_back = (now.weekday() - index) % 7 or 7
            return (now - timedelta(days=days_back)).strftime('%Y-%m-%d')
    return text


# This is the method to parse the Listing Pages (one page with many posts)
#parses listing pages, so takes html pages of listing pages using soup object, and parses it for info it needs
#stores info it needs in different lists, these lists are returned after being organized
#@param: soup object looking at html page of listing page
#return: 'row' that contains a variety of lists that each hold info on the listing page
def darknetarmy_listing_parser(soup):
    """Parse one listing page (a board with many topics) into field lists.

    The helpers ``cleanString``, ``organizeTopics``, ``datetime`` and
    ``timedelta`` are not imported explicitly in this module; presumably they
    come from the wildcard import of ``Forums.Utilities.utilities``.

    :param soup: BeautifulSoup object built from the HTML of a listing page
    :return: whatever ``organizeTopics`` returns for the collected forum name,
        topic count, board name and per-topic lists (author, topic, views,
        posts, href, addDate, image_author). Missing values are ``'-1'``.
    :raises AttributeError: if the page lacks the board title, the thread
        list container, or a topic lacks its author/title elements.
    """
    forum = "DarkNetArmy"  # 0 *forum name
    author = []  # 2 *all authors of each topic
    topic = []  # 3 *all topics
    views = []  # 4 number of views of each topic
    posts = []  # 5 number of posts of each topic
    href = []  # 6 all cleaned urls (used to merge Listing and Description pages)
    addDate = []  # 7 when the topic was created (difficult to find)
    image_author = []  # 8 all author avatars used in each topic

    # Finding the board (should be just one)  # 1 *board name
    board = soup.find('h1', {"class": "p-title-value"}).text
    board = board.replace(u"\xbb", "")
    board = cleanString(board.strip())

    # Finding the repeated tag that corresponds to the listing of topics
    itopics = soup.find('div', {"class": "structItemContainer-group js-threadList"}).find_all('div', class_=re.compile(
        r'^structItem structItem--thread js-inlineModContainer js-threadListItem.*'))

    nm = len(itopics)  # *number of topics

    # Compiled once; the pattern is the same for every topic.
    meta_re = re.compile(r'^structItem-cell structItem-cell--meta.*')

    for itopic in itopics:
        # authors
        a = itopic.find('ul', {"class": "structItem-parts"}).find('li').text
        author.append(cleanString(a.strip()))

        # topic and href both live in the title cell
        title_cell = itopic.find('div', {"class": 'structItem-title'})
        topic.append(cleanString(title_cell.text.strip()))
        href.append(title_cell.find('a').get('href'))

        # image
        try:
            image = itopic.find('div', {"class": 'structItem-iconContainer'}).find('img').get('src').split('base64,')[-1]
        except AttributeError:
            image = '-1'
        image_author.append(image)

        # add date: searched inside *this* topic. The previous code searched
        # the whole page, so every topic received the first topic's date.
        try:
            raw_time = itopic.find('li', {"class": 'structItem-startDate'}).find('time').text
        except AttributeError:
            addDate.append('-1')
        else:
            addDate.append(_listing_resolve_date(raw_time))

        # replies (first <dl>) and views (second <dl>) of the meta cell
        meta = itopic.find('div', class_=meta_re)
        pairs = meta.find_all('dl') if meta is not None else []
        posts.append(_listing_read_count(pairs, 0))
        views.append(_listing_read_count(pairs, 1))

    return organizeTopics(forum, nm, board, author, topic, views, posts, href, addDate, image_author)


def _listing_read_count(pairs, position):
    """Read the <dd> counter of ``pairs[position]`` as digits, or '-1' if absent."""
    try:
        text = cleanString(pairs[position].find('dd').text.strip())
    except (AttributeError, IndexError):
        return '-1'
    return _listing_expand_count(text)


def _listing_expand_count(text):
    """Expand an abbreviated counter ('2K', '1.5k', '3M') into plain digits.

    Text that is not a number followed by a K/M suffix is returned stripped
    and with thousands separators removed, otherwise unchanged.
    """
    text = text.strip().replace(',', '')
    match = re.fullmatch(r'(\d+(?:\.\d+)?)\s*([KkMm])', text)
    if match is None:
        return text
    multiplier = 1000 if match.group(2) in 'Kk' else 1000000
    # Multiply numerically so that '1.5K' becomes '1500' rather than '1.5000'.
    return str(int(round(float(match.group(1)) * multiplier)))


def _listing_resolve_date(raw):
    """Resolve a relative topic date ('Today at ...', 'Monday at ...') to a date.

    Returns a ``datetime`` for "today" (as the previous code did), a
    ``'%Y-%m-%d'`` string for "yesterday" or a weekday name, and the cleaned
    input text for anything else (e.g. an absolute date).
    NOTE(review): relative dates are resolved against the moment of parsing,
    not the moment the page was crawled.
    """
    text = raw.replace(',', '').strip()
    lowered = text.lower()
    now = datetime.today()
    if 'today' in lowered:
        return now
    if 'yesterday' in lowered:
        return (now - timedelta(days=1)).strftime('%Y-%m-%d')
    # Ordered so that the index equals datetime.weekday() (Monday == 0).
    weekdays = ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')
    for index, name in enumerate(weekdays):
        if name in text:
            # A weekday name refers to its most recent *past* occurrence;
            # a same-day match is read as one week ago since the current
            # day would presumably be rendered as "Today".
            days_back = (now.weekday() - index) % 7 or 7
            return (now - timedelta(days=days_back)).strftime('%Y-%m-%d')
    return text


#called by the crawler to get description links on a listing page
#@param: beautifulsoup object that is using the correct html page (listing page)
#return: list of description links from a listing page
def darknetarmy_links_parser(soup):
    """Collect the links the crawler should visit from a listing page.

    :param soup: BeautifulSoup object built from the HTML of a listing page
    :return: list with the ``href`` value of the first link found inside each
        ``structItem-title`` cell of the ``block-container--nodes`` container,
        in document order
    """
    container = soup.find('div', {"class": "block-container block-container--nodes"})
    title_cells = container.findAll('div', {"class": "structItem-title"})

    # One link per title cell: the first anchor that actually carries an href.
    return [cell.find('a', href=True)['href'] for cell in title_cells]