import re
import string
import iso8601
from prettytoml import tokens
from prettytoml.tokens import TYPE_BOOLEAN, TYPE_INTEGER, TYPE_FLOAT, TYPE_DATE, \
    TYPE_MULTILINE_STRING, TYPE_BARE_STRING, TYPE_MULTILINE_LITERAL_STRING, TYPE_LITERAL_STRING, \
    TYPE_STRING
import codecs
import six
from prettytoml.tokens.errors import MalformedDateError
from .errors import BadEscapeCharacter
import functools
import operator


def deserialize(token):
    """
    Deserializes the value of a single tokens.Token instance based on its type.

    Raises DeserializationError when appropriate.
    """
    
    if token.type == TYPE_BOOLEAN:
        return _to_boolean(token)
    elif token.type == TYPE_INTEGER:
        return _to_int(token)
    elif token.type == TYPE_FLOAT:
        return _to_float(token)
    elif token.type == TYPE_DATE:
        return _to_date(token)
    elif token.type in (TYPE_STRING, TYPE_MULTILINE_STRING, TYPE_BARE_STRING,
                        TYPE_LITERAL_STRING, TYPE_MULTILINE_LITERAL_STRING):
        return _to_string(token)
    else:
        raise Exception('This should never happen!')


def _unescape_str(text):
    """
    Unescapes a string according the TOML spec. Raises BadEscapeCharacter when appropriate.
    """

    # Detect bad escape jobs
    bad_escape_regexp = re.compile(r'([^\\]|^)\\[^btnfr"\\uU]')
    if bad_escape_regexp.findall(text):
        raise BadEscapeCharacter

    # Do the unescaping
    if six.PY2:
        return _unicode_escaped_string(text).decode('string-escape').decode('unicode-escape')
    else:
        return codecs.decode(_unicode_escaped_string(text), 'unicode-escape')


def _unicode_escaped_string(text):
    """
    Escapes all unicode characters in the given string
    """

    if six.PY2:
        text = unicode(text)

    def is_unicode(c):
        return c.lower() not in string.ascii_letters + string.whitespace + string.punctuation + string.digits

    def escape_unicode_char(x):
        if six.PY2:
            return x.encode('unicode-escape')
        else:
            return codecs.encode(x, 'unicode-escape')

    if any(is_unicode(c) for c in text):
        homogeneous_chars = tuple(escape_unicode_char(c) if is_unicode(c) else c.encode() for c in text)
        homogeneous_bytes = functools.reduce(operator.add, homogeneous_chars)
        return homogeneous_bytes.decode()
    else:
        return text


def _to_string(token):
    if token.type == tokens.TYPE_BARE_STRING:
        return token.source_substring

    elif token.type == tokens.TYPE_STRING:
        escaped = token.source_substring[1:-1]
        return _unescape_str(escaped)

    elif token.type == tokens.TYPE_MULTILINE_STRING:
        escaped = token.source_substring[3:-3]

        # Drop the first newline if existed
        if escaped and escaped[0] == '\n':
            escaped = escaped[1:]

        # Remove all occurrences of a slash-newline-zero-or-more-whitespace patterns
        escaped = re.sub(r'\\\n\s*', repl='', string=escaped, flags=re.DOTALL)
        return _unescape_str(escaped)

    elif token.type == tokens.TYPE_LITERAL_STRING:
        return token.source_substring[1:-1]

    elif token.type == tokens.TYPE_MULTILINE_LITERAL_STRING:
        text = token.source_substring[3:-3]
        if text[0] == '\n':
            text = text[1:]
        return text

    raise RuntimeError('Control should never reach here.')


def _to_int(token):
    return int(token.source_substring.replace('_', ''))


def _to_float(token):
    assert token.type == tokens.TYPE_FLOAT
    string = token.source_substring.replace('_', '')
    return float(string)


def _to_boolean(token):
    return token.source_substring == 'true'


_correct_date_format = re.compile(r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(Z|(\+|-)\d{2}:\d{2})')


def _to_date(token):
    if not _correct_date_format.match(token.source_substring):
        raise MalformedDateError
    return iso8601.parse_date(token.source_substring)