"""Utility functions common to the C and C++ domains."""

from __future__ import annotations

import re
from copy import deepcopy
from typing import TYPE_CHECKING, Any, Callable

from docutils import nodes

from sphinx import addnodes
from sphinx.util import logging

if TYPE_CHECKING:
    from docutils.nodes import TextElement

    from sphinx.config import Config

logger = logging.getLogger(__name__)

StringifyTransform = Callable[[Any], str]


_whitespace_re = re.compile(r'\s+')
anon_identifier_re = re.compile(r'(@[a-zA-Z0-9_])[a-zA-Z0-9_]*\b')
identifier_re = re.compile(r'''
    (   # This 'extends' _anon_identifier_re with the ordinary identifiers,
        # make sure they are in sync.
        (~?\b[a-zA-Z_])  # ordinary identifiers
    |   (@[a-zA-Z0-9_])  # our extension for names of anonymous entities
    )
    [a-zA-Z0-9_]*\b
''', flags=re.VERBOSE)
integer_literal_re = re.compile(r'[1-9][0-9]*(\'[0-9]+)*')
octal_literal_re = re.compile(r'0[0-7]*(\'[0-7]+)*')
hex_literal_re = re.compile(r'0[xX][0-9a-fA-F]+(\'[0-9a-fA-F]+)*')
binary_literal_re = re.compile(r'0[bB][01]+(\'[01]+)*')
integers_literal_suffix_re = re.compile(r'''
    # unsigned and/or (long) long, in any order, but at least one of them
    (
        ([uU]    ([lL]  |  (ll)  |  (LL))?)
        |
        (([lL]  |  (ll)  |  (LL))    [uU]?)
    )\b
    # the ending word boundary is important for distinguishing
    # between suffixes and UDLs in C++
''', flags=re.VERBOSE)
float_literal_re = re.compile(r'''
    [+-]?(
    # decimal
      ([0-9]+(\'[0-9]+)*[eE][+-]?[0-9]+(\'[0-9]+)*)
    | (([0-9]+(\'[0-9]+)*)?\.[0-9]+(\'[0-9]+)*([eE][+-]?[0-9]+(\'[0-9]+)*)?)
    | ([0-9]+(\'[0-9]+)*\.([eE][+-]?[0-9]+(\'[0-9]+)*)?)
    # hex
    | (0[xX][0-9a-fA-F]+(\'[0-9a-fA-F]+)*[pP][+-]?[0-9a-fA-F]+(\'[0-9a-fA-F]+)*)
    | (0[xX]([0-9a-fA-F]+(\'[0-9a-fA-F]+)*)?\.
        [0-9a-fA-F]+(\'[0-9a-fA-F]+)*([pP][+-]?[0-9a-fA-F]+(\'[0-9a-fA-F]+)*)?)
    | (0[xX][0-9a-fA-F]+(\'[0-9a-fA-F]+)*\.([pP][+-]?[0-9a-fA-F]+(\'[0-9a-fA-F]+)*)?)
    )
''', flags=re.VERBOSE)
float_literal_suffix_re = re.compile(r'[fFlL]\b')
# the ending word boundary is important for distinguishing between suffixes and UDLs in C++
char_literal_re = re.compile(r'''
    ((?:u8)|u|U|L)?
    '(
      (?:[^\\'])
    | (\\(
        (?:['"?\\abfnrtv])
      | (?:[0-7]{1,3})
      | (?:x[0-9a-fA-F]{2})
      | (?:u[0-9a-fA-F]{4})
      | (?:U[0-9a-fA-F]{8})
      ))
    )'
''', flags=re.VERBOSE)


def verify_description_mode(mode: str) -> None:
    if mode not in ('lastIsName', 'noneIsName', 'markType', 'markName', 'param', 'udl'):
        raise Exception("Description mode '%s' is invalid." % mode)


class NoOldIdError(Exception):
    # Used to avoid implementing unneeded id generation for old id schemes.
    pass


class ASTBaseBase:
    def __eq__(self, other: Any) -> bool:
        if type(self) is not type(other):
            return False
        try:
            for key, value in self.__dict__.items():
                if value != getattr(other, key):
                    return False
        except AttributeError:
            return False
        return True

    # Defining __hash__ = None is not strictly needed when __eq__ is defined.
    __hash__ = None  # type: ignore[assignment]

    def clone(self) -> Any:
        return deepcopy(self)

    def _stringify(self, transform: StringifyTransform) -> str:
        raise NotImplementedError(repr(self))

    def __str__(self) -> str:
        return self._stringify(lambda ast: str(ast))

    def get_display_string(self) -> str:
        return self._stringify(lambda ast: ast.get_display_string())

    def __repr__(self) -> str:
        return '<%s>' % self.__class__.__name__


################################################################################
# Attributes
################################################################################

class ASTAttribute(ASTBaseBase):
    def describe_signature(self, signode: TextElement) -> None:
        raise NotImplementedError(repr(self))


class ASTCPPAttribute(ASTAttribute):
    def __init__(self, arg: str) -> None:
        self.arg = arg

    def _stringify(self, transform: StringifyTransform) -> str:
        return "[[" + self.arg + "]]"

    def describe_signature(self, signode: TextElement) -> None:
        signode.append(addnodes.desc_sig_punctuation('[[', '[['))
        signode.append(nodes.Text(self.arg))
        signode.append(addnodes.desc_sig_punctuation(']]', ']]'))


class ASTGnuAttribute(ASTBaseBase):
    def __init__(self, name: str, args: ASTBaseParenExprList | None) -> None:
        self.name = name
        self.args = args

    def _stringify(self, transform: StringifyTransform) -> str:
        res = [self.name]
        if self.args:
            res.append(transform(self.args))
        return ''.join(res)


class ASTGnuAttributeList(ASTAttribute):
    def __init__(self, attrs: list[ASTGnuAttribute]) -> None:
        self.attrs = attrs

    def _stringify(self, transform: StringifyTransform) -> str:
        res = ['__attribute__((']
        first = True
        for attr in self.attrs:
            if not first:
                res.append(', ')
            first = False
            res.append(transform(attr))
        res.append('))')
        return ''.join(res)

    def describe_signature(self, signode: TextElement) -> None:
        txt = str(self)
        signode.append(nodes.Text(txt))


class ASTIdAttribute(ASTAttribute):
    """For simple attributes defined by the user."""

    def __init__(self, id: str) -> None:
        self.id = id

    def _stringify(self, transform: StringifyTransform) -> str:
        return self.id

    def describe_signature(self, signode: TextElement) -> None:
        signode.append(nodes.Text(self.id))


class ASTParenAttribute(ASTAttribute):
    """For paren attributes defined by the user."""

    def __init__(self, id: str, arg: str) -> None:
        self.id = id
        self.arg = arg

    def _stringify(self, transform: StringifyTransform) -> str:
        return self.id + '(' + self.arg + ')'

    def describe_signature(self, signode: TextElement) -> None:
        txt = str(self)
        signode.append(nodes.Text(txt))


class ASTAttributeList(ASTBaseBase):
    def __init__(self, attrs: list[ASTAttribute]) -> None:
        self.attrs = attrs

    def __len__(self) -> int:
        return len(self.attrs)

    def __add__(self, other: ASTAttributeList) -> ASTAttributeList:
        return ASTAttributeList(self.attrs + other.attrs)

    def _stringify(self, transform: StringifyTransform) -> str:
        return ' '.join(transform(attr) for attr in self.attrs)

    def describe_signature(self, signode: TextElement) -> None:
        if len(self.attrs) == 0:
            return
        self.attrs[0].describe_signature(signode)
        if len(self.attrs) == 1:
            return
        for attr in self.attrs[1:]:
            signode.append(addnodes.desc_sig_space())
            attr.describe_signature(signode)


################################################################################

class ASTBaseParenExprList(ASTBaseBase):
    pass


################################################################################

class UnsupportedMultiCharacterCharLiteral(Exception):
    pass


class DefinitionError(Exception):
    pass


class BaseParser:
    def __init__(self, definition: str, *,
                 location: nodes.Node | tuple[str, int] | str,
                 config: Config) -> None:
        self.definition = definition.strip()
        self.location = location  # for warnings
        self.config = config

        self.pos = 0
        self.end = len(self.definition)
        self.last_match: re.Match[str] | None = None
        self._previous_state: tuple[int, re.Match[str] | None] = (0, None)
        self.otherErrors: list[DefinitionError] = []

        # in our tests the following is set to False to capture bad parsing
        self.allowFallbackExpressionParsing = True

    def _make_multi_error(self, errors: list[Any], header: str) -> DefinitionError:
        if len(errors) == 1:
            if len(header) > 0:
                return DefinitionError(header + '\n' + str(errors[0][0]))
            else:
                return DefinitionError(str(errors[0][0]))
        result = [header, '\n']
        for e in errors:
            if len(e[1]) > 0:
                indent = '  '
                result.append(e[1])
                result.append(':\n')
                for line in str(e[0]).split('\n'):
                    if len(line) == 0:
                        continue
                    result.append(indent)
                    result.append(line)
                    result.append('\n')
            else:
                result.append(str(e[0]))
        return DefinitionError(''.join(result))

    @property
    def language(self) -> str:
        raise NotImplementedError

    def status(self, msg: str) -> None:
        # for debugging
        indicator = '-' * self.pos + '^'
        logger.debug(f"{msg}\n{self.definition}\n{indicator}")  # NoQA: G004

    def fail(self, msg: str) -> None:
        errors = []
        indicator = '-' * self.pos + '^'
        exMain = DefinitionError(
            'Invalid %s declaration: %s [error at %d]\n  %s\n  %s' %
            (self.language, msg, self.pos, self.definition, indicator))
        errors.append((exMain, "Main error"))
        for err in self.otherErrors:
            errors.append((err, "Potential other error"))
        self.otherErrors = []
        raise self._make_multi_error(errors, '')

    def warn(self, msg: str) -> None:
        logger.warning(msg, location=self.location)

    def match(self, regex: re.Pattern[str]) -> bool:
        match = regex.match(self.definition, self.pos)
        if match is not None:
            self._previous_state = (self.pos, self.last_match)
            self.pos = match.end()
            self.last_match = match
            return True
        return False

    def skip_string(self, string: str) -> bool:
        strlen = len(string)
        if self.definition[self.pos:self.pos + strlen] == string:
            self.pos += strlen
            return True
        return False

    def skip_word(self, word: str) -> bool:
        return self.match(re.compile(r'\b%s\b' % re.escape(word)))

    def skip_ws(self) -> bool:
        return self.match(_whitespace_re)

    def skip_word_and_ws(self, word: str) -> bool:
        if self.skip_word(word):
            self.skip_ws()
            return True
        return False

    def skip_string_and_ws(self, string: str) -> bool:
        if self.skip_string(string):
            self.skip_ws()
            return True
        return False

    @property
    def eof(self) -> bool:
        return self.pos >= self.end

    @property
    def current_char(self) -> str:
        try:
            return self.definition[self.pos]
        except IndexError:
            return 'EOF'

    @property
    def matched_text(self) -> str:
        if self.last_match is not None:
            return self.last_match.group()
        return ''

    def read_rest(self) -> str:
        rv = self.definition[self.pos:]
        self.pos = self.end
        return rv

    def assert_end(self, *, allowSemicolon: bool = False) -> None:
        self.skip_ws()
        if allowSemicolon:
            if not self.eof and self.definition[self.pos:] != ';':
                self.fail('Expected end of definition or ;.')
        else:
            if not self.eof:
                self.fail('Expected end of definition.')

    ################################################################################

    @property
    def id_attributes(self):
        raise NotImplementedError

    @property
    def paren_attributes(self):
        raise NotImplementedError

    def _parse_balanced_token_seq(self, end: list[str]) -> str:
        # TODO: add handling of string literals and similar
        brackets = {'(': ')', '[': ']', '{': '}'}
        startPos = self.pos
        symbols: list[str] = []
        while not self.eof:
            if len(symbols) == 0 and self.current_char in end:
                break
            if self.current_char in brackets:
                symbols.append(brackets[self.current_char])
            elif len(symbols) > 0 and self.current_char == symbols[-1]:
                symbols.pop()
            elif self.current_char in ")]}":
                self.fail("Unexpected '%s' in balanced-token-seq." % self.current_char)
            self.pos += 1
        if self.eof:
            self.fail("Could not find end of balanced-token-seq starting at %d."
                      % startPos)
        return self.definition[startPos:self.pos]

    def _parse_attribute(self) -> ASTAttribute | None:
        self.skip_ws()
        # try C++11 style
        startPos = self.pos
        if self.skip_string_and_ws('['):
            if not self.skip_string('['):
                self.pos = startPos
            else:
                # TODO: actually implement the correct grammar
                arg = self._parse_balanced_token_seq(end=[']'])
                if not self.skip_string_and_ws(']'):
                    self.fail("Expected ']' in end of attribute.")
                if not self.skip_string_and_ws(']'):
                    self.fail("Expected ']' in end of attribute after [[...]")
                return ASTCPPAttribute(arg)

        # try GNU style
        if self.skip_word_and_ws('__attribute__'):
            if not self.skip_string_and_ws('('):
                self.fail("Expected '(' after '__attribute__'.")
            if not self.skip_string_and_ws('('):
                self.fail("Expected '(' after '__attribute__('.")
            attrs = []
            while 1:
                if self.match(identifier_re):
                    name = self.matched_text
                    exprs = self._parse_paren_expression_list()
                    attrs.append(ASTGnuAttribute(name, exprs))
                if self.skip_string_and_ws(','):
                    continue
                if self.skip_string_and_ws(')'):
                    break
                self.fail("Expected identifier, ')', or ',' in __attribute__.")
            if not self.skip_string_and_ws(')'):
                self.fail("Expected ')' after '__attribute__((...)'")
            return ASTGnuAttributeList(attrs)

        # try the simple id attributes defined by the user
        for id in self.id_attributes:
            if self.skip_word_and_ws(id):
                return ASTIdAttribute(id)

        # try the paren attributes defined by the user
        for id in self.paren_attributes:
            if not self.skip_string_and_ws(id):
                continue
            if not self.skip_string('('):
                self.fail("Expected '(' after user-defined paren-attribute.")
            arg = self._parse_balanced_token_seq(end=[')'])
            if not self.skip_string(')'):
                self.fail("Expected ')' to end user-defined paren-attribute.")
            return ASTParenAttribute(id, arg)

        return None

    def _parse_attribute_list(self) -> ASTAttributeList:
        res = []
        while True:
            attr = self._parse_attribute()
            if attr is None:
                break
            res.append(attr)
        return ASTAttributeList(res)

    def _parse_paren_expression_list(self) -> ASTBaseParenExprList | None:
        raise NotImplementedError