1 files changed, 464 insertions, 0 deletions
diff --git a/sphinx/util/cfamily.py b/sphinx/util/cfamily.py
new file mode 100644
index 0000000..a3fdbe3
--- /dev/null
+++ b/sphinx/util/cfamily.py
@@ -0,0 +1,464 @@
+"""Utility functions common to the C and C++ domains."""
+
+from __future__ import annotations
+
+import re
+from copy import deepcopy
+from typing import TYPE_CHECKING, Any, Callable
+
+from docutils import nodes
+
+from sphinx import addnodes
+from sphinx.util import logging
+
+if TYPE_CHECKING:
+    from docutils.nodes import TextElement
+
+    from sphinx.config import Config
+
+logger = logging.getLogger(__name__)
+
+StringifyTransform = Callable[[Any], str]
+
+
+_whitespace_re = re.compile(r'\s+')
+anon_identifier_re = re.compile(r'(@[a-zA-Z0-9_])[a-zA-Z0-9_]*\b')
+identifier_re = re.compile(r'''
+    (   # This 'extends' _anon_identifier_re with the ordinary identifiers,
+        # make sure they are in sync.
+        (~?\b[a-zA-Z_])  # ordinary identifiers
+    |   (@[a-zA-Z0-9_])  # our extension for names of anonymous entities
+    )
+    [a-zA-Z0-9_]*\b
+''', flags=re.VERBOSE)
+integer_literal_re = re.compile(r'[1-9][0-9]*(\'[0-9]+)*')
+octal_literal_re = re.compile(r'0[0-7]*(\'[0-7]+)*')
+hex_literal_re = re.compile(r'0[xX][0-9a-fA-F]+(\'[0-9a-fA-F]+)*')
+binary_literal_re = re.compile(r'0[bB][01]+(\'[01]+)*')
+integers_literal_suffix_re = re.compile(r'''
+    # unsigned and/or (long) long, in any order, but at least one of them
+    (
+        ([uU]    ([lL]  |  (ll)  |  (LL))?)
+        |
+        (([lL]  |  (ll)  |  (LL))    [uU]?)
+    )\b
+    # the ending word boundary is important for distinguishing
+    # between suffixes and UDLs in C++
+''', flags=re.VERBOSE)
+float_literal_re = re.compile(r'''
+    [+-]?(
+    # decimal
+      ([0-9]+(\'[0-9]+)*[eE][+-]?[0-9]+(\'[0-9]+)*)
+    | (([0-9]+(\'[0-9]+)*)?\.[0-9]+(\'[0-9]+)*([eE][+-]?[0-9]+(\'[0-9]+)*)?)
+    | ([0-9]+(\'[0-9]+)*\.([eE][+-]?[0-9]+(\'[0-9]+)*)?)
+    # hex
+    | (0[xX][0-9a-fA-F]+(\'[0-9a-fA-F]+)*[pP][+-]?[0-9a-fA-F]+(\'[0-9a-fA-F]+)*)
+    | (0[xX]([0-9a-fA-F]+(\'[0-9a-fA-F]+)*)?\.
+        [0-9a-fA-F]+(\'[0-9a-fA-F]+)*([pP][+-]?[0-9a-fA-F]+(\'[0-9a-fA-F]+)*)?)
+    | (0[xX][0-9a-fA-F]+(\'[0-9a-fA-F]+)*\.([pP][+-]?[0-9a-fA-F]+(\'[0-9a-fA-F]+)*)?)
+    )
+''', flags=re.VERBOSE)
+float_literal_suffix_re = re.compile(r'[fFlL]\b')
+# the ending word boundary is important for distinguishing between suffixes and UDLs in C++
+char_literal_re = re.compile(r'''
+    ((?:u8)|u|U|L)?
+    '(
+      (?:[^\\'])
+    | (\\(
+        (?:['"?\\abfnrtv])
+      | (?:[0-7]{1,3})
+      | (?:x[0-9a-fA-F]{2})
+      | (?:u[0-9a-fA-F]{4})
+      | (?:U[0-9a-fA-F]{8})
+      ))
+    )'
+''', flags=re.VERBOSE)
+
+
+def verify_description_mode(mode: str) -> None:
+    if mode not in ('lastIsName', 'noneIsName', 'markType', 'markName', 'param', 'udl'):
+        raise Exception("Description mode '%s' is invalid." % mode)
+
+
+class NoOldIdError(Exception):
+    # Used to avoid implementing unneeded id generation for old id schemes.
+    pass
+
+
+class ASTBaseBase:
+    def __eq__(self, other: Any) -> bool:
+        if type(self) is not type(other):
+            return False
+        try:
+            for key, value in self.__dict__.items():
+                if value != getattr(other, key):
+                    return False
+        except AttributeError:
+            return False
+        return True
+
+    # Defining __hash__ = None is not strictly needed when __eq__ is defined.
+    __hash__ = None  # type: ignore[assignment]
+
+    def clone(self) -> Any:
+        return deepcopy(self)
+
+    def _stringify(self, transform: StringifyTransform) -> str:
+        raise NotImplementedError(repr(self))
+
+    def __str__(self) -> str:
+        return self._stringify(lambda ast: str(ast))
+
+    def get_display_string(self) -> str:
+        return self._stringify(lambda ast: ast.get_display_string())
+
+    def __repr__(self) -> str:
+        return '<%s>' % self.__class__.__name__
+
+
+################################################################################
+# Attributes
+################################################################################
+
+class ASTAttribute(ASTBaseBase):
+    def describe_signature(self, signode: TextElement) -> None:
+        raise NotImplementedError(repr(self))
+
+
+class ASTCPPAttribute(ASTAttribute):
+    def __init__(self, arg: str) -> None:
+        self.arg = arg
+
+    def _stringify(self, transform: StringifyTransform) -> str:
+        return "[[" + self.arg + "]]"
+
+    def describe_signature(self, signode: TextElement) -> None:
+        signode.append(addnodes.desc_sig_punctuation('[[', '[['))
+        signode.append(nodes.Text(self.arg))
+        signode.append(addnodes.desc_sig_punctuation(']]', ']]'))
+
+
+class ASTGnuAttribute(ASTBaseBase):
+    def __init__(self, name: str, args: ASTBaseParenExprList | None) -> None:
+        self.name = name
+        self.args = args
+
+    def _stringify(self, transform: StringifyTransform) -> str:
+        res = [self.name]
+        if self.args:
+            res.append(transform(self.args))
+        return ''.join(res)
+
+
+class ASTGnuAttributeList(ASTAttribute):
+    def __init__(self, attrs: list[ASTGnuAttribute]) -> None:
+        self.attrs = attrs
+
+    def _stringify(self, transform: StringifyTransform) -> str:
+        res = ['__attribute__((']
+        first = True
+        for attr in self.attrs:
+            if not first:
+                res.append(', ')
+            first = False
+            res.append(transform(attr))
+        res.append('))')
+        return ''.join(res)
+
+    def describe_signature(self, signode: TextElement) -> None:
+        txt = str(self)
+        signode.append(nodes.Text(txt))
+
+
+class ASTIdAttribute(ASTAttribute):
+    """For simple attributes defined by the user."""
+
+    def __init__(self, id: str) -> None:
+        self.id = id
+
+    def _stringify(self, transform: StringifyTransform) -> str:
+        return self.id
+
+    def describe_signature(self, signode: TextElement) -> None:
+        signode.append(nodes.Text(self.id))
+
+
+class ASTParenAttribute(ASTAttribute):
+    """For paren attributes defined by the user."""
+
+    def __init__(self, id: str, arg: str) -> None:
+        self.id = id
+        self.arg = arg
+
+    def _stringify(self, transform: StringifyTransform) -> str:
+        return self.id + '(' + self.arg + ')'
+
+    def describe_signature(self, signode: TextElement) -> None:
+        txt = str(self)
+        signode.append(nodes.Text(txt))
+
+
+class ASTAttributeList(ASTBaseBase):
+    def __init__(self, attrs: list[ASTAttribute]) -> None:
+        self.attrs = attrs
+
+    def __len__(self) -> int:
+        return len(self.attrs)
+
+    def __add__(self, other: ASTAttributeList) -> ASTAttributeList:
+        return ASTAttributeList(self.attrs + other.attrs)
+
+    def _stringify(self, transform: StringifyTransform) -> str:
+        return ' '.join(transform(attr) for attr in self.attrs)
+
+    def describe_signature(self, signode: TextElement) -> None:
+        if len(self.attrs) == 0:
+            return
+        self.attrs[0].describe_signature(signode)
+        if len(self.attrs) == 1:
+            return
+        for attr in self.attrs[1:]:
+            signode.append(addnodes.desc_sig_space())
+            attr.describe_signature(signode)
+
+
+################################################################################
+
+class ASTBaseParenExprList(ASTBaseBase):
+    pass
+
+
+################################################################################
+
+class UnsupportedMultiCharacterCharLiteral(Exception):
+    pass
+
+
+class DefinitionError(Exception):
+    pass
+
+
+class BaseParser:
+    def __init__(self, definition: str, *,
+                 location: nodes.Node | tuple[str, int] | str,
+                 config: Config) -> None:
+        self.definition = definition.strip()
+        self.location = location  # for warnings
+        self.config = config
+
+        self.pos = 0
+        self.end = len(self.definition)
+        self.last_match: re.Match[str] | None = None
+        self._previous_state: tuple[int, re.Match[str] | None] = (0, None)
+        self.otherErrors: list[DefinitionError] = []
+
+        # in our tests the following is set to False to capture bad parsing
+        self.allowFallbackExpressionParsing = True
+
+    def _make_multi_error(self, errors: list[Any], header: str) -> DefinitionError:
+        if len(errors) == 1:
+            if len(header) > 0:
+                return DefinitionError(header + '\n' + str(errors[0][0]))
+            else:
+                return DefinitionError(str(errors[0][0]))
+        result = [header, '\n']
+        for e in errors:
+            if len(e[1]) > 0:
+                indent = '  '
+                result.append(e[1])
+                result.append(':\n')
+                for line in str(e[0]).split('\n'):
+                    if len(line) == 0:
+                        continue
+                    result.append(indent)
+                    result.append(line)
+                    result.append('\n')
+            else:
+                result.append(str(e[0]))
+        return DefinitionError(''.join(result))
+
+    @property
+    def language(self) -> str:
+        raise NotImplementedError
+
+    def status(self, msg: str) -> None:
+        # for debugging
+        indicator = '-' * self.pos + '^'
+        logger.debug(f"{msg}\n{self.definition}\n{indicator}")  # NoQA: G004
+
+    def fail(self, msg: str) -> None:
+        errors = []
+        indicator = '-' * self.pos + '^'
+        exMain = DefinitionError(
+            'Invalid %s declaration: %s [error at %d]\n  %s\n  %s' %
+            (self.language, msg, self.pos, self.definition, indicator))
+        errors.append((exMain, "Main error"))
+        for err in self.otherErrors:
+            errors.append((err, "Potential other error"))
+        self.otherErrors = []
+        raise self._make_multi_error(errors, '')
+
+    def warn(self, msg: str) -> None:
+        logger.warning(msg, location=self.location)
+
+    def match(self, regex: re.Pattern[str]) -> bool:
+        match = regex.match(self.definition, self.pos)
+        if match is not None:
+            self._previous_state = (self.pos, self.last_match)
+            self.pos = match.end()
+            self.last_match = match
+            return True
+        return False
+
+    def skip_string(self, string: str) -> bool:
+        strlen = len(string)
+        if self.definition[self.pos:self.pos + strlen] == string:
+            self.pos += strlen
+            return True
+        return False
+
+    def skip_word(self, word: str) -> bool:
+        return self.match(re.compile(r'\b%s\b' % re.escape(word)))
+
+    def skip_ws(self) -> bool:
+        return self.match(_whitespace_re)
+
+    def skip_word_and_ws(self, word: str) -> bool:
+        if self.skip_word(word):
+            self.skip_ws()
+            return True
+        return False
+
+    def skip_string_and_ws(self, string: str) -> bool:
+        if self.skip_string(string):
+            self.skip_ws()
+            return True
+        return False
+
+    @property
+    def eof(self) -> bool:
+        return self.pos >= self.end
+
+    @property
+    def current_char(self) -> str:
+        try:
+            return self.definition[self.pos]
+        except IndexError:
+            return 'EOF'
+
+    @property
+    def matched_text(self) -> str:
+        if self.last_match is not None:
+            return self.last_match.group()
+        return ''
+
+    def read_rest(self) -> str:
+        rv = self.definition[self.pos:]
+        self.pos = self.end
+        return rv
+
+    def assert_end(self, *, allowSemicolon: bool = False) -> None:
+        self.skip_ws()
+        if allowSemicolon:
+            if not self.eof and self.definition[self.pos:] != ';':
+                self.fail('Expected end of definition or ;.')
+        else:
+            if not self.eof:
+                self.fail('Expected end of definition.')
+
+    ################################################################################
+
+    @property
+    def id_attributes(self):
+        raise NotImplementedError
+
+    @property
+    def paren_attributes(self):
+        raise NotImplementedError
+
+    def _parse_balanced_token_seq(self, end: list[str]) -> str:
+        # TODO: add handling of string literals and similar
+        brackets = {'(': ')', '[': ']', '{': '}'}
+        startPos = self.pos
+        symbols: list[str] = []
+        while not self.eof:
+            if len(symbols) == 0 and self.current_char in end:
+                break
+            if self.current_char in brackets:
+                symbols.append(brackets[self.current_char])
+            elif len(symbols) > 0 and self.current_char == symbols[-1]:
+                symbols.pop()
+            elif self.current_char in ")]}":
+                self.fail("Unexpected '%s' in balanced-token-seq." % self.current_char)
+            self.pos += 1
+        if self.eof:
+            self.fail("Could not find end of balanced-token-seq starting at %d."
+                      % startPos)
+        return self.definition[startPos:self.pos]
+
+    def _parse_attribute(self) -> ASTAttribute | None:
+        self.skip_ws()
+        # try C++11 style
+        startPos = self.pos
+        if self.skip_string_and_ws('['):
+            if not self.skip_string('['):
+                self.pos = startPos
+            else:
+                # TODO: actually implement the correct grammar
+                arg = self._parse_balanced_token_seq(end=[']'])
+                if not self.skip_string_and_ws(']'):
+                    self.fail("Expected ']' in end of attribute.")
+                if not self.skip_string_and_ws(']'):
+                    self.fail("Expected ']' in end of attribute after [[...]")
+                return ASTCPPAttribute(arg)
+
+        # try GNU style
+        if self.skip_word_and_ws('__attribute__'):
+            if not self.skip_string_and_ws('('):
+                self.fail("Expected '(' after '__attribute__'.")
+            if not self.skip_string_and_ws('('):
+                self.fail("Expected '(' after '__attribute__('.")
+            attrs = []
+            while 1:
+                if self.match(identifier_re):
+                    name = self.matched_text
+                    exprs = self._parse_paren_expression_list()
+                    attrs.append(ASTGnuAttribute(name, exprs))
+                if self.skip_string_and_ws(','):
+                    continue
+                if self.skip_string_and_ws(')'):
+                    break
+                self.fail("Expected identifier, ')', or ',' in __attribute__.")
+            if not self.skip_string_and_ws(')'):
+                self.fail("Expected ')' after '__attribute__((...)'")
+            return ASTGnuAttributeList(attrs)
+
+        # try the simple id attributes defined by the user
+        for id in self.id_attributes:
+            if self.skip_word_and_ws(id):
+                return ASTIdAttribute(id)
+
+        # try the paren attributes defined by the user
+        for id in self.paren_attributes:
+            if not self.skip_string_and_ws(id):
+                continue
+            if not self.skip_string('('):
+                self.fail("Expected '(' after user-defined paren-attribute.")
+            arg = self._parse_balanced_token_seq(end=[')'])
+            if not self.skip_string(')'):
+                self.fail("Expected ')' to end user-defined paren-attribute.")
+            return ASTParenAttribute(id, arg)
+
+        return None
+
+    def _parse_attribute_list(self) -> ASTAttributeList:
+        res = []
+        while True:
+            attr = self._parse_attribute()
+            if attr is None:
+                break
+            res.append(attr)
+        return ASTAttributeList(res)
+
+    def _parse_paren_expression_list(self) -> ASTBaseParenExprList | None:
+        raise NotImplementedError