From cf7da1843c45a4c2df7a749f7886a2d2ba0ee92a Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 15 Apr 2024 19:25:40 +0200 Subject: Adding upstream version 7.2.6. Signed-off-by: Daniel Baumann --- sphinx/util/cfamily.py | 464 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 464 insertions(+) create mode 100644 sphinx/util/cfamily.py (limited to 'sphinx/util/cfamily.py') diff --git a/sphinx/util/cfamily.py b/sphinx/util/cfamily.py new file mode 100644 index 0000000..a3fdbe3 --- /dev/null +++ b/sphinx/util/cfamily.py @@ -0,0 +1,464 @@ +"""Utility functions common to the C and C++ domains.""" + +from __future__ import annotations + +import re +from copy import deepcopy +from typing import TYPE_CHECKING, Any, Callable + +from docutils import nodes + +from sphinx import addnodes +from sphinx.util import logging + +if TYPE_CHECKING: + from docutils.nodes import TextElement + + from sphinx.config import Config + +logger = logging.getLogger(__name__) + +StringifyTransform = Callable[[Any], str] + + +_whitespace_re = re.compile(r'\s+') +anon_identifier_re = re.compile(r'(@[a-zA-Z0-9_])[a-zA-Z0-9_]*\b') +identifier_re = re.compile(r''' + ( # This 'extends' _anon_identifier_re with the ordinary identifiers, + # make sure they are in sync. + (~?\b[a-zA-Z_]) # ordinary identifiers + | (@[a-zA-Z0-9_]) # our extension for names of anonymous entities + ) + [a-zA-Z0-9_]*\b +''', flags=re.VERBOSE) +integer_literal_re = re.compile(r'[1-9][0-9]*(\'[0-9]+)*') +octal_literal_re = re.compile(r'0[0-7]*(\'[0-7]+)*') +hex_literal_re = re.compile(r'0[xX][0-9a-fA-F]+(\'[0-9a-fA-F]+)*') +binary_literal_re = re.compile(r'0[bB][01]+(\'[01]+)*') +integers_literal_suffix_re = re.compile(r''' + # unsigned and/or (long) long, in any order, but at least one of them + ( + ([uU] ([lL] | (ll) | (LL))?) + | + (([lL] | (ll) | (LL)) [uU]?) + )\b + # the ending word boundary is important for distinguishing + # between suffixes and UDLs in C++ +''', flags=re.VERBOSE) +float_literal_re = re.compile(r''' + [+-]?( + # decimal + ([0-9]+(\'[0-9]+)*[eE][+-]?[0-9]+(\'[0-9]+)*) + | (([0-9]+(\'[0-9]+)*)?\.[0-9]+(\'[0-9]+)*([eE][+-]?[0-9]+(\'[0-9]+)*)?) + | ([0-9]+(\'[0-9]+)*\.([eE][+-]?[0-9]+(\'[0-9]+)*)?) + # hex + | (0[xX][0-9a-fA-F]+(\'[0-9a-fA-F]+)*[pP][+-]?[0-9a-fA-F]+(\'[0-9a-fA-F]+)*) + | (0[xX]([0-9a-fA-F]+(\'[0-9a-fA-F]+)*)?\. + [0-9a-fA-F]+(\'[0-9a-fA-F]+)*([pP][+-]?[0-9a-fA-F]+(\'[0-9a-fA-F]+)*)?) + | (0[xX][0-9a-fA-F]+(\'[0-9a-fA-F]+)*\.([pP][+-]?[0-9a-fA-F]+(\'[0-9a-fA-F]+)*)?) + ) +''', flags=re.VERBOSE) +float_literal_suffix_re = re.compile(r'[fFlL]\b') +# the ending word boundary is important for distinguishing between suffixes and UDLs in C++ +char_literal_re = re.compile(r''' + ((?:u8)|u|U|L)? + '( + (?:[^\\']) + | (\\( + (?:['"?\\abfnrtv]) + | (?:[0-7]{1,3}) + | (?:x[0-9a-fA-F]{2}) + | (?:u[0-9a-fA-F]{4}) + | (?:U[0-9a-fA-F]{8}) + )) + )' +''', flags=re.VERBOSE) + + +def verify_description_mode(mode: str) -> None: + if mode not in ('lastIsName', 'noneIsName', 'markType', 'markName', 'param', 'udl'): + raise Exception("Description mode '%s' is invalid." % mode) + + +class NoOldIdError(Exception): + # Used to avoid implementing unneeded id generation for old id schemes. + pass + + +class ASTBaseBase: + def __eq__(self, other: Any) -> bool: + if type(self) is not type(other): + return False + try: + for key, value in self.__dict__.items(): + if value != getattr(other, key): + return False + except AttributeError: + return False + return True + + # Defining __hash__ = None is not strictly needed when __eq__ is defined. + __hash__ = None # type: ignore[assignment] + + def clone(self) -> Any: + return deepcopy(self) + + def _stringify(self, transform: StringifyTransform) -> str: + raise NotImplementedError(repr(self)) + + def __str__(self) -> str: + return self._stringify(lambda ast: str(ast)) + + def get_display_string(self) -> str: + return self._stringify(lambda ast: ast.get_display_string()) + + def __repr__(self) -> str: + return '<%s>' % self.__class__.__name__ + + +################################################################################ +# Attributes +################################################################################ + +class ASTAttribute(ASTBaseBase): + def describe_signature(self, signode: TextElement) -> None: + raise NotImplementedError(repr(self)) + + +class ASTCPPAttribute(ASTAttribute): + def __init__(self, arg: str) -> None: + self.arg = arg + + def _stringify(self, transform: StringifyTransform) -> str: + return "[[" + self.arg + "]]" + + def describe_signature(self, signode: TextElement) -> None: + signode.append(addnodes.desc_sig_punctuation('[[', '[[')) + signode.append(nodes.Text(self.arg)) + signode.append(addnodes.desc_sig_punctuation(']]', ']]')) + + +class ASTGnuAttribute(ASTBaseBase): + def __init__(self, name: str, args: ASTBaseParenExprList | None) -> None: + self.name = name + self.args = args + + def _stringify(self, transform: StringifyTransform) -> str: + res = [self.name] + if self.args: + res.append(transform(self.args)) + return ''.join(res) + + +class ASTGnuAttributeList(ASTAttribute): + def __init__(self, attrs: list[ASTGnuAttribute]) -> None: + self.attrs = attrs + + def _stringify(self, transform: StringifyTransform) -> str: + res = ['__attribute__(('] + first = True + for attr in self.attrs: + if not first: + res.append(', ') + first = False + res.append(transform(attr)) + res.append('))') + return ''.join(res) + + def describe_signature(self, signode: TextElement) -> None: + txt = str(self) + signode.append(nodes.Text(txt)) + + +class ASTIdAttribute(ASTAttribute): + """For simple attributes defined by the user.""" + + def __init__(self, id: str) -> None: + self.id = id + + def _stringify(self, transform: StringifyTransform) -> str: + return self.id + + def describe_signature(self, signode: TextElement) -> None: + signode.append(nodes.Text(self.id)) + + +class ASTParenAttribute(ASTAttribute): + """For paren attributes defined by the user.""" + + def __init__(self, id: str, arg: str) -> None: + self.id = id + self.arg = arg + + def _stringify(self, transform: StringifyTransform) -> str: + return self.id + '(' + self.arg + ')' + + def describe_signature(self, signode: TextElement) -> None: + txt = str(self) + signode.append(nodes.Text(txt)) + + +class ASTAttributeList(ASTBaseBase): + def __init__(self, attrs: list[ASTAttribute]) -> None: + self.attrs = attrs + + def __len__(self) -> int: + return len(self.attrs) + + def __add__(self, other: ASTAttributeList) -> ASTAttributeList: + return ASTAttributeList(self.attrs + other.attrs) + + def _stringify(self, transform: StringifyTransform) -> str: + return ' '.join(transform(attr) for attr in self.attrs) + + def describe_signature(self, signode: TextElement) -> None: + if len(self.attrs) == 0: + return + self.attrs[0].describe_signature(signode) + if len(self.attrs) == 1: + return + for attr in self.attrs[1:]: + signode.append(addnodes.desc_sig_space()) + attr.describe_signature(signode) + + +################################################################################ + +class ASTBaseParenExprList(ASTBaseBase): + pass + + +################################################################################ + +class UnsupportedMultiCharacterCharLiteral(Exception): + pass + + +class DefinitionError(Exception): + pass + + +class BaseParser: + def __init__(self, definition: str, *, + location: nodes.Node | tuple[str, int] | str, + config: Config) -> None: + self.definition = definition.strip() + self.location = location # for warnings + self.config = config + + self.pos = 0 + self.end = len(self.definition) + self.last_match: re.Match[str] | None = None + self._previous_state: tuple[int, re.Match[str] | None] = (0, None) + self.otherErrors: list[DefinitionError] = [] + + # in our tests the following is set to False to capture bad parsing + self.allowFallbackExpressionParsing = True + + def _make_multi_error(self, errors: list[Any], header: str) -> DefinitionError: + if len(errors) == 1: + if len(header) > 0: + return DefinitionError(header + '\n' + str(errors[0][0])) + else: + return DefinitionError(str(errors[0][0])) + result = [header, '\n'] + for e in errors: + if len(e[1]) > 0: + indent = ' ' + result.append(e[1]) + result.append(':\n') + for line in str(e[0]).split('\n'): + if len(line) == 0: + continue + result.append(indent) + result.append(line) + result.append('\n') + else: + result.append(str(e[0])) + return DefinitionError(''.join(result)) + + @property + def language(self) -> str: + raise NotImplementedError + + def status(self, msg: str) -> None: + # for debugging + indicator = '-' * self.pos + '^' + logger.debug(f"{msg}\n{self.definition}\n{indicator}") # NoQA: G004 + + def fail(self, msg: str) -> None: + errors = [] + indicator = '-' * self.pos + '^' + exMain = DefinitionError( + 'Invalid %s declaration: %s [error at %d]\n %s\n %s' % + (self.language, msg, self.pos, self.definition, indicator)) + errors.append((exMain, "Main error")) + for err in self.otherErrors: + errors.append((err, "Potential other error")) + self.otherErrors = [] + raise self._make_multi_error(errors, '') + + def warn(self, msg: str) -> None: + logger.warning(msg, location=self.location) + + def match(self, regex: re.Pattern[str]) -> bool: + match = regex.match(self.definition, self.pos) + if match is not None: + self._previous_state = (self.pos, self.last_match) + self.pos = match.end() + self.last_match = match + return True + return False + + def skip_string(self, string: str) -> bool: + strlen = len(string) + if self.definition[self.pos:self.pos + strlen] == string: + self.pos += strlen + return True + return False + + def skip_word(self, word: str) -> bool: + return self.match(re.compile(r'\b%s\b' % re.escape(word))) + + def skip_ws(self) -> bool: + return self.match(_whitespace_re) + + def skip_word_and_ws(self, word: str) -> bool: + if self.skip_word(word): + self.skip_ws() + return True + return False + + def skip_string_and_ws(self, string: str) -> bool: + if self.skip_string(string): + self.skip_ws() + return True + return False + + @property + def eof(self) -> bool: + return self.pos >= self.end + + @property + def current_char(self) -> str: + try: + return self.definition[self.pos] + except IndexError: + return 'EOF' + + @property + def matched_text(self) -> str: + if self.last_match is not None: + return self.last_match.group() + return '' + + def read_rest(self) -> str: + rv = self.definition[self.pos:] + self.pos = self.end + return rv + + def assert_end(self, *, allowSemicolon: bool = False) -> None: + self.skip_ws() + if allowSemicolon: + if not self.eof and self.definition[self.pos:] != ';': + self.fail('Expected end of definition or ;.') + else: + if not self.eof: + self.fail('Expected end of definition.') + + ################################################################################ + + @property + def id_attributes(self): + raise NotImplementedError + + @property + def paren_attributes(self): + raise NotImplementedError + + def _parse_balanced_token_seq(self, end: list[str]) -> str: + # TODO: add handling of string literals and similar + brackets = {'(': ')', '[': ']', '{': '}'} + startPos = self.pos + symbols: list[str] = [] + while not self.eof: + if len(symbols) == 0 and self.current_char in end: + break + if self.current_char in brackets: + symbols.append(brackets[self.current_char]) + elif len(symbols) > 0 and self.current_char == symbols[-1]: + symbols.pop() + elif self.current_char in ")]}": + self.fail("Unexpected '%s' in balanced-token-seq." % self.current_char) + self.pos += 1 + if self.eof: + self.fail("Could not find end of balanced-token-seq starting at %d." + % startPos) + return self.definition[startPos:self.pos] + + def _parse_attribute(self) -> ASTAttribute | None: + self.skip_ws() + # try C++11 style + startPos = self.pos + if self.skip_string_and_ws('['): + if not self.skip_string('['): + self.pos = startPos + else: + # TODO: actually implement the correct grammar + arg = self._parse_balanced_token_seq(end=[']']) + if not self.skip_string_and_ws(']'): + self.fail("Expected ']' in end of attribute.") + if not self.skip_string_and_ws(']'): + self.fail("Expected ']' in end of attribute after [[...]") + return ASTCPPAttribute(arg) + + # try GNU style + if self.skip_word_and_ws('__attribute__'): + if not self.skip_string_and_ws('('): + self.fail("Expected '(' after '__attribute__'.") + if not self.skip_string_and_ws('('): + self.fail("Expected '(' after '__attribute__('.") + attrs = [] + while 1: + if self.match(identifier_re): + name = self.matched_text + exprs = self._parse_paren_expression_list() + attrs.append(ASTGnuAttribute(name, exprs)) + if self.skip_string_and_ws(','): + continue + if self.skip_string_and_ws(')'): + break + self.fail("Expected identifier, ')', or ',' in __attribute__.") + if not self.skip_string_and_ws(')'): + self.fail("Expected ')' after '__attribute__((...)'") + return ASTGnuAttributeList(attrs) + + # try the simple id attributes defined by the user + for id in self.id_attributes: + if self.skip_word_and_ws(id): + return ASTIdAttribute(id) + + # try the paren attributes defined by the user + for id in self.paren_attributes: + if not self.skip_string_and_ws(id): + continue + if not self.skip_string('('): + self.fail("Expected '(' after user-defined paren-attribute.") + arg = self._parse_balanced_token_seq(end=[')']) + if not self.skip_string(')'): + self.fail("Expected ')' to end user-defined paren-attribute.") + return ASTParenAttribute(id, arg) + + return None + + def _parse_attribute_list(self) -> ASTAttributeList: + res = [] + while True: + attr = self._parse_attribute() + if attr is None: + break + res.append(attr) + return ASTAttributeList(res) + + def _parse_paren_expression_list(self) -> ASTBaseParenExprList | None: + raise NotImplementedError -- cgit v1.2.3