summaryrefslogtreecommitdiffstats
path: root/sphinx/util/cfamily.py
diff options
context:
space:
mode:
Diffstat (limited to 'sphinx/util/cfamily.py')
-rw-r--r--sphinx/util/cfamily.py464
1 files changed, 464 insertions, 0 deletions
diff --git a/sphinx/util/cfamily.py b/sphinx/util/cfamily.py
new file mode 100644
index 0000000..a3fdbe3
--- /dev/null
+++ b/sphinx/util/cfamily.py
@@ -0,0 +1,464 @@
+"""Utility functions common to the C and C++ domains."""
+
+from __future__ import annotations
+
+import re
+from copy import deepcopy
+from typing import TYPE_CHECKING, Any, Callable
+
+from docutils import nodes
+
+from sphinx import addnodes
+from sphinx.util import logging
+
+if TYPE_CHECKING:
+ from docutils.nodes import TextElement
+
+ from sphinx.config import Config
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Type of the callbacks passed to ``_stringify``: takes any value, returns a string.
StringifyTransform = Callable[[Any], str]
+
+
# One or more whitespace characters.
_whitespace_re = re.compile(r'\s+')

# Names of anonymous entities: an '@' followed by identifier characters.
anon_identifier_re = re.compile(r'(@[a-zA-Z0-9_])[a-zA-Z0-9_]*\b')

# Ordinary identifiers (optionally prefixed by '~') or anonymous '@' names.
identifier_re = re.compile(
    r'''
    (   # This 'extends' _anon_identifier_re with the ordinary identifiers,
        # make sure they are in sync.
        (~?\b[a-zA-Z_])  # ordinary identifiers
    |   (@[a-zA-Z0-9_])  # our extension for names of anonymous entities
    )
    [a-zA-Z0-9_]*\b
    ''',
    re.VERBOSE,
)

# Integer literals per base; digit groups may be separated by a single quote.
integer_literal_re = re.compile(r'[1-9][0-9]*(\'[0-9]+)*')
octal_literal_re = re.compile(r'0[0-7]*(\'[0-7]+)*')
hex_literal_re = re.compile(r'0[xX][0-9a-fA-F]+(\'[0-9a-fA-F]+)*')
binary_literal_re = re.compile(r'0[bB][01]+(\'[01]+)*')

integers_literal_suffix_re = re.compile(
    r'''
    # unsigned and/or (long) long, in any order, but at least one of them
    (
        ([uU]    ([lL]  |  (ll)  |  (LL))?)
        |
        (([lL]  |  (ll)  |  (LL))    [uU]?)
    )\b
    # the ending word boundary is important for distinguishing
    # between suffixes and UDLs in C++
    ''',
    re.VERBOSE,
)

# Floating point literals, decimal or hexadecimal, with an optional sign.
float_literal_re = re.compile(
    r'''
    [+-]?(
    # decimal
      ([0-9]+(\'[0-9]+)*[eE][+-]?[0-9]+(\'[0-9]+)*)
    | (([0-9]+(\'[0-9]+)*)?\.[0-9]+(\'[0-9]+)*([eE][+-]?[0-9]+(\'[0-9]+)*)?)
    | ([0-9]+(\'[0-9]+)*\.([eE][+-]?[0-9]+(\'[0-9]+)*)?)
    # hex
    | (0[xX][0-9a-fA-F]+(\'[0-9a-fA-F]+)*[pP][+-]?[0-9a-fA-F]+(\'[0-9a-fA-F]+)*)
    | (0[xX]([0-9a-fA-F]+(\'[0-9a-fA-F]+)*)?\.
        [0-9a-fA-F]+(\'[0-9a-fA-F]+)*([pP][+-]?[0-9a-fA-F]+(\'[0-9a-fA-F]+)*)?)
    | (0[xX][0-9a-fA-F]+(\'[0-9a-fA-F]+)*\.([pP][+-]?[0-9a-fA-F]+(\'[0-9a-fA-F]+)*)?)
    )
    ''',
    re.VERBOSE,
)

# the ending word boundary is important for distinguishing between suffixes and UDLs in C++
float_literal_suffix_re = re.compile(r'[fFlL]\b')

# Character literals: optional encoding prefix, then exactly one plain
# character or one escape sequence between single quotes.
char_literal_re = re.compile(
    r'''
    ((?:u8)|u|U|L)?
    '(
      (?:[^\\'])
    | (\\(
        (?:['"?\\abfnrtv])
      | (?:[0-7]{1,3})
      | (?:x[0-9a-fA-F]{2})
      | (?:u[0-9a-fA-F]{4})
      | (?:U[0-9a-fA-F]{8})
      ))
    )'
    ''',
    re.VERBOSE,
)
+
+
def verify_description_mode(mode: str) -> None:
    """Raise ``Exception`` unless *mode* is one of the recognised description modes."""
    known_modes = ('lastIsName', 'noneIsName', 'markType', 'markName', 'param', 'udl')
    if mode in known_modes:
        return
    raise Exception("Description mode '%s' is invalid." % mode)
+
+
class NoOldIdError(Exception):
    """Used to avoid implementing unneeded id generation for old id schemes."""
+
+
class ASTBaseBase:
    """Base class giving AST nodes attribute-wise equality, cloning and stringification.

    Subclasses must override ``_stringify``; ``__str__`` and
    ``get_display_string`` are both built on top of it.
    """

    def __eq__(self, other: Any) -> bool:
        """Return True when *other* has the same exact type and equal attributes.

        Both instance dictionaries must hold the same attribute names, so the
        comparison is symmetric: an attribute present on only one side makes
        the objects unequal regardless of operand order.
        """
        if type(self) is not type(other):
            return False
        own, theirs = self.__dict__, other.__dict__
        # Checking the key sets first keeps ``a == b`` and ``b == a`` in
        # agreement when one side carries an attribute the other lacks.
        if own.keys() != theirs.keys():
            return False
        return not any(value != theirs[key] for key, value in own.items())

    # Defining __hash__ = None is not strictly needed when __eq__ is defined.
    __hash__ = None  # type: ignore[assignment]

    def clone(self) -> Any:
        """Return a deep copy of this node."""
        return deepcopy(self)

    def _stringify(self, transform: StringifyTransform) -> str:
        """Render this node, using *transform* to render nested values.

        Raises NotImplementedError here; subclasses provide the implementation.
        """
        raise NotImplementedError(repr(self))

    def __str__(self) -> str:
        """Render this node with ``str`` as the transform for nested values."""
        return self._stringify(lambda ast: str(ast))

    def get_display_string(self) -> str:
        """Render this node using each nested value's ``get_display_string``."""
        return self._stringify(lambda ast: ast.get_display_string())

    def __repr__(self) -> str:
        """Return the class name wrapped in angle brackets."""
        return '<%s>' % self.__class__.__name__
+
+
+################################################################################
+# Attributes
+################################################################################
+
class ASTAttribute(ASTBaseBase):
    """Base class of the attribute nodes that can describe themselves in a signature."""

    def describe_signature(self, signode: TextElement) -> None:
        """Render this attribute into *signode*; not implemented on the base class."""
        raise NotImplementedError(repr(self))
+
+
class ASTCPPAttribute(ASTAttribute):
    """Attribute in double-bracket form; ``arg`` is the text between the brackets."""

    def __init__(self, arg: str) -> None:
        self.arg = arg

    def _stringify(self, transform: StringifyTransform) -> str:
        """Return ``arg`` wrapped in ``[[`` and ``]]``; *transform* is not used."""
        return ''.join(("[[", self.arg, "]]"))

    def describe_signature(self, signode: TextElement) -> None:
        """Append opening punctuation, the argument text and closing punctuation."""
        parts = (
            addnodes.desc_sig_punctuation('[[', '[['),
            nodes.Text(self.arg),
            addnodes.desc_sig_punctuation(']]', ']]'),
        )
        for part in parts:
            signode.append(part)
+
+
class ASTGnuAttribute(ASTBaseBase):
    """One named entry of a GNU attribute list, with an optional argument list."""

    def __init__(self, name: str, args: ASTBaseParenExprList | None) -> None:
        self.name = name
        self.args = args

    def _stringify(self, transform: StringifyTransform) -> str:
        """Return the name, followed by the transformed arguments when present."""
        if self.args:
            pieces = (self.name, transform(self.args))
        else:
            pieces = (self.name,)
        return ''.join(pieces)
+
+
class ASTGnuAttributeList(ASTAttribute):
    """A ``__attribute__((...))`` group holding a list of ``ASTGnuAttribute``."""

    def __init__(self, attrs: list[ASTGnuAttribute]) -> None:
        self.attrs = attrs

    def _stringify(self, transform: StringifyTransform) -> str:
        """Return the transformed attributes, comma separated, inside the wrapper."""
        body = ', '.join(transform(attr) for attr in self.attrs)
        return ''.join(('__attribute__((', body, '))'))

    def describe_signature(self, signode: TextElement) -> None:
        """Append the plain string form of this list as a single text node."""
        signode.append(nodes.Text(str(self)))
+
+
class ASTIdAttribute(ASTAttribute):
    """For simple attributes defined by the user."""

    def __init__(self, id: str) -> None:
        self.id = id

    def _stringify(self, transform: StringifyTransform) -> str:
        """Return the attribute id unchanged; *transform* is not used."""
        return self.id

    def describe_signature(self, signode: TextElement) -> None:
        """Append the attribute id as a text node."""
        text_node = nodes.Text(self.id)
        signode.append(text_node)
+
+
class ASTParenAttribute(ASTAttribute):
    """For paren attributes defined by the user."""

    def __init__(self, id: str, arg: str) -> None:
        self.id = id
        self.arg = arg

    def _stringify(self, transform: StringifyTransform) -> str:
        """Return the id followed by the argument in parentheses."""
        return ''.join((self.id, '(', self.arg, ')'))

    def describe_signature(self, signode: TextElement) -> None:
        """Append the plain string form of this attribute as a single text node."""
        signode.append(nodes.Text(str(self)))
+
+
class ASTAttributeList(ASTBaseBase):
    """An ordered sequence of attributes, rendered separated by single spaces."""

    def __init__(self, attrs: list[ASTAttribute]) -> None:
        self.attrs = attrs

    def __len__(self) -> int:
        """Return the number of attributes held."""
        return len(self.attrs)

    def __add__(self, other: ASTAttributeList) -> ASTAttributeList:
        """Return a new list with this list's attributes followed by *other*'s."""
        combined = self.attrs + other.attrs
        return ASTAttributeList(combined)

    def _stringify(self, transform: StringifyTransform) -> str:
        """Return the transformed attributes joined by a single space."""
        return ' '.join(map(transform, self.attrs))

    def describe_signature(self, signode: TextElement) -> None:
        """Render each attribute into *signode*, with a space node between neighbours."""
        for position, attr in enumerate(self.attrs):
            if position > 0:
                signode.append(addnodes.desc_sig_space())
            attr.describe_signature(signode)
+
+
+################################################################################
+
class ASTBaseParenExprList(ASTBaseBase):
    """Empty base class used as the type of a GNU attribute's argument list."""
+
+
+################################################################################
+
class UnsupportedMultiCharacterCharLiteral(Exception):
    """Exception type named for multi-character character literals; adds no behaviour."""
+
+
class DefinitionError(Exception):
    """Exception raised by the parser when a definition cannot be parsed."""
+
+
class BaseParser:
    """Cursor-based parser over a single definition string.

    Keeps the stripped definition text, the current position and the most
    recent regex match, and layers small matching/skipping helpers on top.
    ``language``, ``id_attributes``, ``paren_attributes`` and
    ``_parse_paren_expression_list`` raise NotImplementedError here and are
    left for subclasses to provide.
    """

    def __init__(self, definition: str, *,
                 location: nodes.Node | tuple[str, int] | str,
                 config: Config) -> None:
        self.definition = definition.strip()
        self.end = len(self.definition)
        self.pos = 0

        self.location = location  # for warnings
        self.config = config

        self.last_match: re.Match[str] | None = None
        self._previous_state: tuple[int, re.Match[str] | None] = (0, None)
        self.otherErrors: list[DefinitionError] = []

        # in our tests the following is set to False to capture bad parsing
        self.allowFallbackExpressionParsing = True

    def _make_multi_error(self, errors: list[Any], header: str) -> DefinitionError:
        """Fold ``(error, description)`` entries into one ``DefinitionError``.

        A single entry is reported as-is, prefixed by *header* when the header
        is non-empty.  With several entries, each described error is listed
        under its description with its non-empty lines indented.
        """
        if len(errors) == 1:
            only_error = str(errors[0][0])
            if len(header) > 0:
                return DefinitionError(header + '\n' + only_error)
            return DefinitionError(only_error)

        indent = ' '
        pieces = [header, '\n']
        for entry in errors:
            error, description = entry[0], entry[1]
            if len(description) == 0:
                pieces.append(str(error))
                continue
            pieces.extend((description, ':\n'))
            for line in str(error).split('\n'):
                if len(line) > 0:
                    pieces.extend((indent, line, '\n'))
        return DefinitionError(''.join(pieces))

    @property
    def language(self) -> str:
        """Language name used in error messages; not implemented on the base class."""
        raise NotImplementedError

    def status(self, msg: str) -> None:
        """Log *msg*, the definition and a marker under the current position."""
        # for debugging
        indicator = '-' * self.pos + '^'
        logger.debug(f"{msg}\n{self.definition}\n{indicator}")  # NoQA: G004

    def fail(self, msg: str) -> None:
        """Raise a ``DefinitionError`` for *msg* at the current position.

        Errors collected in ``otherErrors`` are attached as potential other
        errors, and the collection is reset before raising.
        """
        indicator = '-' * self.pos + '^'
        main_error = DefinitionError(
            'Invalid %s declaration: %s [error at %d]\n %s\n %s' %
            (self.language, msg, self.pos, self.definition, indicator))
        errors = [(main_error, "Main error")]
        errors.extend((err, "Potential other error") for err in self.otherErrors)
        self.otherErrors = []
        raise self._make_multi_error(errors, '')

    def warn(self, msg: str) -> None:
        """Log *msg* as a warning, tagged with this parser's location."""
        logger.warning(msg, location=self.location)

    def match(self, regex: re.Pattern[str]) -> bool:
        """Try *regex* at the current position; on success advance past the match.

        The position and match held before a successful call are saved in
        ``_previous_state``.
        """
        found = regex.match(self.definition, self.pos)
        if found is None:
            return False
        self._previous_state = (self.pos, self.last_match)
        self.pos = found.end()
        self.last_match = found
        return True

    def skip_string(self, string: str) -> bool:
        """Advance past *string* if the text at the current position equals it."""
        stop = self.pos + len(string)
        if self.definition[self.pos:stop] != string:
            return False
        self.pos = stop
        return True

    def skip_word(self, word: str) -> bool:
        """Advance past *word* if it occurs here delimited by word boundaries."""
        pattern = re.compile(r'\b%s\b' % re.escape(word))
        return self.match(pattern)

    def skip_ws(self) -> bool:
        """Advance past whitespace; return whether any was consumed."""
        return self.match(_whitespace_re)

    def skip_word_and_ws(self, word: str) -> bool:
        """Like ``skip_word``, additionally consuming whitespace after the word."""
        if not self.skip_word(word):
            return False
        self.skip_ws()
        return True

    def skip_string_and_ws(self, string: str) -> bool:
        """Like ``skip_string``, additionally consuming whitespace after the string."""
        if not self.skip_string(string):
            return False
        self.skip_ws()
        return True

    @property
    def eof(self) -> bool:
        """True once the position has reached the end of the definition."""
        return self.pos >= self.end

    @property
    def current_char(self) -> str:
        """The character at the current position, or ``'EOF'`` when out of range."""
        try:
            char = self.definition[self.pos]
        except IndexError:
            return 'EOF'
        return char

    @property
    def matched_text(self) -> str:
        """Text of the last successful ``match``, or an empty string if there is none."""
        return '' if self.last_match is None else self.last_match.group()

    def read_rest(self) -> str:
        """Return everything from the current position on and move to the end."""
        rest = self.definition[self.pos:]
        self.pos = self.end
        return rest

    def assert_end(self, *, allowSemicolon: bool = False) -> None:
        """Fail unless only whitespace remains (or a lone ``;`` when allowed)."""
        self.skip_ws()
        if self.eof:
            return
        if not allowSemicolon:
            self.fail('Expected end of definition.')
        elif self.definition[self.pos:] != ';':
            self.fail('Expected end of definition or ;.')

    ################################################################################

    @property
    def id_attributes(self):
        """Iterable of user-defined simple attribute names; not implemented here."""
        raise NotImplementedError

    @property
    def paren_attributes(self):
        """Iterable of user-defined paren-attribute names; not implemented here."""
        raise NotImplementedError

    def _parse_balanced_token_seq(self, end: list[str]) -> str:
        """Consume text up to an un-nested character from *end* and return it.

        Round, square and curly brackets must nest properly.  The terminating
        character itself is not consumed.  Fails on a stray closing bracket or
        when the definition ends first.
        """
        # TODO: add handling of string literals and similar
        closers = {'(': ')', '[': ']', '{': '}'}
        start = self.pos
        pending: list[str] = []  # closing brackets still expected, innermost last
        while not self.eof:
            char = self.current_char
            if len(pending) == 0 and char in end:
                break
            if char in closers:
                pending.append(closers[char])
            elif len(pending) > 0 and char == pending[-1]:
                pending.pop()
            elif char in ")]}":
                self.fail("Unexpected '%s' in balanced-token-seq." % char)
            self.pos += 1
        if self.eof:
            self.fail("Could not find end of balanced-token-seq starting at %d."
                      % start)
        return self.definition[start:self.pos]

    def _parse_attribute(self) -> ASTAttribute | None:
        """Parse one attribute at the current position, or return None.

        Tried in order: the double-bracket form, the ``__attribute__((...))``
        form, user-defined simple attributes, user-defined paren attributes.
        """
        self.skip_ws()
        # try C++11 style
        rewind_to = self.pos
        if self.skip_string_and_ws('['):
            if self.skip_string('['):
                # TODO: actually implement the correct grammar
                arg = self._parse_balanced_token_seq(end=[']'])
                if not self.skip_string_and_ws(']'):
                    self.fail("Expected ']' in end of attribute.")
                if not self.skip_string_and_ws(']'):
                    self.fail("Expected ']' in end of attribute after [[...]")
                return ASTCPPAttribute(arg)
            # a single '[' does not start an attribute: go back to where we were
            self.pos = rewind_to

        # try GNU style
        if self.skip_word_and_ws('__attribute__'):
            if not self.skip_string_and_ws('('):
                self.fail("Expected '(' after '__attribute__'.")
            if not self.skip_string_and_ws('('):
                self.fail("Expected '(' after '__attribute__('.")
            gnu_attrs = []
            while True:
                if self.match(identifier_re):
                    name = self.matched_text
                    exprs = self._parse_paren_expression_list()
                    gnu_attrs.append(ASTGnuAttribute(name, exprs))
                if self.skip_string_and_ws(','):
                    continue
                if self.skip_string_and_ws(')'):
                    break
                self.fail("Expected identifier, ')', or ',' in __attribute__.")
            if not self.skip_string_and_ws(')'):
                self.fail("Expected ')' after '__attribute__((...)'")
            return ASTGnuAttributeList(gnu_attrs)

        # try the simple id attributes defined by the user
        for simple_id in self.id_attributes:
            if self.skip_word_and_ws(simple_id):
                return ASTIdAttribute(simple_id)

        # try the paren attributes defined by the user
        for paren_id in self.paren_attributes:
            if self.skip_string_and_ws(paren_id):
                if not self.skip_string('('):
                    self.fail("Expected '(' after user-defined paren-attribute.")
                arg = self._parse_balanced_token_seq(end=[')'])
                if not self.skip_string(')'):
                    self.fail("Expected ')' to end user-defined paren-attribute.")
                return ASTParenAttribute(paren_id, arg)

        return None

    def _parse_attribute_list(self) -> ASTAttributeList:
        """Parse attributes until none is found and return them as a list node."""
        collected = []
        attr = self._parse_attribute()
        while attr is not None:
            collected.append(attr)
            attr = self._parse_attribute()
        return ASTAttributeList(collected)

    def _parse_paren_expression_list(self) -> ASTBaseParenExprList | None:
        """Parse the argument list of a GNU attribute; not implemented here."""
        raise NotImplementedError