From cf7da1843c45a4c2df7a749f7886a2d2ba0ee92a Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 15 Apr 2024 19:25:40 +0200 Subject: Adding upstream version 7.2.6. Signed-off-by: Daniel Baumann --- sphinx/pycode/__init__.py | 152 ++++++++++++ sphinx/pycode/ast.py | 188 +++++++++++++++ sphinx/pycode/parser.py | 577 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 917 insertions(+) create mode 100644 sphinx/pycode/__init__.py create mode 100644 sphinx/pycode/ast.py create mode 100644 sphinx/pycode/parser.py (limited to 'sphinx/pycode') diff --git a/sphinx/pycode/__init__.py b/sphinx/pycode/__init__.py new file mode 100644 index 0000000..55835ec --- /dev/null +++ b/sphinx/pycode/__init__.py @@ -0,0 +1,152 @@ +"""Utilities parsing and analyzing Python code.""" + +from __future__ import annotations + +import tokenize +from importlib import import_module +from os import path +from typing import TYPE_CHECKING, Any + +from sphinx.errors import PycodeError +from sphinx.pycode.parser import Parser + +if TYPE_CHECKING: + from inspect import Signature + + +class ModuleAnalyzer: + annotations: dict[tuple[str, str], str] + attr_docs: dict[tuple[str, str], list[str]] + finals: list[str] + overloads: dict[str, list[Signature]] + tagorder: dict[str, int] + tags: dict[str, tuple[str, int, int]] + + # cache for analyzer objects -- caches both by module and file name + cache: dict[tuple[str, str], Any] = {} + + @staticmethod + def get_module_source(modname: str) -> tuple[str | None, str | None]: + """Try to find the source code for a module. + + Returns ('filename', 'source'). One of it can be None if + no filename or source found + """ + try: + mod = import_module(modname) + except Exception as err: + raise PycodeError('error importing %r' % modname, err) from err + loader = getattr(mod, '__loader__', None) + filename = getattr(mod, '__file__', None) + if loader and getattr(loader, 'get_source', None): + # prefer Native loader, as it respects #coding directive + try: + source = loader.get_source(modname) + if source: + # no exception and not None - it must be module source + return filename, source + except ImportError: + pass # Try other "source-mining" methods + if filename is None and loader and getattr(loader, 'get_filename', None): + # have loader, but no filename + try: + filename = loader.get_filename(modname) + except ImportError as err: + raise PycodeError('error getting filename for %r' % modname, err) from err + if filename is None: + # all methods for getting filename failed, so raise... + raise PycodeError('no source found for module %r' % modname) + filename = path.normpath(path.abspath(filename)) + if filename.lower().endswith(('.pyo', '.pyc')): + filename = filename[:-1] + if not path.isfile(filename) and path.isfile(filename + 'w'): + filename += 'w' + elif not filename.lower().endswith(('.py', '.pyw')): + raise PycodeError('source is not a .py file: %r' % filename) + + if not path.isfile(filename): + raise PycodeError('source file is not present: %r' % filename) + return filename, None + + @classmethod + def for_string(cls, string: str, modname: str, srcname: str = '', + ) -> ModuleAnalyzer: + return cls(string, modname, srcname) + + @classmethod + def for_file(cls, filename: str, modname: str) -> ModuleAnalyzer: + if ('file', filename) in cls.cache: + return cls.cache['file', filename] + try: + with tokenize.open(filename) as f: + string = f.read() + obj = cls(string, modname, filename) + cls.cache['file', filename] = obj + except Exception as err: + raise PycodeError('error opening %r' % filename, err) from err + return obj + + @classmethod + def for_module(cls, modname: str) -> ModuleAnalyzer: + if ('module', modname) in cls.cache: + entry = cls.cache['module', modname] + if isinstance(entry, PycodeError): + raise entry + return entry + + try: + filename, source = cls.get_module_source(modname) + if source is not None: + obj = cls.for_string(source, modname, filename or '') + elif filename is not None: + obj = cls.for_file(filename, modname) + except PycodeError as err: + cls.cache['module', modname] = err + raise + cls.cache['module', modname] = obj + return obj + + def __init__(self, source: str, modname: str, srcname: str) -> None: + self.modname = modname # name of the module + self.srcname = srcname # name of the source file + + # cache the source code as well + self.code = source + + self._analyzed = False + + def analyze(self) -> None: + """Analyze the source code.""" + if self._analyzed: + return + + try: + parser = Parser(self.code) + parser.parse() + + self.attr_docs = {} + for (scope, comment) in parser.comments.items(): + if comment: + self.attr_docs[scope] = comment.splitlines() + [''] + else: + self.attr_docs[scope] = [''] + + self.annotations = parser.annotations + self.finals = parser.finals + self.overloads = parser.overloads + self.tags = parser.definitions + self.tagorder = parser.deforders + self._analyzed = True + except Exception as exc: + msg = f'parsing {self.srcname!r} failed: {exc!r}' + raise PycodeError(msg) from exc + + def find_attr_docs(self) -> dict[tuple[str, str], list[str]]: + """Find class and module-level attributes and their documentation.""" + self.analyze() + return self.attr_docs + + def find_tags(self) -> dict[str, tuple[str, int, int]]: + """Find class, function and method definitions and their location.""" + self.analyze() + return self.tags diff --git a/sphinx/pycode/ast.py b/sphinx/pycode/ast.py new file mode 100644 index 0000000..e5914cc --- /dev/null +++ b/sphinx/pycode/ast.py @@ -0,0 +1,188 @@ +"""Helpers for AST (Abstract Syntax Tree).""" + +from __future__ import annotations + +import ast +from typing import overload + +OPERATORS: dict[type[ast.AST], str] = { + ast.Add: "+", + ast.And: "and", + ast.BitAnd: "&", + ast.BitOr: "|", + ast.BitXor: "^", + ast.Div: "/", + ast.FloorDiv: "//", + ast.Invert: "~", + ast.LShift: "<<", + ast.MatMult: "@", + ast.Mult: "*", + ast.Mod: "%", + ast.Not: "not", + ast.Pow: "**", + ast.Or: "or", + ast.RShift: ">>", + ast.Sub: "-", + ast.UAdd: "+", + ast.USub: "-", +} + + +@overload +def unparse(node: None, code: str = '') -> None: + ... + + +@overload +def unparse(node: ast.AST, code: str = '') -> str: + ... + + +def unparse(node: ast.AST | None, code: str = '') -> str | None: + """Unparse an AST to string.""" + if node is None: + return None + elif isinstance(node, str): + return node + return _UnparseVisitor(code).visit(node) + + +# a greatly cut-down version of `ast._Unparser` +class _UnparseVisitor(ast.NodeVisitor): + def __init__(self, code: str = '') -> None: + self.code = code + + def _visit_op(self, node: ast.AST) -> str: + return OPERATORS[node.__class__] + for _op in OPERATORS: + locals()[f'visit_{_op.__name__}'] = _visit_op + + def visit_arg(self, node: ast.arg) -> str: + if node.annotation: + return f"{node.arg}: {self.visit(node.annotation)}" + else: + return node.arg + + def _visit_arg_with_default(self, arg: ast.arg, default: ast.AST | None) -> str: + """Unparse a single argument to a string.""" + name = self.visit(arg) + if default: + if arg.annotation: + name += " = %s" % self.visit(default) + else: + name += "=%s" % self.visit(default) + return name + + def visit_arguments(self, node: ast.arguments) -> str: + defaults: list[ast.expr | None] = list(node.defaults) + positionals = len(node.args) + posonlyargs = len(node.posonlyargs) + positionals += posonlyargs + for _ in range(len(defaults), positionals): + defaults.insert(0, None) + + kw_defaults: list[ast.expr | None] = list(node.kw_defaults) + for _ in range(len(kw_defaults), len(node.kwonlyargs)): + kw_defaults.insert(0, None) + + args: list[str] = [] + for i, arg in enumerate(node.posonlyargs): + args.append(self._visit_arg_with_default(arg, defaults[i])) + + if node.posonlyargs: + args.append('/') + + for i, arg in enumerate(node.args): + args.append(self._visit_arg_with_default(arg, defaults[i + posonlyargs])) + + if node.vararg: + args.append("*" + self.visit(node.vararg)) + + if node.kwonlyargs and not node.vararg: + args.append('*') + for i, arg in enumerate(node.kwonlyargs): + args.append(self._visit_arg_with_default(arg, kw_defaults[i])) + + if node.kwarg: + args.append("**" + self.visit(node.kwarg)) + + return ", ".join(args) + + def visit_Attribute(self, node: ast.Attribute) -> str: + return f"{self.visit(node.value)}.{node.attr}" + + def visit_BinOp(self, node: ast.BinOp) -> str: + # Special case ``**`` to not have surrounding spaces. + if isinstance(node.op, ast.Pow): + return "".join(map(self.visit, (node.left, node.op, node.right))) + return " ".join(self.visit(e) for e in [node.left, node.op, node.right]) + + def visit_BoolOp(self, node: ast.BoolOp) -> str: + op = " %s " % self.visit(node.op) + return op.join(self.visit(e) for e in node.values) + + def visit_Call(self, node: ast.Call) -> str: + args = ', '.join([self.visit(e) for e in node.args] + + [f"{k.arg}={self.visit(k.value)}" for k in node.keywords]) + return f"{self.visit(node.func)}({args})" + + def visit_Constant(self, node: ast.Constant) -> str: + if node.value is Ellipsis: + return "..." + elif isinstance(node.value, (int, float, complex)): + if self.code: + return ast.get_source_segment(self.code, node) or repr(node.value) + else: + return repr(node.value) + else: + return repr(node.value) + + def visit_Dict(self, node: ast.Dict) -> str: + keys = (self.visit(k) for k in node.keys if k is not None) + values = (self.visit(v) for v in node.values) + items = (k + ": " + v for k, v in zip(keys, values)) + return "{" + ", ".join(items) + "}" + + def visit_Lambda(self, node: ast.Lambda) -> str: + return "lambda %s: ..." % self.visit(node.args) + + def visit_List(self, node: ast.List) -> str: + return "[" + ", ".join(self.visit(e) for e in node.elts) + "]" + + def visit_Name(self, node: ast.Name) -> str: + return node.id + + def visit_Set(self, node: ast.Set) -> str: + return "{" + ", ".join(self.visit(e) for e in node.elts) + "}" + + def visit_Subscript(self, node: ast.Subscript) -> str: + def is_simple_tuple(value: ast.expr) -> bool: + return ( + isinstance(value, ast.Tuple) + and bool(value.elts) + and not any(isinstance(elt, ast.Starred) for elt in value.elts) + ) + + if is_simple_tuple(node.slice): + elts = ", ".join(self.visit(e) + for e in node.slice.elts) # type: ignore[attr-defined] + return f"{self.visit(node.value)}[{elts}]" + return f"{self.visit(node.value)}[{self.visit(node.slice)}]" + + def visit_UnaryOp(self, node: ast.UnaryOp) -> str: + # UnaryOp is one of {UAdd, USub, Invert, Not}, which refer to ``+x``, + # ``-x``, ``~x``, and ``not x``. Only Not needs a space. + if isinstance(node.op, ast.Not): + return f"{self.visit(node.op)} {self.visit(node.operand)}" + return f"{self.visit(node.op)}{self.visit(node.operand)}" + + def visit_Tuple(self, node: ast.Tuple) -> str: + if len(node.elts) == 0: + return "()" + elif len(node.elts) == 1: + return "(%s,)" % self.visit(node.elts[0]) + else: + return "(" + ", ".join(self.visit(e) for e in node.elts) + ")" + + def generic_visit(self, node): + raise NotImplementedError('Unable to parse %s object' % type(node).__name__) diff --git a/sphinx/pycode/parser.py b/sphinx/pycode/parser.py new file mode 100644 index 0000000..a0f855d --- /dev/null +++ b/sphinx/pycode/parser.py @@ -0,0 +1,577 @@ +"""Utilities parsing and analyzing Python code.""" + +from __future__ import annotations + +import ast +import contextlib +import inspect +import itertools +import re +import tokenize +from inspect import Signature +from token import DEDENT, INDENT, NAME, NEWLINE, NUMBER, OP, STRING +from tokenize import COMMENT, NL +from typing import Any + +from sphinx.pycode.ast import unparse as ast_unparse + +comment_re = re.compile('^\\s*#: ?(.*)\r?\n?$') +indent_re = re.compile('^\\s*$') +emptyline_re = re.compile('^\\s*(#.*)?$') + + +def filter_whitespace(code: str) -> str: + return code.replace('\f', ' ') # replace FF (form feed) with whitespace + + +def get_assign_targets(node: ast.AST) -> list[ast.expr]: + """Get list of targets from Assign and AnnAssign node.""" + if isinstance(node, ast.Assign): + return node.targets + else: + return [node.target] # type: ignore[attr-defined] + + +def get_lvar_names(node: ast.AST, self: ast.arg | None = None) -> list[str]: + """Convert assignment-AST to variable names. + + This raises `TypeError` if the assignment does not create new variable:: + + ary[0] = 'foo' + dic["bar"] = 'baz' + # => TypeError + """ + if self: + self_id = self.arg + + node_name = node.__class__.__name__ + if node_name in ('Constant', 'Index', 'Slice', 'Subscript'): + raise TypeError('%r does not create new variable' % node) + if node_name == 'Name': + if self is None or node.id == self_id: # type: ignore[attr-defined] + return [node.id] # type: ignore[attr-defined] + else: + raise TypeError('The assignment %r is not instance variable' % node) + elif node_name in ('Tuple', 'List'): + members = [] + for elt in node.elts: # type: ignore[attr-defined] + with contextlib.suppress(TypeError): + members.extend(get_lvar_names(elt, self)) + + return members + elif node_name == 'Attribute': + if ( + node.value.__class__.__name__ == 'Name' and # type: ignore[attr-defined] + self and node.value.id == self_id # type: ignore[attr-defined] + ): + # instance variable + return ["%s" % get_lvar_names(node.attr, self)[0]] # type: ignore[attr-defined] + else: + raise TypeError('The assignment %r is not instance variable' % node) + elif node_name == 'str': + return [node] # type: ignore[list-item] + elif node_name == 'Starred': + return get_lvar_names(node.value, self) # type: ignore[attr-defined] + else: + raise NotImplementedError('Unexpected node name %r' % node_name) + + +def dedent_docstring(s: str) -> str: + """Remove common leading indentation from docstring.""" + def dummy() -> None: + # dummy function to mock `inspect.getdoc`. + pass + + dummy.__doc__ = s + docstring = inspect.getdoc(dummy) + if docstring: + return docstring.lstrip("\r\n").rstrip("\r\n") + else: + return "" + + +class Token: + """Better token wrapper for tokenize module.""" + + def __init__(self, kind: int, value: Any, start: tuple[int, int], end: tuple[int, int], + source: str) -> None: + self.kind = kind + self.value = value + self.start = start + self.end = end + self.source = source + + def __eq__(self, other: Any) -> bool: + if isinstance(other, int): + return self.kind == other + elif isinstance(other, str): + return self.value == other + elif isinstance(other, (list, tuple)): + return [self.kind, self.value] == list(other) + elif other is None: + return False + else: + raise ValueError('Unknown value: %r' % other) + + def match(self, *conditions: Any) -> bool: + return any(self == candidate for candidate in conditions) + + def __repr__(self) -> str: + return f'' + + +class TokenProcessor: + def __init__(self, buffers: list[str]) -> None: + lines = iter(buffers) + self.buffers = buffers + self.tokens = tokenize.generate_tokens(lambda: next(lines)) + self.current: Token | None = None + self.previous: Token | None = None + + def get_line(self, lineno: int) -> str: + """Returns specified line.""" + return self.buffers[lineno - 1] + + def fetch_token(self) -> Token | None: + """Fetch the next token from source code. + + Returns ``None`` if sequence finished. + """ + try: + self.previous = self.current + self.current = Token(*next(self.tokens)) + except StopIteration: + self.current = None + + return self.current + + def fetch_until(self, condition: Any) -> list[Token]: + """Fetch tokens until specified token appeared. + + .. note:: This also handles parenthesis well. + """ + tokens = [] + while current := self.fetch_token(): + tokens.append(current) + if current == condition: + break + if current == [OP, '(']: + tokens += self.fetch_until([OP, ')']) + elif current == [OP, '{']: + tokens += self.fetch_until([OP, '}']) + elif current == [OP, '[']: + tokens += self.fetch_until([OP, ']']) + + return tokens + + +class AfterCommentParser(TokenProcessor): + """Python source code parser to pick up comments after assignments. + + This parser takes code which starts with an assignment statement, + and returns the comment for the variable if one exists. + """ + + def __init__(self, lines: list[str]) -> None: + super().__init__(lines) + self.comment: str | None = None + + def fetch_rvalue(self) -> list[Token]: + """Fetch right-hand value of assignment.""" + tokens = [] + while current := self.fetch_token(): + tokens.append(current) + if current == [OP, '(']: + tokens += self.fetch_until([OP, ')']) + elif current == [OP, '{']: + tokens += self.fetch_until([OP, '}']) + elif current == [OP, '[']: + tokens += self.fetch_until([OP, ']']) + elif current == INDENT: + tokens += self.fetch_until(DEDENT) + elif current == [OP, ';']: # NoQA: SIM114 + break + elif current and current.kind not in {OP, NAME, NUMBER, STRING}: + break + + return tokens + + def parse(self) -> None: + """Parse the code and obtain comment after assignment.""" + # skip lvalue (or whole of AnnAssign) + while (tok := self.fetch_token()) and not tok.match([OP, '='], NEWLINE, COMMENT): + assert tok + assert tok is not None + + # skip rvalue (if exists) + if tok == [OP, '=']: + self.fetch_rvalue() + tok = self.current + assert tok is not None + + if tok == COMMENT: + self.comment = tok.value + + +class VariableCommentPicker(ast.NodeVisitor): + """Python source code parser to pick up variable comments.""" + + def __init__(self, buffers: list[str], encoding: str) -> None: + self.counter = itertools.count() + self.buffers = buffers + self.encoding = encoding + self.context: list[str] = [] + self.current_classes: list[str] = [] + self.current_function: ast.FunctionDef | None = None + self.comments: dict[tuple[str, str], str] = {} + self.annotations: dict[tuple[str, str], str] = {} + self.previous: ast.AST | None = None + self.deforders: dict[str, int] = {} + self.finals: list[str] = [] + self.overloads: dict[str, list[Signature]] = {} + self.typing: str | None = None + self.typing_final: str | None = None + self.typing_overload: str | None = None + super().__init__() + + def get_qualname_for(self, name: str) -> list[str] | None: + """Get qualified name for given object as a list of string(s).""" + if self.current_function: + if self.current_classes and self.context[-1] == "__init__": + # store variable comments inside __init__ method of classes + return self.context[:-1] + [name] + else: + return None + else: + return self.context + [name] + + def add_entry(self, name: str) -> None: + qualname = self.get_qualname_for(name) + if qualname: + self.deforders[".".join(qualname)] = next(self.counter) + + def add_final_entry(self, name: str) -> None: + qualname = self.get_qualname_for(name) + if qualname: + self.finals.append(".".join(qualname)) + + def add_overload_entry(self, func: ast.FunctionDef) -> None: + # avoid circular import problem + from sphinx.util.inspect import signature_from_ast + qualname = self.get_qualname_for(func.name) + if qualname: + overloads = self.overloads.setdefault(".".join(qualname), []) + overloads.append(signature_from_ast(func)) + + def add_variable_comment(self, name: str, comment: str) -> None: + qualname = self.get_qualname_for(name) + if qualname: + basename = ".".join(qualname[:-1]) + self.comments[(basename, name)] = comment + + def add_variable_annotation(self, name: str, annotation: ast.AST) -> None: + qualname = self.get_qualname_for(name) + if qualname: + basename = ".".join(qualname[:-1]) + self.annotations[(basename, name)] = ast_unparse(annotation) + + def is_final(self, decorators: list[ast.expr]) -> bool: + final = [] + if self.typing: + final.append('%s.final' % self.typing) + if self.typing_final: + final.append(self.typing_final) + + for decorator in decorators: + try: + if ast_unparse(decorator) in final: + return True + except NotImplementedError: + pass + + return False + + def is_overload(self, decorators: list[ast.expr]) -> bool: + overload = [] + if self.typing: + overload.append('%s.overload' % self.typing) + if self.typing_overload: + overload.append(self.typing_overload) + + for decorator in decorators: + try: + if ast_unparse(decorator) in overload: + return True + except NotImplementedError: + pass + + return False + + def get_self(self) -> ast.arg | None: + """Returns the name of the first argument if in a function.""" + if self.current_function and self.current_function.args.args: + return self.current_function.args.args[0] + if self.current_function and self.current_function.args.posonlyargs: + return self.current_function.args.posonlyargs[0] + return None + + def get_line(self, lineno: int) -> str: + """Returns specified line.""" + return self.buffers[lineno - 1] + + def visit(self, node: ast.AST) -> None: + """Updates self.previous to the given node.""" + super().visit(node) + self.previous = node + + def visit_Import(self, node: ast.Import) -> None: + """Handles Import node and record the order of definitions.""" + for name in node.names: + self.add_entry(name.asname or name.name) + + if name.name == 'typing': + self.typing = name.asname or name.name + elif name.name == 'typing.final': + self.typing_final = name.asname or name.name + elif name.name == 'typing.overload': + self.typing_overload = name.asname or name.name + + def visit_ImportFrom(self, node: ast.ImportFrom) -> None: + """Handles Import node and record the order of definitions.""" + for name in node.names: + self.add_entry(name.asname or name.name) + + if node.module == 'typing' and name.name == 'final': + self.typing_final = name.asname or name.name + elif node.module == 'typing' and name.name == 'overload': + self.typing_overload = name.asname or name.name + + def visit_Assign(self, node: ast.Assign) -> None: + """Handles Assign node and pick up a variable comment.""" + try: + targets = get_assign_targets(node) + varnames: list[str] = sum( + [get_lvar_names(t, self=self.get_self()) for t in targets], [], + ) + current_line = self.get_line(node.lineno) + except TypeError: + return # this assignment is not new definition! + + # record annotation + if hasattr(node, 'annotation') and node.annotation: + for varname in varnames: + self.add_variable_annotation(varname, node.annotation) + elif hasattr(node, 'type_comment') and node.type_comment: + for varname in varnames: + self.add_variable_annotation( + varname, node.type_comment) # type: ignore[arg-type] + + # check comments after assignment + parser = AfterCommentParser([current_line[node.col_offset:]] + + self.buffers[node.lineno:]) + parser.parse() + if parser.comment and comment_re.match(parser.comment): + for varname in varnames: + self.add_variable_comment(varname, comment_re.sub('\\1', parser.comment)) + self.add_entry(varname) + return + + # check comments before assignment + if indent_re.match(current_line[:node.col_offset]): + comment_lines = [] + for i in range(node.lineno - 1): + before_line = self.get_line(node.lineno - 1 - i) + if comment_re.match(before_line): + comment_lines.append(comment_re.sub('\\1', before_line)) + else: + break + + if comment_lines: + comment = dedent_docstring('\n'.join(reversed(comment_lines))) + for varname in varnames: + self.add_variable_comment(varname, comment) + self.add_entry(varname) + return + + # not commented (record deforders only) + for varname in varnames: + self.add_entry(varname) + + def visit_AnnAssign(self, node: ast.AnnAssign) -> None: + """Handles AnnAssign node and pick up a variable comment.""" + self.visit_Assign(node) # type: ignore[arg-type] + + def visit_Expr(self, node: ast.Expr) -> None: + """Handles Expr node and pick up a comment if string.""" + if (isinstance(self.previous, (ast.Assign, ast.AnnAssign)) and + isinstance(node.value, ast.Constant) and isinstance(node.value.value, str)): + try: + targets = get_assign_targets(self.previous) + varnames = get_lvar_names(targets[0], self.get_self()) + for varname in varnames: + if isinstance(node.value.value, str): + docstring = node.value.value + else: + docstring = node.value.value.decode(self.encoding or 'utf-8') + + self.add_variable_comment(varname, dedent_docstring(docstring)) + self.add_entry(varname) + except TypeError: + pass # this assignment is not new definition! + + def visit_Try(self, node: ast.Try) -> None: + """Handles Try node and processes body and else-clause. + + .. note:: pycode parser ignores objects definition in except-clause. + """ + for subnode in node.body: + self.visit(subnode) + for subnode in node.orelse: + self.visit(subnode) + + def visit_ClassDef(self, node: ast.ClassDef) -> None: + """Handles ClassDef node and set context.""" + self.current_classes.append(node.name) + self.add_entry(node.name) + if self.is_final(node.decorator_list): + self.add_final_entry(node.name) + self.context.append(node.name) + self.previous = node + for child in node.body: + self.visit(child) + self.context.pop() + self.current_classes.pop() + + def visit_FunctionDef(self, node: ast.FunctionDef) -> None: + """Handles FunctionDef node and set context.""" + if self.current_function is None: + self.add_entry(node.name) # should be called before setting self.current_function + if self.is_final(node.decorator_list): + self.add_final_entry(node.name) + if self.is_overload(node.decorator_list): + self.add_overload_entry(node) + self.context.append(node.name) + self.current_function = node + for child in node.body: + self.visit(child) + self.context.pop() + self.current_function = None + + def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: + """Handles AsyncFunctionDef node and set context.""" + self.visit_FunctionDef(node) # type: ignore[arg-type] + + +class DefinitionFinder(TokenProcessor): + """Python source code parser to detect location of functions, + classes and methods. + """ + + def __init__(self, lines: list[str]) -> None: + super().__init__(lines) + self.decorator: Token | None = None + self.context: list[str] = [] + self.indents: list[tuple[str, str | None, int | None]] = [] + self.definitions: dict[str, tuple[str, int, int]] = {} + + def add_definition(self, name: str, entry: tuple[str, int, int]) -> None: + """Add a location of definition.""" + if self.indents and self.indents[-1][0] == 'def' and entry[0] == 'def': + # ignore definition of inner function + pass + else: + self.definitions[name] = entry + + def parse(self) -> None: + """Parse the code to obtain location of definitions.""" + while True: + token = self.fetch_token() + if token is None: + break + if token == COMMENT: + pass + elif token == [OP, '@'] and (self.previous is None or + self.previous.match(NEWLINE, NL, INDENT, DEDENT)): + if self.decorator is None: + self.decorator = token + elif token.match([NAME, 'class']): + self.parse_definition('class') + elif token.match([NAME, 'def']): + self.parse_definition('def') + elif token == INDENT: + self.indents.append(('other', None, None)) + elif token == DEDENT: + self.finalize_block() + + def parse_definition(self, typ: str) -> None: + """Parse AST of definition.""" + name = self.fetch_token() + self.context.append(name.value) # type: ignore[union-attr] + funcname = '.'.join(self.context) + + if self.decorator: + start_pos = self.decorator.start[0] + self.decorator = None + else: + start_pos = name.start[0] # type: ignore[union-attr] + + self.fetch_until([OP, ':']) + if self.fetch_token().match(COMMENT, NEWLINE): # type: ignore[union-attr] + self.fetch_until(INDENT) + self.indents.append((typ, funcname, start_pos)) + else: + # one-liner + self.add_definition(funcname, + (typ, start_pos, name.end[0])) # type: ignore[union-attr] + self.context.pop() + + def finalize_block(self) -> None: + """Finalize definition block.""" + definition = self.indents.pop() + if definition[0] != 'other': + typ, funcname, start_pos = definition + end_pos = self.current.end[0] - 1 # type: ignore[union-attr] + while emptyline_re.match(self.get_line(end_pos)): + end_pos -= 1 + + self.add_definition(funcname, (typ, start_pos, end_pos)) # type: ignore[arg-type] + self.context.pop() + + +class Parser: + """Python source code parser to pick up variable comments. + + This is a better wrapper for ``VariableCommentPicker``. + """ + + def __init__(self, code: str, encoding: str = 'utf-8') -> None: + self.code = filter_whitespace(code) + self.encoding = encoding + self.annotations: dict[tuple[str, str], str] = {} + self.comments: dict[tuple[str, str], str] = {} + self.deforders: dict[str, int] = {} + self.definitions: dict[str, tuple[str, int, int]] = {} + self.finals: list[str] = [] + self.overloads: dict[str, list[Signature]] = {} + + def parse(self) -> None: + """Parse the source code.""" + self.parse_comments() + self.parse_definition() + + def parse_comments(self) -> None: + """Parse the code and pick up comments.""" + tree = ast.parse(self.code, type_comments=True) + picker = VariableCommentPicker(self.code.splitlines(True), self.encoding) + picker.visit(tree) + self.annotations = picker.annotations + self.comments = picker.comments + self.deforders = picker.deforders + self.finals = picker.finals + self.overloads = picker.overloads + + def parse_definition(self) -> None: + """Parse the location of definitions from the code.""" + parser = DefinitionFinder(self.code.splitlines(True)) + parser.parse() + self.definitions = parser.definitions -- cgit v1.2.3