1 files changed, 577 insertions, 0 deletions
diff --git a/sphinx/pycode/parser.py b/sphinx/pycode/parser.py
new file mode 100644
index 0000000..a0f855d
--- /dev/null
+++ b/sphinx/pycode/parser.py
@@ -0,0 +1,577 @@
+"""Utilities parsing and analyzing Python code."""
+
+from __future__ import annotations
+
+import ast
+import contextlib
+import inspect
+import itertools
+import re
+import tokenize
+from inspect import Signature
+from token import DEDENT, INDENT, NAME, NEWLINE, NUMBER, OP, STRING
+from tokenize import COMMENT, NL
+from typing import Any
+
+from sphinx.pycode.ast import unparse as ast_unparse
+
+comment_re = re.compile('^\\s*#: ?(.*)\r?\n?$')  # a "#:" comment line; group 1 is its text
+indent_re = re.compile('^\\s*$')  # empty or whitespace-only text
+emptyline_re = re.compile('^\\s*(#.*)?$')  # blank line, or a line holding only a comment
+
+
+def filter_whitespace(code: str) -> str:
+    return ' '.join(code.split('\f'))  # swap every FF (form feed) for a single space
+
+
+def get_assign_targets(node: ast.AST) -> list[ast.expr]:
+    """Return the assignment targets of an ``Assign`` or ``AnnAssign`` node."""
+    if not isinstance(node, ast.Assign):
+        # anything else is treated like AnnAssign: a single ``target`` attribute
+        return [node.target]  # type: ignore[attr-defined]
+    return node.targets
+
+
+def get_lvar_names(node: ast.AST, self: ast.arg | None = None) -> list[str]:
+    """Collect the variable names bound by an assignment target.
+
+    With *self* given, only the name of that argument and attributes
+    accessed through it count.  A target that binds nothing new raises
+    `TypeError`::
+
+        ary[0] = 'foo'
+        dic["bar"] = 'baz'
+        # => TypeError
+    """
+    if self:
+        self_id = self.arg
+
+    kind = type(node).__name__
+    if kind == 'str':
+        # bare attribute name handed over by the 'Attribute' branch below
+        return [node]  # type: ignore[list-item]
+    if kind == 'Name':
+        if self is not None and node.id != self_id:  # type: ignore[attr-defined]
+            raise TypeError('The assignment %r is not instance variable' % node)
+        return [node.id]  # type: ignore[attr-defined]
+    if kind == 'Starred':
+        return get_lvar_names(node.value, self)  # type: ignore[attr-defined]
+    if kind in ('Tuple', 'List'):
+        names = []
+        for element in node.elts:  # type: ignore[attr-defined]
+            try:
+                names.extend(get_lvar_names(element, self))
+            except TypeError:
+                continue  # element binds no variable; keep the rest
+        return names
+    if kind == 'Attribute':
+        owner = node.value  # type: ignore[attr-defined]
+        if type(owner).__name__ != 'Name' or not self or owner.id != self_id:
+            raise TypeError('The assignment %r is not instance variable' % node)
+        # instance variable
+        return [f"{get_lvar_names(node.attr, self)[0]}"]  # type: ignore[attr-defined]
+    if kind in ('Constant', 'Index', 'Slice', 'Subscript'):
+        raise TypeError('%r does not create new variable' % node)
+
+    raise NotImplementedError('Unexpected node name %r' % kind)
+
+
+def dedent_docstring(s: str) -> str:
+    """Strip the shared leading indentation from a docstring.
+
+    The text is attached to a throwaway function so `inspect.getdoc`
+    performs the dedent; surrounding CR/LF characters are trimmed after.
+    """
+    def carrier() -> None:
+        pass
+
+    carrier.__doc__ = s
+    cleaned = inspect.getdoc(carrier)
+    return cleaned.strip("\r\n") if cleaned else ""
+
+
+class Token:
+    """Thin wrapper over a `tokenize` 5-tuple with flexible equality tests."""
+
+    def __init__(self, kind: int, value: Any, start: tuple[int, int], end: tuple[int, int],
+                 source: str) -> None:
+        self.kind = kind
+        self.value = value
+        self.start = start
+        self.end = end
+        self.source = source
+
+    def __eq__(self, other: Any) -> bool:
+        """Compare by kind (int), value (str) or ``[kind, value]`` pair (list/tuple)."""
+        if isinstance(other, int):
+            return self.kind == other
+        elif isinstance(other, str):
+            return self.value == other
+        elif isinstance(other, (list, tuple)):
+            return [self.kind, self.value] == list(other)
+        # ``None`` and unsupported operands: let Python fall back to identity
+        # comparison instead of raising ``ValueError`` from a plain ``==``.
+        return NotImplemented
+
+    def match(self, *conditions: Any) -> bool:
+        return any(self == candidate for candidate in conditions)  # equal to at least one
+
+    def __repr__(self) -> str:
+        return f'<Token kind={tokenize.tok_name[self.kind]!r} value={self.value.strip()!r}>'
+
+
+class TokenProcessor:
+    """Pull-style reader that tokenizes a list of source lines on demand."""
+
+    def __init__(self, buffers: list[str]) -> None:
+        self.buffers = buffers
+        self.tokens = tokenize.generate_tokens(iter(buffers).__next__)
+        self.current: Token | None = None
+        self.previous: Token | None = None
+
+    def get_line(self, lineno: int) -> str:
+        """Return source line *lineno* (1-based)."""
+        return self.buffers[lineno - 1]
+
+    def fetch_token(self) -> Token | None:
+        """Advance to the next token and return it (``None`` once exhausted)."""
+        self.previous = self.current
+        try:
+            self.current = Token(*next(self.tokens))
+        except StopIteration:
+            self.current = None
+
+        return self.current
+
+    def fetch_until(self, condition: Any) -> list[Token]:
+        """Collect tokens up to and including the first match of *condition*.
+
+        Bracketed groups are consumed whole, so a match nested inside
+        ``()``, ``[]`` or ``{}`` does not end the scan.
+        """
+        closers = {'(': ')', '{': '}', '[': ']'}
+        collected = []
+        while True:
+            token = self.fetch_token()
+            if token is None:
+                break
+            collected.append(token)
+            if token == condition:
+                break
+            if token.kind == OP and token.value in closers:
+                # swallow everything through the matching closing bracket
+                collected += self.fetch_until([OP, closers[token.value]])
+
+        return collected
+
+
+class AfterCommentParser(TokenProcessor):
+    """Extract the comment that trails an assignment statement.
+
+    The supplied lines must begin with the assignment itself; after
+    :meth:`parse`, ``comment`` holds the trailing comment token text, if any.
+    """
+
+    def __init__(self, lines: list[str]) -> None:
+        super().__init__(lines)
+        self.comment: str | None = None
+
+    def fetch_rvalue(self) -> list[Token]:
+        """Consume the right-hand side of the assignment and return its tokens."""
+        closers = {'(': ')', '{': '}', '[': ']'}
+        collected = []
+        while (token := self.fetch_token()) is not None:
+            collected.append(token)
+            if token.kind == OP:
+                if token.value in closers:
+                    collected += self.fetch_until([OP, closers[token.value]])
+                elif token.value == ';':
+                    break  # statement separator ends the value
+            elif token.kind == INDENT:
+                collected += self.fetch_until(DEDENT)
+            elif token.kind not in {NAME, NUMBER, STRING}:
+                break  # NEWLINE, COMMENT, ... terminate the value
+
+        return collected
+
+    def parse(self) -> None:
+        """Scan past the assignment and record the comment following it."""
+        # step over the lvalue (or an entire AnnAssign without a value)
+        while True:
+            tok = self.fetch_token()
+            if not tok or tok.match([OP, '='], NEWLINE, COMMENT):
+                break
+        assert tok is not None
+
+        # step over the rvalue, when there is one
+        if tok.match([OP, '=']):
+            self.fetch_rvalue()
+            tok = self.current
+            assert tok is not None
+
+        if tok.kind == COMMENT:
+            self.comment = tok.value
+
+
+class VariableCommentPicker(ast.NodeVisitor):
+    """Python source code parser to pick up variable comments.
+
+    Visiting a tree fills ``comments``, ``annotations``, ``deforders``,
+    ``finals`` and ``overloads``.
+    """
+
+    def __init__(self, buffers: list[str], encoding: str) -> None:
+        self.counter = itertools.count()  # hands out definition-order numbers
+        self.buffers = buffers
+        self.encoding = encoding
+        self.context: list[str] = []  # names of the enclosing classes/functions
+        self.current_classes: list[str] = []
+        self.current_function: ast.FunctionDef | None = None
+        self.comments: dict[tuple[str, str], str] = {}  # (scope, name) -> comment
+        self.annotations: dict[tuple[str, str], str] = {}  # (scope, name) -> annotation
+        self.previous: ast.AST | None = None  # last visited node (see visit())
+        self.deforders: dict[str, int] = {}  # qualified name -> definition order
+        self.finals: list[str] = []
+        self.overloads: dict[str, list[Signature]] = {}
+        self.typing: str | None = None  # local name of the ``typing`` module
+        self.typing_final: str | None = None  # local name of ``typing.final``
+        self.typing_overload: str | None = None  # local name of ``typing.overload``
+        super().__init__()
+
+    def get_qualname_for(self, name: str) -> list[str] | None:
+        """Get qualified name for given object as a list of string(s)."""
+        if self.current_function:
+            if self.current_classes and self.context[-1] == "__init__":
+                # store variable comments inside __init__ method of classes
+                return self.context[:-1] + [name]
+            else:
+                return None
+        else:
+            return self.context + [name]
+
+    def add_entry(self, name: str) -> None:
+        """Give *name* the next definition-order number, if it has a qualified name."""
+        qualname = self.get_qualname_for(name)
+        if qualname:
+            self.deforders[".".join(qualname)] = next(self.counter)
+
+    def add_final_entry(self, name: str) -> None:
+        """Add *name* to ``finals``, if it has a qualified name."""
+        qualname = self.get_qualname_for(name)
+        if qualname:
+            self.finals.append(".".join(qualname))
+
+    def add_overload_entry(self, func: ast.FunctionDef) -> None:
+        """Append the signature of *func* to the overloads of its qualified name."""
+        # avoid circular import problem
+        from sphinx.util.inspect import signature_from_ast
+        qualname = self.get_qualname_for(func.name)
+        if qualname:
+            overloads = self.overloads.setdefault(".".join(qualname), [])
+            overloads.append(signature_from_ast(func))
+
+    def add_variable_comment(self, name: str, comment: str) -> None:
+        """Store *comment* under ``(enclosing scope, name)``, if *name* is qualified."""
+        qualname = self.get_qualname_for(name)
+        if qualname:
+            basename = ".".join(qualname[:-1])
+            self.comments[(basename, name)] = comment
+
+    def add_variable_annotation(self, name: str, annotation: ast.AST) -> None:
+        """Store the unparsed *annotation* under ``(enclosing scope, name)``."""
+        qualname = self.get_qualname_for(name)
+        if qualname:
+            basename = ".".join(qualname[:-1])
+            self.annotations[(basename, name)] = ast_unparse(annotation)
+
+    def _has_decorator(self, decorators: list[ast.expr], names: list[str]) -> bool:
+        """Return True if a decorator, once unparsed, equals one of *names*."""
+        for decorator in decorators:
+            try:
+                if ast_unparse(decorator) in names:
+                    return True
+            except NotImplementedError:
+                pass  # ignore decorators that cannot be unparsed
+
+        return False
+
+    def is_final(self, decorators: list[ast.expr]) -> bool:
+        """Check *decorators* for ``final`` under the names seen in imports."""
+        final = []
+        if self.typing:
+            final.append('%s.final' % self.typing)
+        if self.typing_final:
+            final.append(self.typing_final)
+        return self._has_decorator(decorators, final)
+
+    def is_overload(self, decorators: list[ast.expr]) -> bool:
+        """Check *decorators* for ``overload`` under the names seen in imports."""
+        overload = []
+        if self.typing:
+            overload.append('%s.overload' % self.typing)
+        if self.typing_overload:
+            overload.append(self.typing_overload)
+        return self._has_decorator(decorators, overload)
+
+    def get_self(self) -> ast.arg | None:
+        """Returns the first positional argument if in a function."""
+        func = self.current_function
+        if func is None:
+            return None
+        # positional-only parameters precede the regular ones in a signature
+        positional = func.args.posonlyargs + func.args.args
+        return positional[0] if positional else None
+
+    def get_line(self, lineno: int) -> str:
+        """Returns specified line."""
+        return self.buffers[lineno - 1]
+
+    def visit(self, node: ast.AST) -> None:
+        """Updates self.previous to the given node."""
+        super().visit(node)
+        self.previous = node
+
+    def visit_Import(self, node: ast.Import) -> None:
+        """Handles Import node and record the order of definitions."""
+        for name in node.names:
+            self.add_entry(name.asname or name.name)
+
+            if name.name == 'typing':
+                self.typing = name.asname or name.name
+            elif name.name == 'typing.final':
+                self.typing_final = name.asname or name.name
+            elif name.name == 'typing.overload':
+                self.typing_overload = name.asname or name.name
+
+    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
+        """Handles ImportFrom node and record the order of definitions."""
+        for name in node.names:
+            self.add_entry(name.asname or name.name)
+
+            if node.module == 'typing' and name.name == 'final':
+                self.typing_final = name.asname or name.name
+            elif node.module == 'typing' and name.name == 'overload':
+                self.typing_overload = name.asname or name.name
+
+    def visit_Assign(self, node: ast.Assign) -> None:
+        """Handles Assign node and pick up a variable comment.
+
+        A ``#:`` comment trailing the statement takes precedence over ``#:``
+        comment lines directly above it.
+        """
+        try:
+            targets = get_assign_targets(node)
+            self_arg = self.get_self()
+            varnames: list[str] = list(itertools.chain.from_iterable(
+                get_lvar_names(t, self=self_arg) for t in targets))
+            current_line = self.get_line(node.lineno)
+        except TypeError:
+            return  # this assignment is not new definition!
+
+        # record annotation (AnnAssign) or, failing that, the type comment
+        annotation = getattr(node, 'annotation', None) or getattr(node, 'type_comment', None)
+        if annotation:
+            for varname in varnames:
+                self.add_variable_annotation(varname, annotation)
+
+        comment: str | None = None
+
+        # check comments after assignment
+        parser = AfterCommentParser([current_line[node.col_offset:]] +
+                                    self.buffers[node.lineno:])
+        parser.parse()
+        if parser.comment and comment_re.match(parser.comment):
+            comment = comment_re.sub('\\1', parser.comment)
+        elif indent_re.match(current_line[:node.col_offset]):
+            # check comments before assignment, walking upwards line by line
+            comment_lines = []
+            for lineno in range(node.lineno - 1, 0, -1):
+                before_line = self.get_line(lineno)
+                if not comment_re.match(before_line):
+                    break
+                comment_lines.append(comment_re.sub('\\1', before_line))
+
+            if comment_lines:
+                comment = dedent_docstring('\n'.join(reversed(comment_lines)))
+
+        # deforders are recorded whether or not a comment was found
+        for varname in varnames:
+            if comment is not None:
+                self.add_variable_comment(varname, comment)
+            self.add_entry(varname)
+
+    def visit_AnnAssign(self, node: ast.AnnAssign) -> None:
+        """Handles AnnAssign node and pick up a variable comment."""
+        self.visit_Assign(node)  # type: ignore[arg-type]
+
+    def visit_Expr(self, node: ast.Expr) -> None:
+        """Handles Expr node and pick up a comment if string."""
+        if (isinstance(self.previous, (ast.Assign, ast.AnnAssign)) and
+                isinstance(node.value, ast.Constant) and isinstance(node.value.value, str)):
+            try:
+                targets = get_assign_targets(self.previous)
+                varnames = get_lvar_names(targets[0], self.get_self())
+                # the guard above ensures a ``str`` constant, so no decoding is needed
+                docstring = dedent_docstring(node.value.value)
+                for varname in varnames:
+                    self.add_variable_comment(varname, docstring)
+                    self.add_entry(varname)
+            except TypeError:
+                pass  # this assignment is not new definition!
+
+    def visit_Try(self, node: ast.Try) -> None:
+        """Handles Try node and processes body and else-clause.
+
+        .. note:: pycode parser ignores objects definition in except/finally-clause.
+        """
+        for subnode in node.body:
+            self.visit(subnode)
+        for subnode in node.orelse:
+            self.visit(subnode)
+
+    def visit_ClassDef(self, node: ast.ClassDef) -> None:
+        """Handles ClassDef node and set context."""
+        self.current_classes.append(node.name)
+        self.add_entry(node.name)
+        if self.is_final(node.decorator_list):
+            self.add_final_entry(node.name)
+        self.context.append(node.name)
+        self.previous = node
+        for child in node.body:
+            self.visit(child)
+        self.context.pop()
+        self.current_classes.pop()
+
+    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
+        """Handles FunctionDef node and set context."""
+        if self.current_function is None:
+            self.add_entry(node.name)  # should be called before setting self.current_function
+            if self.is_final(node.decorator_list):
+                self.add_final_entry(node.name)
+            if self.is_overload(node.decorator_list):
+                self.add_overload_entry(node)
+            self.context.append(node.name)
+            self.current_function = node
+            for child in node.body:
+                self.visit(child)
+            self.context.pop()
+            self.current_function = None
+
+    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
+        """Handles AsyncFunctionDef node and set context."""
+        self.visit_FunctionDef(node)  # type: ignore[arg-type]
+
+
+class DefinitionFinder(TokenProcessor):
+    """Token-based scanner recording where classes, functions and
+    methods begin and end in the source.
+    """
+
+    def __init__(self, lines: list[str]) -> None:
+        super().__init__(lines)
+        self.context: list[str] = []  # names of the definitions being parsed
+        self.decorator: Token | None = None  # first ``@`` seen before a definition
+        self.definitions: dict[str, tuple[str, int, int]] = {}  # name -> (type, start, end)
+        self.indents: list[tuple[str, str | None, int | None]] = []  # open blocks
+
+    def add_definition(self, name: str, entry: tuple[str, int, int]) -> None:
+        """Record *entry* under *name*, skipping functions nested in functions."""
+        inside_def = bool(self.indents) and self.indents[-1][0] == 'def'
+        if inside_def and entry[0] == 'def':
+            # ignore definition of inner function
+            return
+        self.definitions[name] = entry
+
+    def parse(self) -> None:
+        """Walk the token stream and collect definition locations."""
+        while (token := self.fetch_token()) is not None:
+            kind, value = token.kind, token.value
+            if kind == COMMENT:
+                continue
+            if kind == OP and value == '@' and (
+                self.previous is None or
+                self.previous.match(NEWLINE, NL, INDENT, DEDENT)
+            ):
+                # keep only the first decorator of a stack as the start marker
+                if self.decorator is None:
+                    self.decorator = token
+            elif kind == NAME and value in ('class', 'def'):
+                self.parse_definition(value)
+            elif kind == INDENT:
+                # an indented block not opened by parse_definition
+                self.indents.append(('other', None, None))
+            elif kind == DEDENT:
+                self.finalize_block()
+
+    def parse_definition(self, typ: str) -> None:
+        """Handle a ``class``/``def`` header whose keyword was just consumed."""
+        name = self.fetch_token()
+        self.context.append(name.value)  # type: ignore[union-attr]
+        funcname = '.'.join(self.context)
+
+        # a pending decorator moves the start up to its line
+        decorator, self.decorator = self.decorator, None
+        start_pos = (decorator or name).start[0]  # type: ignore[union-attr]
+
+        self.fetch_until([OP, ':'])
+        following = self.fetch_token()
+        if not following.match(COMMENT, NEWLINE):  # type: ignore[union-attr]
+            # one-liner: it ends on the line of its name
+            self.add_definition(funcname,
+                                (typ, start_pos, name.end[0]))  # type: ignore[union-attr]
+            self.context.pop()
+            return
+        # multi-line body: skip to its INDENT and remember the open block
+        self.fetch_until(INDENT)
+        self.indents.append((typ, funcname, start_pos))
+
+    def finalize_block(self) -> None:
+        """Close the innermost block; record it if it is a definition."""
+        typ, funcname, start_pos = self.indents.pop()
+        if typ == 'other':
+            return
+        # line before the current token, minus trailing blank/comment-only lines
+        end_pos = self.current.end[0] - 1  # type: ignore[union-attr]
+        while emptyline_re.match(self.get_line(end_pos)):
+            end_pos -= 1
+        self.add_definition(funcname, (typ, start_pos, end_pos))  # type: ignore[arg-type]
+        self.context.pop()
+
+
+class Parser:
+    """Front end running both analysers over one piece of source code.
+
+    Wraps ``VariableCommentPicker`` and ``DefinitionFinder`` and keeps their results.
+    """
+
+    def __init__(self, code: str, encoding: str = 'utf-8') -> None:
+        self.code = filter_whitespace(code)
+        self.encoding = encoding
+        self.annotations: dict[tuple[str, str], str] = {}
+        self.comments: dict[tuple[str, str], str] = {}
+        self.deforders: dict[str, int] = {}
+        self.definitions: dict[str, tuple[str, int, int]] = {}
+        self.finals: list[str] = []
+        self.overloads: dict[str, list[Signature]] = {}
+
+    def parse(self) -> None:
+        """Run comment extraction first, then definition location."""
+        self.parse_comments()
+        self.parse_definition()
+
+    def parse_comments(self) -> None:
+        """Walk the AST of the code and copy out what the picker collected."""
+        tree = ast.parse(self.code, type_comments=True)
+        lines = self.code.splitlines(keepends=True)
+        picker = VariableCommentPicker(lines, self.encoding)
+        picker.visit(tree)
+
+        self.annotations, self.comments = picker.annotations, picker.comments
+        self.deforders, self.finals = picker.deforders, picker.finals
+        self.overloads = picker.overloads
+
+    def parse_definition(self) -> None:
+        """Locate class/function definitions with the token-based finder."""
+        finder = DefinitionFinder(self.code.splitlines(keepends=True))
+        finder.parse()
+        self.definitions = finder.definitions