"""Classes for docstring parsing and formatting.""" from __future__ import annotations import collections import contextlib import inspect import re from functools import partial from typing import TYPE_CHECKING, Any, Callable from sphinx.locale import _, __ from sphinx.util import logging from sphinx.util.typing import get_type_hints, stringify_annotation if TYPE_CHECKING: from sphinx.application import Sphinx from sphinx.config import Config as SphinxConfig logger = logging.getLogger(__name__) _directive_regex = re.compile(r'\.\. \S+::') _google_section_regex = re.compile(r'^(\s|\w)+:\s*$') _google_typed_arg_regex = re.compile(r'(.+?)\(\s*(.*[^\s]+)\s*\)') _numpy_section_regex = re.compile(r'^[=\-`:\'"~^_*+#<>]{2,}\s*$') _single_colon_regex = re.compile(r'(?`))') _xref_regex = re.compile( r'(?:(?::(?:[a-zA-Z0-9]+[\-_+:.])*[a-zA-Z0-9]+:)?`.+?`)', ) _bullet_list_regex = re.compile(r'^(\*|\+|\-)(\s+\S|\s*$)') _enumerated_list_regex = re.compile( r'^(?P\()?' r'(\d+|#|[ivxlcdm]+|[IVXLCDM]+|[a-zA-Z])' r'(?(paren)\)|\.)(\s+\S|\s*$)') _token_regex = re.compile( r"(,\sor\s|\sor\s|\sof\s|:\s|\sto\s|,\sand\s|\sand\s|,\s" r"|[{]|[}]" r'|"(?:\\"|[^"])*"' r"|'(?:\\'|[^'])*')", ) _default_regex = re.compile( r"^default[^_0-9A-Za-z].*$", ) _SINGLETONS = ("None", "True", "False", "Ellipsis") class Deque(collections.deque): """ A subclass of deque that mimics ``pockets.iterators.modify_iter``. The `.Deque.get` and `.Deque.next` methods are added. """ sentinel = object() def get(self, n: int) -> Any: """ Return the nth element of the stack, or ``self.sentinel`` if n is greater than the stack size. """ return self[n] if n < len(self) else self.sentinel def next(self) -> Any: if self: return super().popleft() else: raise StopIteration def _convert_type_spec(_type: str, translations: dict[str, str] | None = None) -> str: """Convert type specification to reference in reST.""" if translations is not None and _type in translations: return translations[_type] if _type == 'None': return ':py:obj:`None`' return f':py:class:`{_type}`' class GoogleDocstring: """Convert Google style docstrings to reStructuredText. Parameters ---------- docstring : :obj:`str` or :obj:`list` of :obj:`str` The docstring to parse, given either as a string or split into individual lines. config: :obj:`sphinx.ext.napoleon.Config` or :obj:`sphinx.config.Config` The configuration settings to use. If not given, defaults to the config object on `app`; or if `app` is not given defaults to the a new :class:`sphinx.ext.napoleon.Config` object. Other Parameters ---------------- app : :class:`sphinx.application.Sphinx`, optional Application object representing the Sphinx process. what : :obj:`str`, optional A string specifying the type of the object to which the docstring belongs. Valid values: "module", "class", "exception", "function", "method", "attribute". name : :obj:`str`, optional The fully qualified name of the object. obj : module, class, exception, function, method, or attribute The object to which the docstring belongs. options : :class:`sphinx.ext.autodoc.Options`, optional The options given to the directive: an object with attributes inherited_members, undoc_members, show_inheritance and no_index that are True if the flag option of same name was given to the auto directive. Example ------- >>> from sphinx.ext.napoleon import Config >>> config = Config(napoleon_use_param=True, napoleon_use_rtype=True) >>> docstring = '''One line summary. ... ... Extended description. ... ... Args: ... arg1(int): Description of `arg1` ... arg2(str): Description of `arg2` ... Returns: ... str: Description of return value. ... ''' >>> print(GoogleDocstring(docstring, config)) One line summary. Extended description. :param arg1: Description of `arg1` :type arg1: int :param arg2: Description of `arg2` :type arg2: str :returns: Description of return value. :rtype: str """ _name_rgx = re.compile(r"^\s*((?::(?P\S+):)?`(?P~?[a-zA-Z0-9_.-]+)`|" r" (?P~?[a-zA-Z0-9_.-]+))\s*", re.X) def __init__( self, docstring: str | list[str], config: SphinxConfig | None = None, app: Sphinx | None = None, what: str = '', name: str = '', obj: Any = None, options: Any = None, ) -> None: self._app = app if config: self._config = config elif app: self._config = app.config else: from sphinx.ext.napoleon import Config self._config = Config() # type: ignore[assignment] if not what: if inspect.isclass(obj): what = 'class' elif inspect.ismodule(obj): what = 'module' elif callable(obj): what = 'function' else: what = 'object' self._what = what self._name = name self._obj = obj self._opt = options if isinstance(docstring, str): lines = docstring.splitlines() else: lines = docstring self._lines = Deque(map(str.rstrip, lines)) self._parsed_lines: list[str] = [] self._is_in_section = False self._section_indent = 0 if not hasattr(self, '_directive_sections'): self._directive_sections: list[str] = [] if not hasattr(self, '_sections'): self._sections: dict[str, Callable] = { 'args': self._parse_parameters_section, 'arguments': self._parse_parameters_section, 'attention': partial(self._parse_admonition, 'attention'), 'attributes': self._parse_attributes_section, 'caution': partial(self._parse_admonition, 'caution'), 'danger': partial(self._parse_admonition, 'danger'), 'error': partial(self._parse_admonition, 'error'), 'example': self._parse_examples_section, 'examples': self._parse_examples_section, 'hint': partial(self._parse_admonition, 'hint'), 'important': partial(self._parse_admonition, 'important'), 'keyword args': self._parse_keyword_arguments_section, 'keyword arguments': self._parse_keyword_arguments_section, 'methods': self._parse_methods_section, 'note': partial(self._parse_admonition, 'note'), 'notes': self._parse_notes_section, 'other parameters': self._parse_other_parameters_section, 'parameters': self._parse_parameters_section, 'receive': self._parse_receives_section, 'receives': self._parse_receives_section, 'return': self._parse_returns_section, 'returns': self._parse_returns_section, 'raise': self._parse_raises_section, 'raises': self._parse_raises_section, 'references': self._parse_references_section, 'see also': self._parse_see_also_section, 'tip': partial(self._parse_admonition, 'tip'), 'todo': partial(self._parse_admonition, 'todo'), 'warning': partial(self._parse_admonition, 'warning'), 'warnings': partial(self._parse_admonition, 'warning'), 'warn': self._parse_warns_section, 'warns': self._parse_warns_section, 'yield': self._parse_yields_section, 'yields': self._parse_yields_section, } self._load_custom_sections() self._parse() def __str__(self) -> str: """Return the parsed docstring in reStructuredText format. Returns ------- unicode Unicode version of the docstring. """ return '\n'.join(self.lines()) def lines(self) -> list[str]: """Return the parsed lines of the docstring in reStructuredText format. Returns ------- list(str) The lines of the docstring in a list. """ return self._parsed_lines def _consume_indented_block(self, indent: int = 1) -> list[str]: lines = [] line = self._lines.get(0) while ( not self._is_section_break() and (not line or self._is_indented(line, indent)) ): lines.append(self._lines.next()) line = self._lines.get(0) return lines def _consume_contiguous(self) -> list[str]: lines = [] while (self._lines and self._lines.get(0) and not self._is_section_header()): lines.append(self._lines.next()) return lines def _consume_empty(self) -> list[str]: lines = [] line = self._lines.get(0) while self._lines and not line: lines.append(self._lines.next()) line = self._lines.get(0) return lines def _consume_field(self, parse_type: bool = True, prefer_type: bool = False, ) -> tuple[str, str, list[str]]: line = self._lines.next() before, colon, after = self._partition_field_on_colon(line) _name, _type, _desc = before, '', after if parse_type: match = _google_typed_arg_regex.match(before) if match: _name = match.group(1).strip() _type = match.group(2) _name = self._escape_args_and_kwargs(_name) if prefer_type and not _type: _type, _name = _name, _type if _type and self._config.napoleon_preprocess_types: _type = _convert_type_spec(_type, self._config.napoleon_type_aliases or {}) indent = self._get_indent(line) + 1 _descs = [_desc] + self._dedent(self._consume_indented_block(indent)) _descs = self.__class__(_descs, self._config).lines() return _name, _type, _descs def _consume_fields(self, parse_type: bool = True, prefer_type: bool = False, multiple: bool = False) -> list[tuple[str, str, list[str]]]: self._consume_empty() fields = [] while not self._is_section_break(): _name, _type, _desc = self._consume_field(parse_type, prefer_type) if multiple and _name: for name in _name.split(","): fields.append((name.strip(), _type, _desc)) elif _name or _type or _desc: fields.append((_name, _type, _desc)) return fields def _consume_inline_attribute(self) -> tuple[str, list[str]]: line = self._lines.next() _type, colon, _desc = self._partition_field_on_colon(line) if not colon or not _desc: _type, _desc = _desc, _type _desc += colon _descs = [_desc] + self._dedent(self._consume_to_end()) _descs = self.__class__(_descs, self._config).lines() return _type, _descs def _consume_returns_section(self, preprocess_types: bool = False, ) -> list[tuple[str, str, list[str]]]: lines = self._dedent(self._consume_to_next_section()) if lines: before, colon, after = self._partition_field_on_colon(lines[0]) _name, _type, _desc = '', '', lines if colon: if after: _desc = [after] + lines[1:] else: _desc = lines[1:] _type = before if (_type and preprocess_types and self._config.napoleon_preprocess_types): _type = _convert_type_spec(_type, self._config.napoleon_type_aliases or {}) _desc = self.__class__(_desc, self._config).lines() return [(_name, _type, _desc)] else: return [] def _consume_usage_section(self) -> list[str]: lines = self._dedent(self._consume_to_next_section()) return lines def _consume_section_header(self) -> str: section = self._lines.next() stripped_section = section.strip(':') if stripped_section.lower() in self._sections: section = stripped_section return section def _consume_to_end(self) -> list[str]: lines = [] while self._lines: lines.append(self._lines.next()) return lines def _consume_to_next_section(self) -> list[str]: self._consume_empty() lines = [] while not self._is_section_break(): lines.append(self._lines.next()) return lines + self._consume_empty() def _dedent(self, lines: list[str], full: bool = False) -> list[str]: if full: return [line.lstrip() for line in lines] else: min_indent = self._get_min_indent(lines) return [line[min_indent:] for line in lines] def _escape_args_and_kwargs(self, name: str) -> str: if name.endswith('_') and getattr(self._config, 'strip_signature_backslash', False): name = name[:-1] + r'\_' if name[:2] == '**': return r'\*\*' + name[2:] elif name[:1] == '*': return r'\*' + name[1:] else: return name def _fix_field_desc(self, desc: list[str]) -> list[str]: if self._is_list(desc): desc = [''] + desc elif desc[0].endswith('::'): desc_block = desc[1:] indent = self._get_indent(desc[0]) block_indent = self._get_initial_indent(desc_block) if block_indent > indent: desc = [''] + desc else: desc = ['', desc[0]] + self._indent(desc_block, 4) return desc def _format_admonition(self, admonition: str, lines: list[str]) -> list[str]: lines = self._strip_empty(lines) if len(lines) == 1: return [f'.. {admonition}:: {lines[0].strip()}', ''] elif lines: lines = self._indent(self._dedent(lines), 3) return ['.. %s::' % admonition, ''] + lines + [''] else: return ['.. %s::' % admonition, ''] def _format_block( self, prefix: str, lines: list[str], padding: str | None = None, ) -> list[str]: if lines: if padding is None: padding = ' ' * len(prefix) result_lines = [] for i, line in enumerate(lines): if i == 0: result_lines.append((prefix + line).rstrip()) elif line: result_lines.append(padding + line) else: result_lines.append('') return result_lines else: return [prefix] def _format_docutils_params(self, fields: list[tuple[str, str, list[str]]], field_role: str = 'param', type_role: str = 'type', ) -> list[str]: lines = [] for _name, _type, _desc in fields: _desc = self._strip_empty(_desc) if any(_desc): _desc = self._fix_field_desc(_desc) field = f':{field_role} {_name}: ' lines.extend(self._format_block(field, _desc)) else: lines.append(f':{field_role} {_name}:') if _type: lines.append(f':{type_role} {_name}: {_type}') return lines + [''] def _format_field(self, _name: str, _type: str, _desc: list[str]) -> list[str]: _desc = self._strip_empty(_desc) has_desc = any(_desc) separator = ' -- ' if has_desc else '' if _name: if _type: if '`' in _type: field = f'**{_name}** ({_type}){separator}' else: field = f'**{_name}** (*{_type}*){separator}' else: field = f'**{_name}**{separator}' elif _type: if '`' in _type: field = f'{_type}{separator}' else: field = f'*{_type}*{separator}' else: field = '' if has_desc: _desc = self._fix_field_desc(_desc) if _desc[0]: return [field + _desc[0]] + _desc[1:] else: return [field] + _desc else: return [field] def _format_fields(self, field_type: str, fields: list[tuple[str, str, list[str]]], ) -> list[str]: field_type = ':%s:' % field_type.strip() padding = ' ' * len(field_type) multi = len(fields) > 1 lines: list[str] = [] for _name, _type, _desc in fields: field = self._format_field(_name, _type, _desc) if multi: if lines: lines.extend(self._format_block(padding + ' * ', field)) else: lines.extend(self._format_block(field_type + ' * ', field)) else: lines.extend(self._format_block(field_type + ' ', field)) if lines and lines[-1]: lines.append('') return lines def _get_current_indent(self, peek_ahead: int = 0) -> int: line = self._lines.get(peek_ahead) while line is not self._lines.sentinel: if line: return self._get_indent(line) peek_ahead += 1 line = self._lines.get(peek_ahead) return 0 def _get_indent(self, line: str) -> int: for i, s in enumerate(line): if not s.isspace(): return i return len(line) def _get_initial_indent(self, lines: list[str]) -> int: for line in lines: if line: return self._get_indent(line) return 0 def _get_min_indent(self, lines: list[str]) -> int: min_indent = None for line in lines: if line: indent = self._get_indent(line) if min_indent is None or indent < min_indent: min_indent = indent return min_indent or 0 def _indent(self, lines: list[str], n: int = 4) -> list[str]: return [(' ' * n) + line for line in lines] def _is_indented(self, line: str, indent: int = 1) -> bool: for i, s in enumerate(line): # noqa: SIM110 if i >= indent: return True elif not s.isspace(): return False return False def _is_list(self, lines: list[str]) -> bool: if not lines: return False if _bullet_list_regex.match(lines[0]): return True if _enumerated_list_regex.match(lines[0]): return True if len(lines) < 2 or lines[0].endswith('::'): return False indent = self._get_indent(lines[0]) next_indent = indent for line in lines[1:]: if line: next_indent = self._get_indent(line) break return next_indent > indent def _is_section_header(self) -> bool: section = self._lines.get(0).lower() match = _google_section_regex.match(section) if match and section.strip(':') in self._sections: header_indent = self._get_indent(section) section_indent = self._get_current_indent(peek_ahead=1) return section_indent > header_indent elif self._directive_sections: if _directive_regex.match(section): for directive_section in self._directive_sections: if section.startswith(directive_section): return True return False def _is_section_break(self) -> bool: line = self._lines.get(0) return (not self._lines or self._is_section_header() or (self._is_in_section and line and not self._is_indented(line, self._section_indent))) def _load_custom_sections(self) -> None: if self._config.napoleon_custom_sections is not None: for entry in self._config.napoleon_custom_sections: if isinstance(entry, str): # if entry is just a label, add to sections list, # using generic section logic. self._sections[entry.lower()] = self._parse_custom_generic_section else: # otherwise, assume entry is container; if entry[1] == "params_style": self._sections[entry[0].lower()] = \ self._parse_custom_params_style_section elif entry[1] == "returns_style": self._sections[entry[0].lower()] = \ self._parse_custom_returns_style_section else: # [0] is new section, [1] is the section to alias. # in the case of key mismatch, just handle as generic section. self._sections[entry[0].lower()] = \ self._sections.get(entry[1].lower(), self._parse_custom_generic_section) def _parse(self) -> None: self._parsed_lines = self._consume_empty() if self._name and self._what in ('attribute', 'data', 'property'): res: list[str] = [] with contextlib.suppress(StopIteration): res = self._parse_attribute_docstring() self._parsed_lines.extend(res) return while self._lines: if self._is_section_header(): try: section = self._consume_section_header() self._is_in_section = True self._section_indent = self._get_current_indent() if _directive_regex.match(section): lines = [section] + self._consume_to_next_section() else: lines = self._sections[section.lower()](section) finally: self._is_in_section = False self._section_indent = 0 else: if not self._parsed_lines: lines = self._consume_contiguous() + self._consume_empty() else: lines = self._consume_to_next_section() self._parsed_lines.extend(lines) def _parse_admonition(self, admonition: str, section: str) -> list[str]: # type (str, str) -> List[str] lines = self._consume_to_next_section() return self._format_admonition(admonition, lines) def _parse_attribute_docstring(self) -> list[str]: _type, _desc = self._consume_inline_attribute() lines = self._format_field('', '', _desc) if _type: lines.extend(['', ':type: %s' % _type]) return lines def _parse_attributes_section(self, section: str) -> list[str]: lines = [] for _name, _type, _desc in self._consume_fields(): if not _type: _type = self._lookup_annotation(_name) if self._config.napoleon_use_ivar: field = ':ivar %s: ' % _name lines.extend(self._format_block(field, _desc)) if _type: lines.append(f':vartype {_name}: {_type}') else: lines.append('.. attribute:: ' + _name) if self._opt: if 'no-index' in self._opt or 'noindex' in self._opt: lines.append(' :no-index:') lines.append('') fields = self._format_field('', '', _desc) lines.extend(self._indent(fields, 3)) if _type: lines.append('') lines.extend(self._indent([':type: %s' % _type], 3)) lines.append('') if self._config.napoleon_use_ivar: lines.append('') return lines def _parse_examples_section(self, section: str) -> list[str]: labels = { 'example': _('Example'), 'examples': _('Examples'), } use_admonition = self._config.napoleon_use_admonition_for_examples label = labels.get(section.lower(), section) return self._parse_generic_section(label, use_admonition) def _parse_custom_generic_section(self, section: str) -> list[str]: # for now, no admonition for simple custom sections return self._parse_generic_section(section, False) def _parse_custom_params_style_section(self, section: str) -> list[str]: return self._format_fields(section, self._consume_fields()) def _parse_custom_returns_style_section(self, section: str) -> list[str]: fields = self._consume_returns_section(preprocess_types=True) return self._format_fields(section, fields) def _parse_usage_section(self, section: str) -> list[str]: header = ['.. rubric:: Usage:', ''] block = ['.. code-block:: python', ''] lines = self._consume_usage_section() lines = self._indent(lines, 3) return header + block + lines + [''] def _parse_generic_section(self, section: str, use_admonition: bool) -> list[str]: lines = self._strip_empty(self._consume_to_next_section()) lines = self._dedent(lines) if use_admonition: header = '.. admonition:: %s' % section lines = self._indent(lines, 3) else: header = '.. rubric:: %s' % section if lines: return [header, ''] + lines + [''] else: return [header, ''] def _parse_keyword_arguments_section(self, section: str) -> list[str]: fields = self._consume_fields() if self._config.napoleon_use_keyword: return self._format_docutils_params( fields, field_role="keyword", type_role="kwtype") else: return self._format_fields(_('Keyword Arguments'), fields) def _parse_methods_section(self, section: str) -> list[str]: lines: list[str] = [] for _name, _type, _desc in self._consume_fields(parse_type=False): lines.append('.. method:: %s' % _name) if self._opt: if 'no-index' in self._opt or 'noindex' in self._opt: lines.append(' :no-index:') if _desc: lines.extend([''] + self._indent(_desc, 3)) lines.append('') return lines def _parse_notes_section(self, section: str) -> list[str]: use_admonition = self._config.napoleon_use_admonition_for_notes return self._parse_generic_section(_('Notes'), use_admonition) def _parse_other_parameters_section(self, section: str) -> list[str]: if self._config.napoleon_use_param: # Allow to declare multiple parameters at once (ex: x, y: int) fields = self._consume_fields(multiple=True) return self._format_docutils_params(fields) else: fields = self._consume_fields() return self._format_fields(_('Other Parameters'), fields) def _parse_parameters_section(self, section: str) -> list[str]: if self._config.napoleon_use_param: # Allow to declare multiple parameters at once (ex: x, y: int) fields = self._consume_fields(multiple=True) return self._format_docutils_params(fields) else: fields = self._consume_fields() return self._format_fields(_('Parameters'), fields) def _parse_raises_section(self, section: str) -> list[str]: fields = self._consume_fields(parse_type=False, prefer_type=True) lines: list[str] = [] for _name, _type, _desc in fields: m = self._name_rgx.match(_type) if m and m.group('name'): _type = m.group('name') elif _xref_regex.match(_type): pos = _type.find('`') _type = _type[pos + 1:-1] _type = ' ' + _type if _type else '' _desc = self._strip_empty(_desc) _descs = ' ' + '\n '.join(_desc) if any(_desc) else '' lines.append(f':raises{_type}:{_descs}') if lines: lines.append('') return lines def _parse_receives_section(self, section: str) -> list[str]: if self._config.napoleon_use_param: # Allow to declare multiple parameters at once (ex: x, y: int) fields = self._consume_fields(multiple=True) return self._format_docutils_params(fields) else: fields = self._consume_fields() return self._format_fields(_('Receives'), fields) def _parse_references_section(self, section: str) -> list[str]: use_admonition = self._config.napoleon_use_admonition_for_references return self._parse_generic_section(_('References'), use_admonition) def _parse_returns_section(self, section: str) -> list[str]: fields = self._consume_returns_section() multi = len(fields) > 1 use_rtype = False if multi else self._config.napoleon_use_rtype lines: list[str] = [] for _name, _type, _desc in fields: if use_rtype: field = self._format_field(_name, '', _desc) else: field = self._format_field(_name, _type, _desc) if multi: if lines: lines.extend(self._format_block(' * ', field)) else: lines.extend(self._format_block(':returns: * ', field)) else: if any(field): # only add :returns: if there's something to say lines.extend(self._format_block(':returns: ', field)) if _type and use_rtype: lines.extend([':rtype: %s' % _type, '']) if lines and lines[-1]: lines.append('') return lines def _parse_see_also_section(self, section: str) -> list[str]: return self._parse_admonition('seealso', section) def _parse_warns_section(self, section: str) -> list[str]: return self._format_fields(_('Warns'), self._consume_fields()) def _parse_yields_section(self, section: str) -> list[str]: fields = self._consume_returns_section(preprocess_types=True) return self._format_fields(_('Yields'), fields) def _partition_field_on_colon(self, line: str) -> tuple[str, str, str]: before_colon = [] after_colon = [] colon = '' found_colon = False for i, source in enumerate(_xref_or_code_regex.split(line)): if found_colon: after_colon.append(source) else: m = _single_colon_regex.search(source) if (i % 2) == 0 and m: found_colon = True colon = source[m.start(): m.end()] before_colon.append(source[:m.start()]) after_colon.append(source[m.end():]) else: before_colon.append(source) return ("".join(before_colon).strip(), colon, "".join(after_colon).strip()) def _strip_empty(self, lines: list[str]) -> list[str]: if lines: start = -1 for i, line in enumerate(lines): if line: start = i break if start == -1: lines = [] end = -1 for i in reversed(range(len(lines))): line = lines[i] if line: end = i break if start > 0 or end + 1 < len(lines): lines = lines[start:end + 1] return lines def _lookup_annotation(self, _name: str) -> str: if self._config.napoleon_attr_annotations: if self._what in ("module", "class", "exception") and self._obj: # cache the class annotations if not hasattr(self, "_annotations"): localns = getattr(self._config, "autodoc_type_aliases", {}) localns.update(getattr( self._config, "napoleon_type_aliases", {}, ) or {}) self._annotations = get_type_hints(self._obj, None, localns) if _name in self._annotations: return stringify_annotation(self._annotations[_name], 'fully-qualified-except-typing') # No annotation found return "" def _recombine_set_tokens(tokens: list[str]) -> list[str]: token_queue = collections.deque(tokens) keywords = ("optional", "default") def takewhile_set(tokens): open_braces = 0 previous_token = None while True: try: token = tokens.popleft() except IndexError: break if token == ", ": previous_token = token continue if not token.strip(): continue if token in keywords: tokens.appendleft(token) if previous_token is not None: tokens.appendleft(previous_token) break if previous_token is not None: yield previous_token previous_token = None if token == "{": open_braces += 1 elif token == "}": open_braces -= 1 yield token if open_braces == 0: break def combine_set(tokens): while True: try: token = tokens.popleft() except IndexError: break if token == "{": tokens.appendleft("{") yield "".join(takewhile_set(tokens)) else: yield token return list(combine_set(token_queue)) def _tokenize_type_spec(spec: str) -> list[str]: def postprocess(item): if _default_regex.match(item): default = item[:7] # can't be separated by anything other than a single space # for now other = item[8:] return [default, " ", other] else: return [item] tokens = [ item for raw_token in _token_regex.split(spec) for item in postprocess(raw_token) if item ] return tokens def _token_type(token: str, location: str | None = None) -> str: def is_numeric(token): try: # use complex to make sure every numeric value is detected as literal complex(token) except ValueError: return False else: return True if token.startswith(" ") or token.endswith(" "): type_ = "delimiter" elif ( is_numeric(token) or (token.startswith("{") and token.endswith("}")) or (token.startswith('"') and token.endswith('"')) or (token.startswith("'") and token.endswith("'")) ): type_ = "literal" elif token.startswith("{"): logger.warning( __("invalid value set (missing closing brace): %s"), token, location=location, ) type_ = "literal" elif token.endswith("}"): logger.warning( __("invalid value set (missing opening brace): %s"), token, location=location, ) type_ = "literal" elif token.startswith(("'", '"')): logger.warning( __("malformed string literal (missing closing quote): %s"), token, location=location, ) type_ = "literal" elif token.endswith(("'", '"')): logger.warning( __("malformed string literal (missing opening quote): %s"), token, location=location, ) type_ = "literal" elif token in ("optional", "default"): # default is not a official keyword (yet) but supported by the # reference implementation (numpydoc) and widely used type_ = "control" elif _xref_regex.match(token): type_ = "reference" else: type_ = "obj" return type_ def _convert_numpy_type_spec( _type: str, location: str | None = None, translations: dict | None = None, ) -> str: if translations is None: translations = {} def convert_obj(obj, translations, default_translation): translation = translations.get(obj, obj) # use :class: (the default) only if obj is not a standard singleton if translation in _SINGLETONS and default_translation == ":class:`%s`": default_translation = ":obj:`%s`" elif translation == "..." and default_translation == ":class:`%s`": # allow referencing the builtin ... default_translation = ":obj:`%s `" if _xref_regex.match(translation) is None: translation = default_translation % translation return translation tokens = _tokenize_type_spec(_type) combined_tokens = _recombine_set_tokens(tokens) types = [ (token, _token_type(token, location)) for token in combined_tokens ] converters = { "literal": lambda x: "``%s``" % x, "obj": lambda x: convert_obj(x, translations, ":class:`%s`"), "control": lambda x: "*%s*" % x, "delimiter": lambda x: x, "reference": lambda x: x, } converted = "".join(converters.get(type_)(token) # type: ignore[misc] for token, type_ in types) return converted class NumpyDocstring(GoogleDocstring): """Convert NumPy style docstrings to reStructuredText. Parameters ---------- docstring : :obj:`str` or :obj:`list` of :obj:`str` The docstring to parse, given either as a string or split into individual lines. config: :obj:`sphinx.ext.napoleon.Config` or :obj:`sphinx.config.Config` The configuration settings to use. If not given, defaults to the config object on `app`; or if `app` is not given defaults to the a new :class:`sphinx.ext.napoleon.Config` object. Other Parameters ---------------- app : :class:`sphinx.application.Sphinx`, optional Application object representing the Sphinx process. what : :obj:`str`, optional A string specifying the type of the object to which the docstring belongs. Valid values: "module", "class", "exception", "function", "method", "attribute". name : :obj:`str`, optional The fully qualified name of the object. obj : module, class, exception, function, method, or attribute The object to which the docstring belongs. options : :class:`sphinx.ext.autodoc.Options`, optional The options given to the directive: an object with attributes inherited_members, undoc_members, show_inheritance and no_index that are True if the flag option of same name was given to the auto directive. Example ------- >>> from sphinx.ext.napoleon import Config >>> config = Config(napoleon_use_param=True, napoleon_use_rtype=True) >>> docstring = '''One line summary. ... ... Extended description. ... ... Parameters ... ---------- ... arg1 : int ... Description of `arg1` ... arg2 : str ... Description of `arg2` ... Returns ... ------- ... str ... Description of return value. ... ''' >>> print(NumpyDocstring(docstring, config)) One line summary. Extended description. :param arg1: Description of `arg1` :type arg1: int :param arg2: Description of `arg2` :type arg2: str :returns: Description of return value. :rtype: str Methods ------- __str__() Return the parsed docstring in reStructuredText format. Returns ------- str UTF-8 encoded version of the docstring. __unicode__() Return the parsed docstring in reStructuredText format. Returns ------- unicode Unicode version of the docstring. lines() Return the parsed lines of the docstring in reStructuredText format. Returns ------- list(str) The lines of the docstring in a list. """ def __init__( self, docstring: str | list[str], config: SphinxConfig | None = None, app: Sphinx | None = None, what: str = '', name: str = '', obj: Any = None, options: Any = None, ) -> None: self._directive_sections = ['.. index::'] super().__init__(docstring, config, app, what, name, obj, options) def _get_location(self) -> str | None: try: filepath = inspect.getfile(self._obj) if self._obj is not None else None except TypeError: filepath = None name = self._name if filepath is None and name is None: return None elif filepath is None: filepath = "" return ":".join([filepath, "docstring of %s" % name]) def _escape_args_and_kwargs(self, name: str) -> str: func = super()._escape_args_and_kwargs if ", " in name: return ", ".join(func(param) for param in name.split(", ")) else: return func(name) def _consume_field(self, parse_type: bool = True, prefer_type: bool = False, ) -> tuple[str, str, list[str]]: line = self._lines.next() if parse_type: _name, _, _type = self._partition_field_on_colon(line) else: _name, _type = line, '' _name, _type = _name.strip(), _type.strip() _name = self._escape_args_and_kwargs(_name) if parse_type and not _type: _type = self._lookup_annotation(_name) if prefer_type and not _type: _type, _name = _name, _type if self._config.napoleon_preprocess_types: _type = _convert_numpy_type_spec( _type, location=self._get_location(), translations=self._config.napoleon_type_aliases or {}, ) indent = self._get_indent(line) + 1 _desc = self._dedent(self._consume_indented_block(indent)) _desc = self.__class__(_desc, self._config).lines() return _name, _type, _desc def _consume_returns_section(self, preprocess_types: bool = False, ) -> list[tuple[str, str, list[str]]]: return self._consume_fields(prefer_type=True) def _consume_section_header(self) -> str: section = self._lines.next() if not _directive_regex.match(section): # Consume the header underline self._lines.next() return section def _is_section_break(self) -> bool: line1, line2 = self._lines.get(0), self._lines.get(1) return (not self._lines or self._is_section_header() or ['', ''] == [line1, line2] or (self._is_in_section and line1 and not self._is_indented(line1, self._section_indent))) def _is_section_header(self) -> bool: section, underline = self._lines.get(0), self._lines.get(1) section = section.lower() if section in self._sections and isinstance(underline, str): return bool(_numpy_section_regex.match(underline)) elif self._directive_sections: if _directive_regex.match(section): for directive_section in self._directive_sections: if section.startswith(directive_section): return True return False def _parse_see_also_section(self, section: str) -> list[str]: lines = self._consume_to_next_section() try: return self._parse_numpydoc_see_also_section(lines) except ValueError: return self._format_admonition('seealso', lines) def _parse_numpydoc_see_also_section(self, content: list[str]) -> list[str]: """ Derived from the NumpyDoc implementation of _parse_see_also. See Also -------- func_name : Descriptive text continued text another_func_name : Descriptive text func_name1, func_name2, :meth:`func_name`, func_name3 """ items = [] def parse_item_name(text: str) -> tuple[str, str | None]: """Match ':role:`name`' or 'name'""" m = self._name_rgx.match(text) if m: g = m.groups() if g[1] is None: return g[3], None else: return g[2], g[1] raise ValueError("%s is not a item name" % text) def push_item(name: str | None, rest: list[str]) -> None: if not name: return name, role = parse_item_name(name) items.append((name, list(rest), role)) del rest[:] def translate(func, description, role): translations = self._config.napoleon_type_aliases if role is not None or not translations: return func, description, role translated = translations.get(func, func) match = self._name_rgx.match(translated) if not match: return translated, description, role groups = match.groupdict() role = groups["role"] new_func = groups["name"] or groups["name2"] return new_func, description, role current_func = None rest: list[str] = [] for line in content: if not line.strip(): continue m = self._name_rgx.match(line) if m and line[m.end():].strip().startswith(':'): push_item(current_func, rest) current_func, line = line[:m.end()], line[m.end():] rest = [line.split(':', 1)[1].strip()] if not rest[0]: rest = [] elif not line.startswith(' '): push_item(current_func, rest) current_func = None if ',' in line: for func in line.split(','): if func.strip(): push_item(func, []) elif line.strip(): current_func = line elif current_func is not None: rest.append(line.strip()) push_item(current_func, rest) if not items: return [] # apply type aliases items = [ translate(func, description, role) for func, description, role in items ] lines: list[str] = [] last_had_desc = True for name, desc, role in items: if role: link = f':{role}:`{name}`' else: link = ':obj:`%s`' % name if desc or last_had_desc: lines += [''] lines += [link] else: lines[-1] += ", %s" % link if desc: lines += self._indent([' '.join(desc)]) last_had_desc = True else: last_had_desc = False lines += [''] return self._format_admonition('seealso', lines)