summaryrefslogtreecommitdiffstats
path: root/sphinx/util/nodes.py
diff options
context:
space:
mode:
Diffstat (limited to 'sphinx/util/nodes.py')
-rw-r--r--sphinx/util/nodes.py672
1 files changed, 672 insertions, 0 deletions
diff --git a/sphinx/util/nodes.py b/sphinx/util/nodes.py
new file mode 100644
index 0000000..b68b7fd
--- /dev/null
+++ b/sphinx/util/nodes.py
@@ -0,0 +1,672 @@
+"""Docutils node-related utility functions for Sphinx."""
+
+from __future__ import annotations
+
+import contextlib
+import re
+import unicodedata
+from typing import TYPE_CHECKING, Any, Callable
+
+from docutils import nodes
+
+from sphinx import addnodes
+from sphinx.locale import __
+from sphinx.util import logging
+
+if TYPE_CHECKING:
+ from collections.abc import Iterable
+
+ from docutils.nodes import Element, Node
+ from docutils.parsers.rst import Directive
+ from docutils.parsers.rst.states import Inliner
+ from docutils.statemachine import StringList
+
+ from sphinx.builders import Builder
+ from sphinx.environment import BuildEnvironment
+ from sphinx.util.tags import Tags
+
+logger = logging.getLogger(__name__)
+
+
# A "<" directly preceded by \x00 was backslash-escaped in the source text,
# so it must not be treated as the opening bracket of an explicit target.
explicit_title_re = re.compile(
    r'^(.+?)\s*(?<!\x00)<([^<]*?)>$',
    flags=re.DOTALL,
)
# Old name kept as an alias for backwards compatibility.
caption_ref_re = explicit_title_re
+
+
class NodeMatcher:
    """A helper class for Node.findall().

    It checks that the given node is an instance of the specified node-classes
    and has the specified node-attributes.

    For example, the following searches for ``reference`` nodes having
    ``refdomain`` and ``reftype`` attributes::

        matcher = NodeMatcher(nodes.reference, refdomain='std', reftype='citation')
        doctree.findall(matcher)
        # => [<reference ...>, <reference ...>, ...]

    A special value ``typing.Any`` matches any kind of node-attributes.  For
    example, the following searches for ``reference`` nodes having a
    ``refdomain`` attribute, whatever its value::

        from typing import Any
        matcher = NodeMatcher(nodes.reference, refdomain=Any)
        doctree.findall(matcher)
        # => [<reference ...>, <reference ...>, ...]
    """

    def __init__(self, *node_classes: type[Node], **attrs: Any) -> None:
        """Remember the accepted node classes and the required attributes.

        :param node_classes: classes a node must be an instance of; when none
            are given, the class check is skipped
        :param attrs: attribute names mapped to the value the node must hold,
            or to ``typing.Any`` to require only that the attribute exists
        """
        self.classes = node_classes
        self.attrs = attrs

    def match(self, node: Node) -> bool:
        """Return whether *node* satisfies the class and attribute criteria.

        Any exception raised while inspecting the node is treated as a
        non-match.
        """
        try:
            if self.classes and not isinstance(node, self.classes):
                return False

            if self.attrs:
                # Only Element nodes carry attributes.
                if not isinstance(node, nodes.Element):
                    return False

                for key, value in self.attrs.items():
                    if key not in node:
                        return False
                    # ``Any`` only requires the attribute to be present.
                    if value is not Any and node.get(key) != value:
                        return False

            return True
        except Exception:
            # for non-Element nodes
            return False

    def __call__(self, node: Node) -> bool:
        """Make the matcher usable directly as a ``findall()`` condition."""
        return self.match(node)
+
+
def get_full_module_name(node: Node) -> str:
    """
    Build the dotted path of the node's class, e.g. 'docutils.nodes.paragraph'.

    :param nodes.Node node: target node
    :return: ``<module>.<class name>`` of the node
    """
    module_name = node.__module__
    class_name = node.__class__.__name__
    return '.'.join((f'{module_name}', f'{class_name}'))
+
+
def repr_domxml(node: Node, length: int = 80) -> str:
    """
    Render the node as DOM XML text, for example:
    '<paragraph translatable="False"><inline classes="versionmodified">New in version...'

    If the DOM conversion raises, ``str(node)`` is used instead.

    :param nodes.Node node: target node
    :param int length:
       maximum number of characters kept before '...' is appended.  A false
       value disables truncation and the full representation is returned.
    :return: DOM XML representation
    """
    try:
        rendered = node.asdom().toxml()
    except Exception:
        rendered = str(node)

    if not length or len(rendered) <= length:
        return rendered
    return rendered[:length] + '...'
+
+
def apply_source_workaround(node: Element) -> None:
    """Fill in missing ``source``, ``line`` and ``rawsource`` on *node* in place.

    The individual patches run in a fixed order; later ones rely on the values
    set by earlier ones.
    """
    # workaround: nodes.term have wrong rawsource if classifier is specified.
    # The behavior of docutils-0.11, 0.12 is:
    # * when ``term text : classifier1 : classifier2`` is specified,
    # * rawsource of term node will have: ``term text : classifier1 : classifier2``
    # * rawsource of classifier node will be None
    if isinstance(node, nodes.classifier):
        if not node.rawsource:
            logger.debug('[i18n] PATCH: %r to have source, line and rawsource: %s',
                         get_full_module_name(node), repr_domxml(node))
            item = node.parent
            node.source = item.source
            node.line = item.line - 1
            # becomes 'classifier1' (or 'classifier2')
            node.rawsource = node.astext()
        elif not node.source:
            # docutils-0.15 fills in rawsource attribute, but not in source.
            node.source = node.parent.source

    if isinstance(node, nodes.image) and node.source is None:
        logger.debug('[i18n] PATCH: %r to have source, line: %s',
                     get_full_module_name(node), repr_domxml(node))
        owner = node.parent
        node.source, node.line = owner.source, owner.line

    if isinstance(node, nodes.title) and node.source is None:
        logger.debug('[i18n] PATCH: %r to have source: %s',
                     get_full_module_name(node), repr_domxml(node))
        owner = node.parent
        node.source, node.line = owner.source, owner.line

    if isinstance(node, nodes.term):
        logger.debug('[i18n] PATCH: %r to have rawsource: %s',
                     get_full_module_name(node), repr_domxml(node))
        # Remove each " : classifier" suffix from the term's rawsource,
        # working from the last classifier back to the first.
        classifiers = list(node.parent.findall(nodes.classifier))
        for classifier in classifiers[::-1]:
            pattern = r'\s*:\s*%s' % re.escape(classifier.astext())
            node.rawsource = re.sub(pattern, '', node.rawsource)

    if isinstance(node, nodes.topic) and node.source is None:
        # docutils-0.18 does not fill the source attribute of topic
        logger.debug('[i18n] PATCH: %r to have source, line: %s',
                     get_full_module_name(node), repr_domxml(node))
        owner = node.parent
        node.source, node.line = owner.source, owner.line

    # workaround: literal_block under bullet list (#4913)
    if isinstance(node, nodes.literal_block) and node.source is None:
        try:
            node.source = get_node_source(node)
        except ValueError:
            pass

    # workaround: recommonmark-0.2.0 doesn't set rawsource attribute
    if not node.rawsource:
        node.rawsource = node.astext()

    # Nothing left to patch once both values are available.
    if node.source and node.rawsource:
        return

    # workaround: some docutils nodes doesn't have source, line.
    sourceless_types = (
        nodes.rubric,      # #1305 rubric directive
        nodes.line,        # #1477 line node
        nodes.image,       # #3093 image directive in substitution
        nodes.field_name,  # #3335 field list syntax
    )
    if isinstance(node, sourceless_types):
        logger.debug('[i18n] PATCH: %r to have source and line: %s',
                     get_full_module_name(node), repr_domxml(node))
        try:
            node.source = get_node_source(node)
        except ValueError:
            node.source = ''
        node.line = 0  # need fix docutils to get `node.line`
+
+
# Node types that is_translatable() skips unless the node itself carries a
# 'translatable' attribute.
# XXX there are probably more
IGNORED_NODES = (
    nodes.Invisible,
    nodes.literal_block,
    nodes.doctest_block,
    addnodes.versionmodified,
)
+
+
def is_translatable(node: Node) -> bool:
    """Decide whether *node* should be offered for translation.

    Every rejection of a text element is logged at debug level together with
    the reason.
    """
    if isinstance(node, addnodes.translatable):
        return True

    # image node marked as translatable or having alt text
    if isinstance(node, nodes.image) and (node.get('translatable') or node.get('alt')):
        return True

    # inline node must not be translated if 'translatable' is not set
    if isinstance(node, nodes.Inline) and 'translatable' not in node:  # type: ignore[operator]
        return False

    if not isinstance(node, nodes.TextElement):
        return isinstance(node, nodes.meta)  # type: ignore[attr-defined]

    # Pick the first applicable reason for skipping the text element.
    if not node.source:
        # built-in message
        skip_format = '[i18n] SKIP %r because no node.source: %s'
    elif isinstance(node, IGNORED_NODES) and 'translatable' not in node:
        skip_format = ("[i18n] SKIP %r because node is in IGNORED_NODES "
                       "and no node['translatable']: %s")
    elif not node.get('translatable', True):
        # not(node['translatable'] == True or node['translatable'] is None)
        skip_format = "[i18n] SKIP %r because not node['translatable']: %s"
    elif isinstance(node, nodes.field_name) and node.children[0] == 'orphan':
        # <field_name>orphan</field_name>
        # XXX ignore all metadata (== docinfo)
        skip_format = '[i18n] SKIP %r because orphan node: %s'
    else:
        return True

    logger.debug(skip_format, get_full_module_name(node), repr_domxml(node))
    return False
+
+
# Node types whose message is taken from rawsource without whitespace
# normalisation (see extract_messages()).
LITERAL_TYPE_NODES = (
    nodes.literal_block,
    nodes.doctest_block,
    nodes.math_block,
    nodes.raw,
)
# Node types regarded as images.
IMAGE_TYPE_NODES = (nodes.image,)
+
+
def extract_messages(doctree: Element) -> Iterable[tuple[Element, str]]:
    """Yield ``(node, message)`` pairs for each translatable node in *doctree*.

    Nodes whose message turns out empty are not reported.
    """
    for node in doctree.findall(is_translatable):  # type: Element
        if isinstance(node, addnodes.translatable):
            # These nodes supply their own list of messages.
            yield from ((node, original) for original in node.extract_original_messages())
            continue

        if isinstance(node, LITERAL_TYPE_NODES):
            msg = node.rawsource or node.astext()
        elif isinstance(node, nodes.image):
            # The alt text is reported separately from the image itself.
            if node.get('alt'):
                yield node, node['alt']
            if node.get('translatable'):
                image_uri = node.get('original_uri', node['uri'])
                msg = f'.. image:: {image_uri}'
            else:
                msg = ''
        elif isinstance(node, nodes.meta):  # type: ignore[attr-defined]
            msg = node["content"]
        else:
            msg = node.rawsource.replace('\n', ' ').strip()

        # XXX nodes rendering empty are likely a bug in sphinx.addnodes
        if msg:
            yield node, msg
+
+
def get_node_source(node: Element) -> str:
    """Return the first truthy ``source`` found on *node* or its ancestors.

    :raises ValueError: if neither the node nor any ancestor has a source
    """
    located = next(
        (candidate.source for candidate in traverse_parent(node) if candidate.source),
        None,
    )
    if located is None:
        msg = 'node source not found'
        raise ValueError(msg)
    return located
+
+
def get_node_line(node: Element) -> int:
    """Return the first truthy ``line`` found on *node* or its ancestors.

    :raises ValueError: if neither the node nor any ancestor has a line
    """
    located = next(
        (candidate.line for candidate in traverse_parent(node) if candidate.line),
        None,
    )
    if located is None:
        msg = 'node line not found'
        raise ValueError(msg)
    return located
+
+
def traverse_parent(node: Element, cls: Any = None) -> Iterable[Element]:
    """Yield *node* and then each of its ancestors, innermost first.

    When *cls* is given, only nodes that are instances of it are yielded.
    The walk ends at the first falsy ``parent``.
    """
    current = node
    while current:
        accepted = cls is None or isinstance(current, cls)
        if accepted:
            yield current
        current = current.parent
+
+
def get_prev_node(node: Node) -> Node | None:
    """Return the sibling just before *node*, or None if it is the first child."""
    siblings = node.parent
    position = siblings.index(node)
    return siblings[position - 1] if position > 0 else None
+
+
def traverse_translatable_index(
    doctree: Element,
) -> Iterable[tuple[Element, list[tuple[str, str, str, str, str | None]]]]:
    """Yield each non-inline index node of *doctree* with its entries.

    The node's ``raw_entries`` are used when present, otherwise ``entries``.
    """
    index_matcher = NodeMatcher(addnodes.index, inline=False)
    for index_node in doctree.findall(index_matcher):  # type: addnodes.index
        key = 'raw_entries' if 'raw_entries' in index_node else 'entries'
        yield index_node, index_node[key]
+
+
def nested_parse_with_titles(state: Any, content: StringList, node: Node,
                             content_offset: int = 0) -> str:
    """Run ``state.nested_parse()`` with titles allowed and fresh title styles.

    Titles in *content* need not use the same decoration as the calling
    document, which helps when the content comes from a completely different
    context, such as docstrings.  The surrounding title bookkeeping is put
    back afterwards, even if parsing raises.
    """
    # hack around title style bookkeeping: stash, reset, parse, restore
    saved_styles, saved_level = state.memo.title_styles, state.memo.section_level
    state.memo.title_styles, state.memo.section_level = [], 0
    try:
        return state.nested_parse(content, content_offset, node, match_titles=1)
    finally:
        state.memo.title_styles, state.memo.section_level = saved_styles, saved_level
+
+
def clean_astext(node: Element) -> str:
    """Like node.astext(), but ignore images.

    Operates on a deep copy, so *node* is left untouched: image alt texts are
    blanked and raw nodes are removed before the text is extracted.
    """
    scratch = node.deepcopy()
    for image in scratch.findall(nodes.image):
        image['alt'] = ''
    # Collect the raw nodes first because removal alters the tree being walked.
    raw_nodes = list(scratch.findall(nodes.raw))
    for raw_node in raw_nodes:
        raw_node.parent.remove(raw_node)
    return scratch.astext()
+
+
def split_explicit_title(text: str) -> tuple[bool, str, str]:
    """Split role content into title and target, if given.

    :return: ``(has_explicit_title, title, target)``; when *text* has no
        explicit ``title <target>`` form, both title and target are *text*
    """
    found = explicit_title_re.match(text)
    if found is None:
        return False, text, text
    title, target = found.group(1, 2)
    return True, title, target
+
+
# Index entry type prefixes recognised by process_index_entry().
indextypes = [
    'single',
    'pair',
    'double',
    'triple',
    'see',
    'seealso',
]
+
+
def process_index_entry(entry: str, targetid: str,
                        ) -> list[tuple[str, str, str, str, str | None]]:
    """Parse one index entry string into a list of index entry tuples.

    Each tuple is ``(type, value, targetid, main, None)``.  A leading ``!``
    marks an entry as ``'main'``.  Entries without a recognised ``type:``
    prefix are split on commas into ``single`` entries.
    """
    from sphinx.domains.python import pairindextypes

    results: list[tuple[str, str, str, str, str | None]] = []
    entry = entry.strip()
    original_entry = entry
    main = ''
    if entry.startswith('!'):
        main = 'main'
        entry = entry[1:].lstrip()

    # Deprecated shortcut types that expand to a 'pair' entry.
    deprecated_type = next(
        (kind for kind in pairindextypes if entry.startswith(f'{kind}:')),
        None,
    )
    if deprecated_type is not None:
        value = entry[len(deprecated_type) + 1:].strip()
        value = f'{pairindextypes[deprecated_type]}; {value}'
        # xref RemovedInSphinx90Warning
        logger.warning(__('%r is deprecated for index entries (from entry %r). '
                          "Use 'pair: %s' instead."),
                       deprecated_type, entry, value, type='index')
        results.append(('pair', value, targetid, main, None))
        return results

    # Regular explicit types.
    explicit_type = next(
        (kind for kind in indextypes if entry.startswith(f'{kind}:')),
        None,
    )
    if explicit_type is not None:
        value = entry[len(explicit_type) + 1:].strip()
        entry_type = 'pair' if explicit_type == 'double' else explicit_type
        results.append((entry_type, value, targetid, main, None))
        return results

    # shorthand notation for single entries
    for piece in original_entry.split(','):
        piece = piece.strip()
        main = ''
        if piece.startswith('!'):
            main = 'main'
            piece = piece[1:].lstrip()
        if piece:
            results.append(('single', piece, targetid, main, None))
    return results
+
+
def inline_all_toctrees(builder: Builder, docnameset: set[str], docname: str,
                        tree: nodes.document, colorfunc: Callable, traversed: list[str],
                        ) -> nodes.document:
    """Inline all toctrees in the *tree*.

    Works on a deep copy of *tree*, which is returned.  Each toctree node is
    replaced by ``start_of_file`` nodes holding the recursively inlined
    documents.  Record all docnames in *docnameset*, and output docnames with
    *colorfunc*.  *traversed* is extended with every included file visited so
    that no file is inlined twice.
    """
    tree = tree.deepcopy()
    toctrees = list(tree.findall(addnodes.toctree))
    for toctree in toctrees:
        replacements = []
        for includefile in map(str, toctree['includefiles']):
            if includefile in traversed:
                continue
            try:
                traversed.append(includefile)
                logger.info(colorfunc(includefile) + " ", nonl=True)
                subtree = inline_all_toctrees(builder, docnameset, includefile,
                                              builder.env.get_doctree(includefile),
                                              colorfunc, traversed)
                docnameset.add(includefile)
            except Exception:
                logger.warning(__('toctree contains ref to nonexisting file %r'),
                               includefile, location=docname)
            else:
                start = addnodes.start_of_file(docname=includefile)
                start.children = subtree.children
                # Tag sections that do not yet record their originating document.
                for section in start.findall(nodes.section):
                    if 'docname' not in section:
                        section['docname'] = includefile
                replacements.append(start)
        toctree.parent.replace(toctree, replacements)
    return tree
+
+
+def _make_id(string: str) -> str:
+ """Convert `string` into an identifier and return it.
+
+ This function is a modified version of ``docutils.nodes.make_id()`` of
+ docutils-0.16.
+
+ Changes:
+
+ * Allow to use capital alphabet characters
+ * Allow to use dots (".") and underscores ("_") for an identifier
+ without a leading character.
+
+ # Author: David Goodger <goodger@python.org>
+ # Maintainer: docutils-develop@lists.sourceforge.net
+ # Copyright: This module has been placed in the public domain.
+ """
+ id = string.translate(_non_id_translate_digraphs)
+ id = id.translate(_non_id_translate)
+ # get rid of non-ascii characters.
+ # 'ascii' lowercase to prevent problems with turkish locale.
+ id = unicodedata.normalize('NFKD', id).encode('ascii', 'ignore').decode('ascii')
+ # shrink runs of whitespace and replace by hyphen
+ id = _non_id_chars.sub('-', ' '.join(id.split()))
+ id = _non_id_at_ends.sub('', id)
+ return str(id)
+
+
+_non_id_chars = re.compile('[^a-zA-Z0-9._]+')
+_non_id_at_ends = re.compile('^[-0-9._]+|-+$')
+_non_id_translate = {
+ 0x00f8: 'o', # o with stroke
+ 0x0111: 'd', # d with stroke
+ 0x0127: 'h', # h with stroke
+ 0x0131: 'i', # dotless i
+ 0x0142: 'l', # l with stroke
+ 0x0167: 't', # t with stroke
+ 0x0180: 'b', # b with stroke
+ 0x0183: 'b', # b with topbar
+ 0x0188: 'c', # c with hook
+ 0x018c: 'd', # d with topbar
+ 0x0192: 'f', # f with hook
+ 0x0199: 'k', # k with hook
+ 0x019a: 'l', # l with bar
+ 0x019e: 'n', # n with long right leg
+ 0x01a5: 'p', # p with hook
+ 0x01ab: 't', # t with palatal hook
+ 0x01ad: 't', # t with hook
+ 0x01b4: 'y', # y with hook
+ 0x01b6: 'z', # z with stroke
+ 0x01e5: 'g', # g with stroke
+ 0x0225: 'z', # z with hook
+ 0x0234: 'l', # l with curl
+ 0x0235: 'n', # n with curl
+ 0x0236: 't', # t with curl
+ 0x0237: 'j', # dotless j
+ 0x023c: 'c', # c with stroke
+ 0x023f: 's', # s with swash tail
+ 0x0240: 'z', # z with swash tail
+ 0x0247: 'e', # e with stroke
+ 0x0249: 'j', # j with stroke
+ 0x024b: 'q', # q with hook tail
+ 0x024d: 'r', # r with stroke
+ 0x024f: 'y', # y with stroke
+}
+_non_id_translate_digraphs = {
+ 0x00df: 'sz', # ligature sz
+ 0x00e6: 'ae', # ae
+ 0x0153: 'oe', # ligature oe
+ 0x0238: 'db', # db digraph
+ 0x0239: 'qp', # qp digraph
+}
+
+
def make_id(env: BuildEnvironment, document: nodes.document,
            prefix: str = '', term: str | None = None) -> str:
    """Generate an appropriate node_id for given *prefix* and *term*.

    An ID derived from *term* is tried first.  If that is unusable or already
    present in ``document.ids``, serial numbers from ``env.new_serialno()``
    are tried until an unused ID is found.
    """
    idformat = (prefix + "-%s") if prefix else ((document.settings.id_prefix or "id") + "%s")

    # try to generate node_id by *term*
    candidate = None
    if term:
        if prefix:
            candidate = _make_id(idformat % term)
            # *term* is not good to generate a node_id.
            unusable = candidate == prefix
        else:
            candidate = _make_id(term)
            unusable = candidate == ''
        if unusable:
            candidate = None  # fall back to a serial number

    while candidate is None or candidate in document.ids:
        candidate = idformat % env.new_serialno(prefix)

    return candidate
+
+
def find_pending_xref_condition(node: addnodes.pending_xref, condition: str,
                                ) -> Element | None:
    """Return the first ``pending_xref_condition`` child matching *condition*.

    Only direct children of *node* are examined; None is returned when none
    of them matches.
    """
    matching = (
        child for child in node
        if isinstance(child, addnodes.pending_xref_condition)
        and child.get('condition') == condition
    )
    return next(matching, None)
+
+
def make_refnode(builder: Builder, fromdocname: str, todocname: str, targetid: str | None,
                 child: Node | list[Node], title: str | None = None,
                 ) -> nodes.reference:
    """Shortcut to create a reference node.

    A link within the same document to a given *targetid* uses ``refid``.
    Otherwise ``refuri`` is the relative URI from *fromdocname* to
    *todocname*, with ``#targetid`` appended when a target is given.
    *child* is appended to the new node, and *title* (if any) is stored as
    ``reftitle``.
    """
    ref = nodes.reference('', '', internal=True)
    if targetid and fromdocname == todocname:
        ref['refid'] = targetid
    else:
        uri = builder.get_relative_uri(fromdocname, todocname)
        if targetid:
            uri = uri + '#' + targetid
        ref['refuri'] = uri
    if title:
        ref['reftitle'] = title
    ref += child
    return ref
+
+
def set_source_info(directive: Directive, node: Node) -> None:
    """Set ``node.source`` and ``node.line`` from the directive's line number."""
    location = directive.state_machine.get_source_and_line(directive.lineno)
    node.source, node.line = location
+
+
def set_role_source_info(inliner: Inliner, lineno: int, node: Node) -> None:
    """Set ``node.source`` and ``node.line`` using the inliner's reporter."""
    lookup = inliner.reporter.get_source_and_line  # type: ignore[attr-defined]
    location = lookup(lineno)
    node.source, node.line = location
+
+
def copy_source_info(src: Element, dst: Element) -> None:
    """Copy the source and line found for *src* onto *dst*, best effort.

    A lookup that fails with ValueError stops the copy silently; whatever was
    assigned before the failure stays on *dst*.
    """
    try:
        dst.source = get_node_source(src)
        dst.line = get_node_line(src)
    except ValueError:
        pass
+
+
# A node with an ancestor of any of these types is never smart-quoted
# (see is_smartquotable()).
NON_SMARTQUOTABLE_PARENT_NODES = (
    nodes.FixedTextElement,
    nodes.literal,
    nodes.math,
    nodes.image,
    nodes.raw,
    nodes.problematic,
    addnodes.not_smartquotable,
)
+
+
def is_smartquotable(node: Node) -> bool:
    """Check whether the node is smart-quotable or not.

    The answer is False when any ancestor is one of
    NON_SMARTQUOTABLE_PARENT_NODES or has ``support_smartquotes`` set to
    False, or when the node's own ``support_smartquotes`` attribute is False.
    """
    for ancestor in traverse_parent(node.parent):
        blocked = (
            isinstance(ancestor, NON_SMARTQUOTABLE_PARENT_NODES)
            or ancestor.get('support_smartquotes', None) is False
        )
        if blocked:
            return False

    return getattr(node, 'support_smartquotes', None) is not False
+
+
def process_only_nodes(document: Node, tags: Tags) -> None:
    """Filter ``only`` nodes which do not match *tags*.

    A kept ``only`` node is replaced by its children; a dropped or childless
    one is replaced by an empty comment node.
    """
    for only_node in document.findall(addnodes.only):
        if _only_node_keep_children(only_node, tags):
            replacement = only_node.children or nodes.comment()
        else:
            # A comment on the comment() nodes being inserted: replacing by [] would
            # result in a "Losing ids" exception if there is a target node before
            # the only node, so we make sure docutils can transfer the id to
            # something, even if it's just a comment and will lose the id anyway...
            replacement = nodes.comment()
        only_node.replace_self(replacement)
+
+
+def _only_node_keep_children(node: addnodes.only, tags: Tags) -> bool:
+ """Keep children if tags match or error."""
+ try:
+ return tags.eval_condition(node['expr'])
+ except Exception as err:
+ logger.warning(
+ __('exception while evaluating only directive expression: %s'),
+ err,
+ location=node)
+ return True
+
+
def _copy_except__document(el: Element) -> Element:
    """Monkey-patch ```nodes.Element.copy``` to not copy the ``_document``
    attribute.

    The copy has no children.  The list-valued basic attributes are copied
    as new lists, while all other attribute values are shared with *el*.

    xref: https://github.com/sphinx-doc/sphinx/issues/11116#issuecomment-1376767086
    """
    duplicate = object.__new__(el.__class__)
    # set in Element.__init__()
    duplicate.children = []
    duplicate.rawsource = el.rawsource
    duplicate.tagname = el.tagname
    # copied in Element.copy()
    list_valued = {'ids', 'classes', 'names', 'dupnames', 'backrefs'}
    copied_attributes = {}
    for key, value in el.attributes.items():
        copied_attributes[key] = value[:] if key in list_valued else value
    duplicate.attributes = copied_attributes
    duplicate.line = el.line
    duplicate.source = el.source
    return duplicate


nodes.Element.copy = _copy_except__document  # type: ignore[assignment]
+
+
def _deepcopy(el: Element) -> Element:
    """Monkey-patch ```nodes.Element.deepcopy``` for speed.

    Copies *el* with ``copy()`` and deep-copies each child.  The copied
    children are re-parented to the new node.  When *el* has a document, each
    child is attached to it and a missing source or line is taken from the
    document's current position.
    """
    clone = el.copy()
    clone.children = [original.deepcopy() for original in el.children]
    for copied_child in clone.children:
        copied_child.parent = clone
        if el.document:
            copied_child.document = el.document
            if copied_child.source is None:
                copied_child.source = el.document.current_source
            if copied_child.line is None:
                copied_child.line = el.document.current_line
    return clone


nodes.Element.deepcopy = _deepcopy  # type: ignore[assignment]