path: root/sphinx/transforms
Diffstat (limited to 'sphinx/transforms')
-rw-r--r--  sphinx/transforms/__init__.py                     516
-rw-r--r--  sphinx/transforms/compact_bullet_list.py           91
-rw-r--r--  sphinx/transforms/i18n.py                         624
-rw-r--r--  sphinx/transforms/post_transforms/__init__.py     297
-rw-r--r--  sphinx/transforms/post_transforms/code.py         139
-rw-r--r--  sphinx/transforms/post_transforms/images.py       280
-rw-r--r--  sphinx/transforms/references.py                    47
7 files changed, 1994 insertions, 0 deletions
diff --git a/sphinx/transforms/__init__.py b/sphinx/transforms/__init__.py
new file mode 100644
index 0000000..8a806cd
--- /dev/null
+++ b/sphinx/transforms/__init__.py
@@ -0,0 +1,516 @@
+"""Docutils transforms used by Sphinx when reading documents."""
+
+from __future__ import annotations
+
+import re
+import unicodedata
+from typing import TYPE_CHECKING, Any, cast
+
+from docutils import nodes
+from docutils.transforms import Transform, Transformer
+from docutils.transforms.parts import ContentsFilter
+from docutils.transforms.universal import SmartQuotes
+from docutils.utils import normalize_language_tag
+from docutils.utils.smartquotes import smartchars
+
+from sphinx import addnodes
+from sphinx.locale import _, __
+from sphinx.util import logging
+from sphinx.util.docutils import new_document
+from sphinx.util.i18n import format_date
+from sphinx.util.nodes import apply_source_workaround, is_smartquotable
+
+if TYPE_CHECKING:
+ from collections.abc import Generator
+
+ from docutils.nodes import Node, Text
+
+ from sphinx.application import Sphinx
+ from sphinx.config import Config
+ from sphinx.domains.std import StandardDomain
+ from sphinx.environment import BuildEnvironment
+
+
+logger = logging.getLogger(__name__)
+
+default_substitutions = {
+ 'version',
+ 'release',
+ 'today',
+ 'translation progress',
+}
+
+
+class SphinxTransform(Transform):
+ """A base class of Transforms.
+
+ Compared with ``docutils.transforms.Transform``, this class improves accessibility to
+ Sphinx APIs.
+ """
+
+ @property
+ def app(self) -> Sphinx:
+ """Reference to the :class:`.Sphinx` object."""
+ return self.env.app
+
+ @property
+ def env(self) -> BuildEnvironment:
+ """Reference to the :class:`.BuildEnvironment` object."""
+ return self.document.settings.env
+
+ @property
+ def config(self) -> Config:
+ """Reference to the :class:`.Config` object."""
+ return self.env.config
+
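The properties above give every subclass direct access to the application, environment,
and configuration. A minimal sketch of a custom transform built on this base class (the
class name and the ``strip_raw_nodes`` confval are hypothetical, not part of Sphinx):

    from typing import Any

    from docutils import nodes

    from sphinx.transforms import SphinxTransform

    class StripRawNodes(SphinxTransform):
        """Hypothetical transform: drop all raw nodes when a config flag is set."""
        default_priority = 500

        def apply(self, **kwargs: Any) -> None:
            if not getattr(self.config, 'strip_raw_nodes', False):  # assumed confval
                return
            for node in list(self.document.findall(nodes.raw)):
                node.parent.remove(node)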
+
+class SphinxTransformer(Transformer):
+ """
+ A transformer for Sphinx.
+ """
+
+ document: nodes.document
+ env: BuildEnvironment | None = None
+
+ def set_environment(self, env: BuildEnvironment) -> None:
+ self.env = env
+
+ def apply_transforms(self) -> None:
+ if isinstance(self.document, nodes.document):
+ if not hasattr(self.document.settings, 'env') and self.env:
+ self.document.settings.env = self.env
+
+ super().apply_transforms()
+ else:
+            # wrap the target node in a document node during the transform
+ try:
+ document = new_document('')
+ if self.env:
+ document.settings.env = self.env
+ document += self.document
+ self.document = document
+ super().apply_transforms()
+ finally:
+ self.document = self.document[0]
+
+
+class DefaultSubstitutions(SphinxTransform):
+ """
+ Replace some substitutions if they aren't defined in the document.
+ """
+ # run before the default Substitutions
+ default_priority = 210
+
+ def apply(self, **kwargs: Any) -> None:
+ # only handle those not otherwise defined in the document
+ to_handle = default_substitutions - set(self.document.substitution_defs)
+ for ref in self.document.findall(nodes.substitution_reference):
+ refname = ref['refname']
+ if refname in to_handle:
+ if refname == 'translation progress':
+ # special handling: calculate translation progress
+ text = _calculate_translation_progress(self.document)
+ else:
+ text = self.config[refname]
+ if refname == 'today' and not text:
+ # special handling: can also specify a strftime format
+ text = format_date(self.config.today_fmt or _('%b %d, %Y'),
+ language=self.config.language)
+ ref.replace_self(nodes.Text(text))
+
+
+def _calculate_translation_progress(document: nodes.document) -> str:
+ try:
+ translation_progress = document['translation_progress']
+ except KeyError:
+ return _('could not calculate translation progress!')
+
+ total = translation_progress['total']
+ translated = translation_progress['translated']
+ if total <= 0:
+ return _('no translated elements!')
+ return f'{translated / total:.2%}'
+
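The percentage formatting used above, for reference:

    >>> f'{3 / 4:.2%}'
    '75.00%'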
+
+class MoveModuleTargets(SphinxTransform):
+ """
+ Move module targets that are the first thing in a section to the section
+ title.
+
+ XXX Python specific
+ """
+ default_priority = 210
+
+ def apply(self, **kwargs: Any) -> None:
+ for node in list(self.document.findall(nodes.target)):
+ if not node['ids']:
+ continue
+ if (
+ 'ismod' in node
+ and type(node.parent) is nodes.section
+ # index 0: section title node
+ # index 1: index node
+ # index 2: target node
+ and node.parent.index(node) == 2
+ ):
+ node.parent['ids'][0:0] = node['ids']
+ node.parent.remove(node)
+
+
+class HandleCodeBlocks(SphinxTransform):
+ """
+ Several code block related transformations.
+ """
+ default_priority = 210
+
+ def apply(self, **kwargs: Any) -> None:
+ # move doctest blocks out of blockquotes
+ for node in self.document.findall(nodes.block_quote):
+ if all(isinstance(child, nodes.doctest_block) for child
+ in node.children):
+ node.replace_self(node.children)
+ # combine successive doctest blocks
+ # for node in self.document.findall(nodes.doctest_block):
+ # if node not in node.parent.children:
+ # continue
+ # parindex = node.parent.index(node)
+ # while len(node.parent) > parindex+1 and \
+ # isinstance(node.parent[parindex+1], nodes.doctest_block):
+ # node[0] = nodes.Text(node[0] + '\n\n' +
+ # node.parent[parindex+1][0])
+ # del node.parent[parindex+1]
+
+
+class AutoNumbering(SphinxTransform):
+ """
+ Register IDs of tables, figures and literal_blocks to assign numbers.
+ """
+ default_priority = 210
+
+ def apply(self, **kwargs: Any) -> None:
+ domain: StandardDomain = self.env.domains['std']
+
+ for node in self.document.findall(nodes.Element):
+ if (domain.is_enumerable_node(node) and
+ domain.get_numfig_title(node) is not None and
+ node['ids'] == []):
+ self.document.note_implicit_target(node)
+
+
+class SortIds(SphinxTransform):
+ """
+ Sort section IDs so that the "id[0-9]+" one comes last.
+ """
+ default_priority = 261
+
+ def apply(self, **kwargs: Any) -> None:
+ for node in self.document.findall(nodes.section):
+ if len(node['ids']) > 1 and node['ids'][0].startswith('id'):
+ node['ids'] = node['ids'][1:] + [node['ids'][0]]
+
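The effect of the reordering, sketched on a plain list (hypothetical ids):

    >>> ids = ['id1', 'my-section']   # docutils-generated "id*" name first
    >>> ids[1:] + [ids[0]]
    ['my-section', 'id1']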
+
+TRANSLATABLE_NODES = {
+ 'literal-block': nodes.literal_block,
+ 'doctest-block': nodes.doctest_block,
+ 'raw': nodes.raw,
+ 'index': addnodes.index,
+ 'image': nodes.image,
+}
+
+
+class ApplySourceWorkaround(SphinxTransform):
+ """
+ Update source and rawsource attributes
+ """
+ default_priority = 10
+
+ def apply(self, **kwargs: Any) -> None:
+ for node in self.document.findall(): # type: Node
+ if isinstance(node, (nodes.TextElement, nodes.image, nodes.topic)):
+ apply_source_workaround(node)
+
+
+class AutoIndexUpgrader(SphinxTransform):
+ """
+ Detect old style (4 column based indices) and automatically upgrade to new style.
+ """
+ default_priority = 210
+
+ def apply(self, **kwargs: Any) -> None:
+ for node in self.document.findall(addnodes.index):
+ if 'entries' in node and any(len(entry) == 4 for entry in node['entries']):
+ msg = __('4 column based index found. '
+ 'It might be a bug of extensions you use: %r') % node['entries']
+ logger.warning(msg, location=node)
+ for i, entry in enumerate(node['entries']):
+ if len(entry) == 4:
+ node['entries'][i] = entry + (None,)
+
+
+class ExtraTranslatableNodes(SphinxTransform):
+ """
+ Make nodes translatable
+ """
+ default_priority = 10
+
+ def apply(self, **kwargs: Any) -> None:
+ targets = self.config.gettext_additional_targets
+ target_nodes = [v for k, v in TRANSLATABLE_NODES.items() if k in targets]
+ if not target_nodes:
+ return
+
+ def is_translatable_node(node: Node) -> bool:
+ return isinstance(node, tuple(target_nodes))
+
+ for node in self.document.findall(is_translatable_node): # type: nodes.Element
+ node['translatable'] = True
+
+
+class UnreferencedFootnotesDetector(SphinxTransform):
+ """
+ Detect unreferenced footnotes and emit warnings
+ """
+ default_priority = 200
+
+ def apply(self, **kwargs: Any) -> None:
+ for node in self.document.footnotes:
+ if node['names'] == []:
+                # footnote with a duplicated number; the parser has already warned about it.
+ pass
+ elif node['names'][0] not in self.document.footnote_refs:
+ logger.warning(__('Footnote [%s] is not referenced.'), node['names'][0],
+ type='ref', subtype='footnote',
+ location=node)
+
+ for node in self.document.autofootnotes:
+ if not any(ref['auto'] == node['auto'] for ref in self.document.autofootnote_refs):
+ logger.warning(__('Footnote [#] is not referenced.'),
+ type='ref', subtype='footnote',
+ location=node)
+
+
+class DoctestTransform(SphinxTransform):
+ """Set "doctest" style to each doctest_block node"""
+ default_priority = 500
+
+ def apply(self, **kwargs: Any) -> None:
+ for node in self.document.findall(nodes.doctest_block):
+ node['classes'].append('doctest')
+
+
+class FilterSystemMessages(SphinxTransform):
+ """Filter system messages from a doctree."""
+ default_priority = 999
+
+ def apply(self, **kwargs: Any) -> None:
+ filterlevel = 2 if self.config.keep_warnings else 5
+ for node in list(self.document.findall(nodes.system_message)):
+ if node['level'] < filterlevel:
+ logger.debug('%s [filtered system message]', node.astext())
+ node.parent.remove(node)
+
+
+class SphinxContentsFilter(ContentsFilter):
+ """
+ Used with BuildEnvironment.add_toc_from() to discard cross-file links
+ within table-of-contents link nodes.
+ """
+ visit_pending_xref = ContentsFilter.ignore_node_but_process_children
+
+ def visit_image(self, node: nodes.image) -> None:
+ raise nodes.SkipNode
+
+
+class SphinxSmartQuotes(SmartQuotes, SphinxTransform):
+ """
+    A customized SmartQuotes that avoids transforming some extra node types.
+
+ refs: sphinx.parsers.RSTParser
+ """
+ default_priority = 750
+
+ def apply(self, **kwargs: Any) -> None:
+ if not self.is_available():
+ return
+
+ # override default settings with :confval:`smartquotes_action`
+ self.smartquotes_action = self.config.smartquotes_action
+
+ super().apply()
+
+ def is_available(self) -> bool:
+ builders = self.config.smartquotes_excludes.get('builders', [])
+ languages = self.config.smartquotes_excludes.get('languages', [])
+
+ if self.document.settings.smart_quotes is False:
+ # disabled by 3rd party extension (workaround)
+ return False
+ if self.config.smartquotes is False:
+ # disabled by confval smartquotes
+ return False
+ if self.app.builder.name in builders:
+ # disabled by confval smartquotes_excludes['builders']
+ return False
+ if self.config.language in languages:
+ # disabled by confval smartquotes_excludes['languages']
+ return False
+
+        # confirm whether the selected language supports smart quotes
+ language = self.env.settings['language_code']
+ return any(
+ tag in smartchars.quotes
+ for tag in normalize_language_tag(language)
+ )
+
+ def get_tokens(self, txtnodes: list[Text]) -> Generator[tuple[str, str], None, None]:
+ # A generator that yields ``(texttype, nodetext)`` tuples for a list
+ # of "Text" nodes (interface to ``smartquotes.educate_tokens()``).
+ for txtnode in txtnodes:
+ if is_smartquotable(txtnode):
+ # SmartQuotes uses backslash escapes instead of null-escapes
+ text = re.sub(r'(?<=\x00)([-\\\'".`])', r'\\\1', str(txtnode))
+ yield 'plain', text
+ else:
+ # skip smart quotes
+ yield 'literal', txtnode.astext()
+
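A quick illustration of the escape rewriting in ``get_tokens``: docutils marks escaped
characters with a preceding NUL byte, which SmartQuotes expects as a backslash escape
instead.

    >>> import re
    >>> re.sub(r'(?<=\x00)([-\\\'".`])', r'\\\1', 'escaped \x00"quote')
    'escaped \x00\\"quote'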
+
+class DoctreeReadEvent(SphinxTransform):
+ """Emit :event:`doctree-read` event."""
+ default_priority = 880
+
+ def apply(self, **kwargs: Any) -> None:
+ self.app.emit('doctree-read', self.document)
+
+
+class ManpageLink(SphinxTransform):
+ """Find manpage section numbers and names"""
+ default_priority = 999
+
+ def apply(self, **kwargs: Any) -> None:
+ for node in self.document.findall(addnodes.manpage):
+ manpage = ' '.join([str(x) for x in node.children
+ if isinstance(x, nodes.Text)])
+ pattern = r'^(?P<path>(?P<page>.+)[\(\.](?P<section>[1-9]\w*)?\)?)$'
+ info = {'path': manpage,
+ 'page': manpage,
+ 'section': ''}
+ r = re.match(pattern, manpage)
+ if r:
+ info = r.groupdict()
+ node.attributes.update(info)
+
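For example, the pattern above splits a conventional manpage reference into its parts:

    >>> import re
    >>> pattern = r'^(?P<path>(?P<page>.+)[\(\.](?P<section>[1-9]\w*)?\)?)$'
    >>> re.match(pattern, 'ls(1)').groupdict()
    {'path': 'ls(1)', 'page': 'ls', 'section': '1'}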
+
+class GlossarySorter(SphinxTransform):
+ """Sort glossaries that have the ``sorted`` flag."""
+ # This must be done after i18n, therefore not right
+ # away in the glossary directive.
+ default_priority = 500
+
+ def apply(self, **kwargs: Any) -> None:
+ for glossary in self.document.findall(addnodes.glossary):
+ if glossary["sorted"]:
+ definition_list = cast(nodes.definition_list, glossary[0])
+ definition_list[:] = sorted(
+ definition_list,
+ key=lambda item: unicodedata.normalize(
+ 'NFD',
+ cast(nodes.term, item)[0].astext().lower()),
+ )
+
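Normalising to NFD before comparing makes accented terms sort next to their unaccented
neighbours instead of after 'z'; a small sketch of the sort key used above:

    >>> import unicodedata
    >>> sorted(['Zebra', 'éclair', 'apple'],
    ...        key=lambda t: unicodedata.normalize('NFD', t.lower()))
    ['apple', 'éclair', 'Zebra']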
+
+class ReorderConsecutiveTargetAndIndexNodes(SphinxTransform):
+ """Index nodes interspersed between target nodes prevent other
+ Transformations from combining those target nodes,
+ e.g. ``PropagateTargets``. This transformation reorders them:
+
+ Given the following ``document`` as input::
+
+ <document>
+ <target ids="id1" ...>
+ <index entries="...1...">
+ <target ids="id2" ...>
+ <target ids="id3" ...>
+ <index entries="...2...">
+ <target ids="id4" ...>
+
+ The transformed result will be::
+
+ <document>
+ <index entries="...1...">
+ <index entries="...2...">
+ <target ids="id1" ...>
+ <target ids="id2" ...>
+ <target ids="id3" ...>
+ <target ids="id4" ...>
+ """
+
+ # This transform MUST run before ``PropagateTargets``.
+ default_priority = 220
+
+ def apply(self, **kwargs: Any) -> None:
+ for target in self.document.findall(nodes.target):
+ _reorder_index_target_nodes(target)
+
+
+def _reorder_index_target_nodes(start_node: nodes.target) -> None:
+ """Sort target and index nodes.
+
+ Find all consecutive target and index nodes starting from ``start_node``,
+ and move all index nodes to before the first target node.
+ """
+ nodes_to_reorder: list[nodes.target | addnodes.index] = []
+
+ # Note that we cannot use 'condition' to filter,
+ # as we want *consecutive* target & index nodes.
+ node: nodes.Node
+ for node in start_node.findall(descend=False, siblings=True):
+ if isinstance(node, (nodes.target, addnodes.index)):
+ nodes_to_reorder.append(node)
+ continue
+ break # must be a consecutive run of target or index nodes
+
+ if len(nodes_to_reorder) < 2:
+ return # Nothing to reorder
+
+ parent = nodes_to_reorder[0].parent
+ if parent == nodes_to_reorder[-1].parent:
+ first_idx = parent.index(nodes_to_reorder[0])
+ last_idx = parent.index(nodes_to_reorder[-1])
+ if first_idx + len(nodes_to_reorder) - 1 == last_idx:
+ parent[first_idx:last_idx + 1] = sorted(nodes_to_reorder, key=_sort_key)
+
+
+def _sort_key(node: nodes.Node) -> int:
+ # Must be a stable sort.
+ if isinstance(node, addnodes.index):
+ return 0
+ if isinstance(node, nodes.target):
+ return 1
+ msg = f'_sort_key called with unexpected node type {type(node)!r}'
+ raise ValueError(msg)
+
+
+def setup(app: Sphinx) -> dict[str, Any]:
+ app.add_transform(ApplySourceWorkaround)
+ app.add_transform(ExtraTranslatableNodes)
+ app.add_transform(DefaultSubstitutions)
+ app.add_transform(MoveModuleTargets)
+ app.add_transform(HandleCodeBlocks)
+ app.add_transform(SortIds)
+ app.add_transform(DoctestTransform)
+ app.add_transform(AutoNumbering)
+ app.add_transform(AutoIndexUpgrader)
+ app.add_transform(FilterSystemMessages)
+ app.add_transform(UnreferencedFootnotesDetector)
+ app.add_transform(SphinxSmartQuotes)
+ app.add_transform(DoctreeReadEvent)
+ app.add_transform(ManpageLink)
+ app.add_transform(GlossarySorter)
+ app.add_transform(ReorderConsecutiveTargetAndIndexNodes)
+
+ return {
+ 'version': 'builtin',
+ 'parallel_read_safe': True,
+ 'parallel_write_safe': True,
+ }
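Third-party extensions register their own transforms through the same API; a minimal
sketch (the extension module and transform are hypothetical):

    # my_extension.py (hypothetical)
    from typing import Any

    from sphinx.application import Sphinx
    from sphinx.transforms import SphinxTransform

    class MyTransform(SphinxTransform):
        default_priority = 500

        def apply(self, **kwargs: Any) -> None:
            pass  # modify self.document here

    def setup(app: Sphinx) -> dict[str, Any]:
        app.add_transform(MyTransform)
        return {'version': '1.0', 'parallel_read_safe': True}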
diff --git a/sphinx/transforms/compact_bullet_list.py b/sphinx/transforms/compact_bullet_list.py
new file mode 100644
index 0000000..149b5e0
--- /dev/null
+++ b/sphinx/transforms/compact_bullet_list.py
@@ -0,0 +1,91 @@
+"""Docutils transforms used by Sphinx when reading documents."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, cast
+
+from docutils import nodes
+
+from sphinx import addnodes
+from sphinx.transforms import SphinxTransform
+
+if TYPE_CHECKING:
+ from docutils.nodes import Node
+
+ from sphinx.application import Sphinx
+
+
+class RefOnlyListChecker(nodes.GenericNodeVisitor):
+ """Raise `nodes.NodeFound` if non-simple list item is encountered.
+
+ Here 'simple' means a list item containing only a paragraph with a
+ single reference in it.
+ """
+
+ def default_visit(self, node: Node) -> None:
+ raise nodes.NodeFound
+
+ def visit_bullet_list(self, node: nodes.bullet_list) -> None:
+ pass
+
+ def visit_list_item(self, node: nodes.list_item) -> None:
+ children: list[Node] = []
+ for child in node.children:
+ if not isinstance(child, nodes.Invisible):
+ children.append(child)
+ if len(children) != 1:
+ raise nodes.NodeFound
+ if not isinstance(children[0], nodes.paragraph):
+ raise nodes.NodeFound
+ para = children[0]
+ if len(para) != 1:
+ raise nodes.NodeFound
+ if not isinstance(para[0], addnodes.pending_xref):
+ raise nodes.NodeFound
+ raise nodes.SkipChildren
+
+ def invisible_visit(self, node: Node) -> None:
+ """Invisible nodes should be ignored."""
+ pass
+
+
+class RefOnlyBulletListTransform(SphinxTransform):
+ """Change refonly bullet lists to use compact_paragraphs.
+
+    Specifically implemented for the 'Indices and Tables' section, which looks
+    odd when html_compact_lists is false.
+ """
+ default_priority = 100
+
+ def apply(self, **kwargs: Any) -> None:
+ if self.config.html_compact_lists:
+ return
+
+ def check_refonly_list(node: Node) -> bool:
+ """Check for list with only references in it."""
+ visitor = RefOnlyListChecker(self.document)
+ try:
+ node.walk(visitor)
+ except nodes.NodeFound:
+ return False
+ else:
+ return True
+
+ for node in self.document.findall(nodes.bullet_list):
+ if check_refonly_list(node):
+ for item in node.findall(nodes.list_item):
+ para = cast(nodes.paragraph, item[0])
+ ref = cast(nodes.reference, para[0])
+ compact_para = addnodes.compact_paragraph()
+ compact_para += ref
+ item.replace(para, compact_para)
+
+
+def setup(app: Sphinx) -> dict[str, Any]:
+ app.add_transform(RefOnlyBulletListTransform)
+
+ return {
+ 'version': 'builtin',
+ 'parallel_read_safe': True,
+ 'parallel_write_safe': True,
+ }
diff --git a/sphinx/transforms/i18n.py b/sphinx/transforms/i18n.py
new file mode 100644
index 0000000..d26c279
--- /dev/null
+++ b/sphinx/transforms/i18n.py
@@ -0,0 +1,624 @@
+"""Docutils transforms used by Sphinx when reading documents."""
+
+from __future__ import annotations
+
+import contextlib
+from os import path
+from re import DOTALL, match
+from textwrap import indent
+from typing import TYPE_CHECKING, Any, TypeVar
+
+from docutils import nodes
+from docutils.io import StringInput
+
+from sphinx import addnodes
+from sphinx.domains.std import make_glossary_term, split_term_classifiers
+from sphinx.errors import ConfigError
+from sphinx.locale import __
+from sphinx.locale import init as init_locale
+from sphinx.transforms import SphinxTransform
+from sphinx.util import get_filetype, logging
+from sphinx.util.i18n import docname_to_domain
+from sphinx.util.index_entries import split_index_msg
+from sphinx.util.nodes import (
+ IMAGE_TYPE_NODES,
+ LITERAL_TYPE_NODES,
+ NodeMatcher,
+ extract_messages,
+ traverse_translatable_index,
+)
+
+if TYPE_CHECKING:
+ from collections.abc import Sequence
+
+ from sphinx.application import Sphinx
+ from sphinx.config import Config
+
+
+logger = logging.getLogger(__name__)
+
+# The attributes not copied to the translated node
+#
+# * refexplicit: allows the translation to give (or not to give) an explicit
+#   title to the pending_xref
+EXCLUDED_PENDING_XREF_ATTRIBUTES = ('refexplicit',)
+
+
+N = TypeVar('N', bound=nodes.Node)
+
+
+def publish_msgstr(app: Sphinx, source: str, source_path: str, source_line: int,
+ config: Config, settings: Any) -> nodes.Element:
+ """Publish msgstr (single line) into docutils document
+
+ :param sphinx.application.Sphinx app: sphinx application
+ :param str source: source text
+ :param str source_path: source path for warning indication
+ :param source_line: source line for warning indication
+ :param sphinx.config.Config config: sphinx config
+ :param docutils.frontend.Values settings: docutils settings
+ :return: document
+ :rtype: docutils.nodes.document
+ """
+ try:
+ # clear rst_prolog temporarily
+ rst_prolog = config.rst_prolog
+ config.rst_prolog = None # type: ignore[attr-defined]
+
+ from sphinx.io import SphinxI18nReader
+ reader = SphinxI18nReader()
+ reader.setup(app)
+ filetype = get_filetype(config.source_suffix, source_path)
+ parser = app.registry.create_source_parser(app, filetype)
+ doc = reader.read(
+ source=StringInput(source=source,
+ source_path=f"{source_path}:{source_line}:<translated>"),
+ parser=parser,
+ settings=settings,
+ )
+ with contextlib.suppress(IndexError): # empty node
+ return doc[0] # type: ignore[return-value]
+ return doc
+ finally:
+ config.rst_prolog = rst_prolog # type: ignore[attr-defined]
+
+
+def parse_noqa(source: str) -> tuple[str, bool]:
+ m = match(r"(.*)(?<!\\)#\s*noqa\s*$", source, DOTALL)
+ if m:
+ return m.group(1), True
+ else:
+ return source, False
+
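The ``#noqa`` marker lets translators suppress the reference-consistency warnings
emitted further below; a short demonstration (note the backslash escape):

    >>> parse_noqa('Translated text #noqa')
    ('Translated text ', True)
    >>> parse_noqa('Escaped marker \\#noqa')
    ('Escaped marker \\#noqa', False)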
+
+class PreserveTranslatableMessages(SphinxTransform):
+ """
+ Preserve original translatable messages before translation
+ """
+ default_priority = 10 # this MUST be invoked before Locale transform
+
+ def apply(self, **kwargs: Any) -> None:
+ for node in self.document.findall(addnodes.translatable):
+ node.preserve_original_messages()
+
+
+class _NodeUpdater:
+ """Contains logic for updating one node with the translated content."""
+
+ def __init__(
+ self, node: nodes.Element, patch: nodes.Element, document: nodes.document, noqa: bool,
+ ) -> None:
+ self.node: nodes.Element = node
+ self.patch: nodes.Element = patch
+ self.document: nodes.document = document
+ self.noqa: bool = noqa
+
+ def compare_references(self, old_refs: Sequence[nodes.Element],
+ new_refs: Sequence[nodes.Element],
+ warning_msg: str) -> None:
+ """Warn about mismatches between references in original and translated content."""
+ # FIXME: could use a smarter strategy than len(old_refs) == len(new_refs)
+ if not self.noqa and len(old_refs) != len(new_refs):
+ old_ref_rawsources = [ref.rawsource for ref in old_refs]
+ new_ref_rawsources = [ref.rawsource for ref in new_refs]
+ logger.warning(warning_msg.format(old_ref_rawsources, new_ref_rawsources),
+ location=self.node, type='i18n', subtype='inconsistent_references')
+
+ def update_title_mapping(self) -> bool:
+ processed = False # skip flag
+
+ # update title(section) target name-id mapping
+ if isinstance(self.node, nodes.title) and isinstance(self.node.parent, nodes.section):
+ section_node = self.node.parent
+ new_name = nodes.fully_normalize_name(self.patch.astext())
+ old_name = nodes.fully_normalize_name(self.node.astext())
+
+ if old_name != new_name:
+ # if name would be changed, replace node names and
+ # document nameids mapping with new name.
+ names = section_node.setdefault('names', [])
+ names.append(new_name)
+                # The original section name (reference target name) must be kept
+                # so that other nodes that are not yet translated, or that use an
+                # explicit target name like "`text to display <explicit target name_>`_",
+                # can still refer to it.  Therefore `old_name` remains in `names`.
+
+ _id = self.document.nameids.get(old_name, None)
+ explicit = self.document.nametypes.get(old_name, None)
+
+                # * if explicit: _id is a label; the title node needs another id.
+ # * if not explicit:
+ #
+ # * if _id is None:
+ #
+ # _id is None means:
+ #
+ # 1. _id was not provided yet.
+ #
+ # 2. _id was duplicated.
+ #
+ # old_name entry still exists in nameids and
+ # nametypes for another duplicated entry.
+ #
+                # * if _id is provided: the process below
+ if _id:
+ if not explicit:
+ # _id was not duplicated.
+ # remove old_name entry from document ids database
+ # to reuse original _id.
+ self.document.nameids.pop(old_name, None)
+ self.document.nametypes.pop(old_name, None)
+ self.document.ids.pop(_id, None)
+
+ # re-entry with new named section node.
+ #
+                # Note: msgnode, the second parameter of
+                # `note_implicit_target`, is not necessary here because
+                # section_node was already noted during rst parsing by
+                # `docutils.parsers.rst.states.RSTState.new_subsection()`
+                # and already has a `system_message` if needed.
+ self.document.note_implicit_target(section_node)
+
+ # replace target's refname to new target name
+ matcher = NodeMatcher(nodes.target, refname=old_name)
+ for old_target in self.document.findall(matcher): # type: nodes.target
+ old_target['refname'] = new_name
+
+ processed = True
+
+ return processed
+
+ def update_autofootnote_references(self) -> None:
+        # auto-numbered footnote references should use the original 'ids'.
+ def list_replace_or_append(lst: list[N], old: N, new: N) -> None:
+ if old in lst:
+ lst[lst.index(old)] = new
+ else:
+ lst.append(new)
+
+ is_autofootnote_ref = NodeMatcher(nodes.footnote_reference, auto=Any)
+ old_foot_refs: list[nodes.footnote_reference] = [
+ *self.node.findall(is_autofootnote_ref)]
+ new_foot_refs: list[nodes.footnote_reference] = [
+ *self.patch.findall(is_autofootnote_ref)]
+ self.compare_references(old_foot_refs, new_foot_refs,
+ __('inconsistent footnote references in translated message.' +
+ ' original: {0}, translated: {1}'))
+ old_foot_namerefs: dict[str, list[nodes.footnote_reference]] = {}
+ for r in old_foot_refs:
+ old_foot_namerefs.setdefault(r.get('refname'), []).append(r)
+ for newf in new_foot_refs:
+ refname = newf.get('refname')
+ refs = old_foot_namerefs.get(refname, [])
+ if not refs:
+ newf.parent.remove(newf)
+ continue
+
+ oldf = refs.pop(0)
+ newf['ids'] = oldf['ids']
+ for id in newf['ids']:
+ self.document.ids[id] = newf
+
+ if newf['auto'] == 1:
+ # autofootnote_refs
+ list_replace_or_append(self.document.autofootnote_refs, oldf, newf)
+ else:
+ # symbol_footnote_refs
+ list_replace_or_append(self.document.symbol_footnote_refs, oldf, newf)
+
+ if refname:
+ footnote_refs = self.document.footnote_refs.setdefault(refname, [])
+ list_replace_or_append(footnote_refs, oldf, newf)
+
+ refnames = self.document.refnames.setdefault(refname, [])
+ list_replace_or_append(refnames, oldf, newf)
+
+ def update_refnamed_references(self) -> None:
+        # references should use the new (translated) 'refname'.
+ # * reference target ".. _Python: ..." is not translatable.
+ # * use translated refname for section refname.
+ # * inline reference "`Python <...>`_" has no 'refname'.
+ is_refnamed_ref = NodeMatcher(nodes.reference, refname=Any)
+ old_refs: list[nodes.reference] = [*self.node.findall(is_refnamed_ref)]
+ new_refs: list[nodes.reference] = [*self.patch.findall(is_refnamed_ref)]
+ self.compare_references(old_refs, new_refs,
+ __('inconsistent references in translated message.' +
+ ' original: {0}, translated: {1}'))
+ old_ref_names = [r['refname'] for r in old_refs]
+ new_ref_names = [r['refname'] for r in new_refs]
+ orphans = [*({*old_ref_names} - {*new_ref_names})]
+ for newr in new_refs:
+ if not self.document.has_name(newr['refname']):
+ # Maybe refname is translated but target is not translated.
+ # Note: multiple translated refnames break link ordering.
+ if orphans:
+ newr['refname'] = orphans.pop(0)
+ else:
+                    # the orphan refnames are already exhausted:
+                    # the number of references is the same in new_refs and old_refs.
+ pass
+
+ self.document.note_refname(newr)
+
+ def update_refnamed_footnote_references(self) -> None:
+ # refnamed footnote should use original 'ids'.
+ is_refnamed_footnote_ref = NodeMatcher(nodes.footnote_reference, refname=Any)
+ old_foot_refs: list[nodes.footnote_reference] = [*self.node.findall(
+ is_refnamed_footnote_ref)]
+ new_foot_refs: list[nodes.footnote_reference] = [*self.patch.findall(
+ is_refnamed_footnote_ref)]
+ refname_ids_map: dict[str, list[str]] = {}
+ self.compare_references(old_foot_refs, new_foot_refs,
+ __('inconsistent footnote references in translated message.' +
+ ' original: {0}, translated: {1}'))
+ for oldf in old_foot_refs:
+ refname_ids_map.setdefault(oldf["refname"], []).append(oldf["ids"])
+ for newf in new_foot_refs:
+ refname = newf["refname"]
+ if refname_ids_map.get(refname):
+ newf["ids"] = refname_ids_map[refname].pop(0)
+
+ def update_citation_references(self) -> None:
+ # citation should use original 'ids'.
+ is_citation_ref = NodeMatcher(nodes.citation_reference, refname=Any)
+ old_cite_refs: list[nodes.citation_reference] = [*self.node.findall(is_citation_ref)]
+ new_cite_refs: list[nodes.citation_reference] = [*self.patch.findall(is_citation_ref)]
+ self.compare_references(old_cite_refs, new_cite_refs,
+ __('inconsistent citation references in translated message.' +
+ ' original: {0}, translated: {1}'))
+ refname_ids_map: dict[str, list[str]] = {}
+ for oldc in old_cite_refs:
+ refname_ids_map.setdefault(oldc["refname"], []).append(oldc["ids"])
+ for newc in new_cite_refs:
+ refname = newc["refname"]
+ if refname_ids_map.get(refname):
+ newc["ids"] = refname_ids_map[refname].pop()
+
+ def update_pending_xrefs(self) -> None:
+        # The original pending_xref['reftarget'] contains the untranslated
+        # target name, so the new pending_xref must use the original one.
+        # This prevents the translation from changing ref-targets.
+ old_xrefs = [*self.node.findall(addnodes.pending_xref)]
+ new_xrefs = [*self.patch.findall(addnodes.pending_xref)]
+ self.compare_references(old_xrefs, new_xrefs,
+ __('inconsistent term references in translated message.' +
+ ' original: {0}, translated: {1}'))
+
+ xref_reftarget_map: dict[tuple[str, str, str] | None, dict[str, Any]] = {}
+
+ def get_ref_key(node: addnodes.pending_xref) -> tuple[str, str, str] | None:
+ case = node["refdomain"], node["reftype"]
+ if case == ('std', 'term'):
+ return None
+ else:
+ return (
+ node["refdomain"],
+ node["reftype"],
+ node['reftarget'],
+ )
+
+ for old in old_xrefs:
+ key = get_ref_key(old)
+ if key:
+ xref_reftarget_map[key] = old.attributes
+ for new in new_xrefs:
+ key = get_ref_key(new)
+            # Copy attributes to keep the original node behavior; copying
+            # 'reftarget', 'py:module' and 'py:class' in particular is needed.
+ for k, v in xref_reftarget_map.get(key, {}).items():
+ if k not in EXCLUDED_PENDING_XREF_ATTRIBUTES:
+ new[k] = v
+
+ def update_leaves(self) -> None:
+ for child in self.patch.children:
+ child.parent = self.node
+ self.node.children = self.patch.children
+
+
+class Locale(SphinxTransform):
+ """
+ Replace translatable nodes with their translated doctree.
+ """
+ default_priority = 20
+
+ def apply(self, **kwargs: Any) -> None:
+ settings, source = self.document.settings, self.document['source']
+ msgstr = ''
+
+ textdomain = docname_to_domain(self.env.docname, self.config.gettext_compact)
+
+ # fetch translations
+ dirs = [path.join(self.env.srcdir, directory)
+ for directory in self.config.locale_dirs]
+ catalog, has_catalog = init_locale(dirs, self.config.language, textdomain)
+ if not has_catalog:
+ return
+
+ catalogues = [getattr(catalog, '_catalog', None)]
+ while (catalog := catalog._fallback) is not None: # type: ignore[attr-defined]
+ catalogues.append(getattr(catalog, '_catalog', None))
+ merged: dict[str, str] = {}
+ for catalogue in filter(None, reversed(catalogues)): # type: dict[str, str]
+ merged |= catalogue
+
+ # phase1: replace reference ids with translated names
+ for node, msg in extract_messages(self.document):
+ msgstr = merged.get(msg, '')
+
+ # There is no point in having #noqa on literal blocks because
+ # they cannot contain references. Recognizing it would just
+ # completely prevent escaping the #noqa. Outside of literal
+ # blocks, one can always write \#noqa.
+ if not isinstance(node, LITERAL_TYPE_NODES):
+ msgstr, _ = parse_noqa(msgstr)
+
+ if msgstr.strip() == '':
+ # as-of-yet untranslated
+ node['translated'] = False
+ continue
+ if msgstr == msg:
+ # identical source and translated messages
+ node['translated'] = True
+ continue
+
+ # Avoid "Literal block expected; none found." warnings.
+            # If msgstr ends with '::', parser.parse() emits a warning;
+            # the literal-block warning only appears in that case.
+ if msgstr.strip().endswith('::'):
+ msgstr += '\n\n dummy literal'
+            # the dummy literal node will be discarded by 'patch = patch[0]'
+
+            # literal blocks need literal-block notation so that they are not
+            # parsed as paragraphs.
+ if isinstance(node, LITERAL_TYPE_NODES):
+ msgstr = '::\n\n' + indent(msgstr, ' ' * 3)
+
+ patch = publish_msgstr(self.app, msgstr, source,
+ node.line, self.config, settings)
+ # FIXME: no warnings about inconsistent references in this part
+ # XXX doctest and other block markup
+ if not isinstance(patch, nodes.paragraph):
+ continue # skip for now
+
+ updater = _NodeUpdater(node, patch, self.document, noqa=False)
+ processed = updater.update_title_mapping()
+
+ # glossary terms update refid
+ if isinstance(node, nodes.term):
+ for _id in node['ids']:
+ parts = split_term_classifiers(msgstr)
+ patch = publish_msgstr(
+ self.app, parts[0] or '', source, node.line, self.config, settings,
+ )
+ updater.patch = make_glossary_term(
+ self.env, patch, parts[1] or '', source, node.line, _id, self.document,
+ )
+ processed = True
+
+ # update leaves with processed nodes
+ if processed:
+ updater.update_leaves()
+ node['translated'] = True # to avoid double translation
+ else:
+ node['translated'] = False
+
+ # phase2: translation
+ for node, msg in extract_messages(self.document):
+ if node.setdefault('translated', False): # to avoid double translation
+ continue # skip if the node is already translated by phase1
+
+ msgstr = merged.get(msg, '')
+ noqa = False
+
+ # See above.
+ if not isinstance(node, LITERAL_TYPE_NODES):
+ msgstr, noqa = parse_noqa(msgstr)
+
+ if not msgstr or msgstr == msg: # as-of-yet untranslated
+ node['translated'] = False
+ continue
+
+ # update translatable nodes
+ if isinstance(node, addnodes.translatable):
+ node.apply_translated_message(msg, msgstr) # type: ignore[attr-defined]
+ continue
+
+ # update meta nodes
+ if isinstance(node, nodes.meta): # type: ignore[attr-defined]
+ node['content'] = msgstr
+ node['translated'] = True
+ continue
+
+ if isinstance(node, nodes.image) and node.get('alt') == msg:
+ node['alt'] = msgstr
+ continue
+
+ # Avoid "Literal block expected; none found." warnings.
+            # If msgstr ends with '::', parser.parse() emits a warning;
+            # the literal-block warning only appears in that case.
+ if msgstr.strip().endswith('::'):
+ msgstr += '\n\n dummy literal'
+            # the dummy literal node will be discarded by 'patch = patch[0]'
+
+            # literal blocks need literal-block notation so that they are not
+            # parsed as paragraphs.
+ if isinstance(node, LITERAL_TYPE_NODES):
+ msgstr = '::\n\n' + indent(msgstr, ' ' * 3)
+
+ # Structural Subelements phase1
+ # There is a possibility that only the title node is created.
+ # see: https://docutils.sourceforge.io/docs/ref/doctree.html#structural-subelements
+ if isinstance(node, nodes.title):
+ # This generates: <section ...><title>msgstr</title></section>
+ msgstr = msgstr + '\n' + '=' * len(msgstr) * 2
+
+ patch = publish_msgstr(self.app, msgstr, source,
+ node.line, self.config, settings)
+ # Structural Subelements phase2
+ if isinstance(node, nodes.title):
+ # get <title> node that placed as a first child
+ patch = patch.next_node()
+
+ # ignore unexpected markups in translation message
+ unexpected: tuple[type[nodes.Element], ...] = (
+ nodes.paragraph, # expected form of translation
+ nodes.title, # generated by above "Subelements phase2"
+ )
+
+ # following types are expected if
+ # config.gettext_additional_targets is configured
+ unexpected += LITERAL_TYPE_NODES
+ unexpected += IMAGE_TYPE_NODES
+
+ if not isinstance(patch, unexpected):
+ continue # skip
+
+ updater = _NodeUpdater(node, patch, self.document, noqa)
+ updater.update_autofootnote_references()
+ updater.update_refnamed_references()
+ updater.update_refnamed_footnote_references()
+ updater.update_citation_references()
+ updater.update_pending_xrefs()
+ updater.update_leaves()
+
+            # for highlighting, which expects .rawsource and .astext() to be the same.
+ if isinstance(node, LITERAL_TYPE_NODES):
+ node.rawsource = node.astext()
+
+ if isinstance(node, nodes.image) and node.get('alt') != msg:
+ node['uri'] = patch['uri']
+ node['translated'] = False
+ continue # do not mark translated
+
+ node['translated'] = True # to avoid double translation
+
+ if 'index' in self.config.gettext_additional_targets:
+ # Extract and translate messages for index entries.
+ for node, entries in traverse_translatable_index(self.document):
+ new_entries: list[tuple[str, str, str, str, str | None]] = []
+ for entry_type, value, target_id, main, _category_key in entries:
+ msg_parts = split_index_msg(entry_type, value)
+ msgstr_parts = []
+ for part in msg_parts:
+ msgstr = merged.get(part, '')
+ if not msgstr:
+ msgstr = part
+ msgstr_parts.append(msgstr)
+
+ new_entry = entry_type, ';'.join(msgstr_parts), target_id, main, None
+ new_entries.append(new_entry)
+
+ node['raw_entries'] = entries
+ node['entries'] = new_entries
+
+
+class TranslationProgressTotaliser(SphinxTransform):
+ """
+ Calculate the number of translated and untranslated nodes.
+ """
+ default_priority = 25 # MUST happen after Locale
+
+ def apply(self, **kwargs: Any) -> None:
+ from sphinx.builders.gettext import MessageCatalogBuilder
+ if isinstance(self.app.builder, MessageCatalogBuilder):
+ return
+
+ total = translated = 0
+ for node in self.document.findall(NodeMatcher(translated=Any)): # type: nodes.Element
+ total += 1
+ if node['translated']:
+ translated += 1
+
+ self.document['translation_progress'] = {
+ 'total': total,
+ 'translated': translated,
+ }
+
+
+class AddTranslationClasses(SphinxTransform):
+ """
+ Add ``translated`` or ``untranslated`` classes to indicate translation status.
+ """
+ default_priority = 950
+
+ def apply(self, **kwargs: Any) -> None:
+ from sphinx.builders.gettext import MessageCatalogBuilder
+ if isinstance(self.app.builder, MessageCatalogBuilder):
+ return
+
+ if not self.config.translation_progress_classes:
+ return
+
+ if self.config.translation_progress_classes is True:
+ add_translated = add_untranslated = True
+ elif self.config.translation_progress_classes == 'translated':
+ add_translated = True
+ add_untranslated = False
+ elif self.config.translation_progress_classes == 'untranslated':
+ add_translated = False
+ add_untranslated = True
+ else:
+ msg = ('translation_progress_classes must be '
+ 'True, False, "translated" or "untranslated"')
+ raise ConfigError(msg)
+
+ for node in self.document.findall(NodeMatcher(translated=Any)): # type: nodes.Element
+ if node['translated']:
+ if add_translated:
+ node.setdefault('classes', []).append('translated')
+ else:
+ if add_untranslated:
+ node.setdefault('classes', []).append('untranslated')
+
+
+class RemoveTranslatableInline(SphinxTransform):
+ """
+ Remove inline nodes used for translation as placeholders.
+ """
+ default_priority = 999
+
+ def apply(self, **kwargs: Any) -> None:
+ from sphinx.builders.gettext import MessageCatalogBuilder
+ if isinstance(self.app.builder, MessageCatalogBuilder):
+ return
+
+ matcher = NodeMatcher(nodes.inline, translatable=Any)
+ for inline in list(self.document.findall(matcher)): # type: nodes.inline
+ inline.parent.remove(inline)
+ inline.parent += inline.children
+
+
+def setup(app: Sphinx) -> dict[str, Any]:
+ app.add_transform(PreserveTranslatableMessages)
+ app.add_transform(Locale)
+ app.add_transform(TranslationProgressTotaliser)
+ app.add_transform(AddTranslationClasses)
+ app.add_transform(RemoveTranslatableInline)
+
+ return {
+ 'version': 'builtin',
+ 'parallel_read_safe': True,
+ 'parallel_write_safe': True,
+ }
diff --git a/sphinx/transforms/post_transforms/__init__.py b/sphinx/transforms/post_transforms/__init__.py
new file mode 100644
index 0000000..485f1f1
--- /dev/null
+++ b/sphinx/transforms/post_transforms/__init__.py
@@ -0,0 +1,297 @@
+"""Docutils transforms used by Sphinx."""
+
+from __future__ import annotations
+
+import re
+from typing import TYPE_CHECKING, Any, cast
+
+from docutils import nodes
+from docutils.nodes import Element, Node
+
+from sphinx import addnodes
+from sphinx.errors import NoUri
+from sphinx.locale import __
+from sphinx.transforms import SphinxTransform
+from sphinx.util import logging
+from sphinx.util.docutils import SphinxTranslator
+from sphinx.util.nodes import find_pending_xref_condition, process_only_nodes
+
+if TYPE_CHECKING:
+ from collections.abc import Sequence
+
+ from sphinx.addnodes import pending_xref
+ from sphinx.application import Sphinx
+ from sphinx.domains import Domain
+
+logger = logging.getLogger(__name__)
+
+
+class SphinxPostTransform(SphinxTransform):
+ """A base class of post-transforms.
+
+    Post transforms are invoked to modify the document, restructuring it for output.
+    They resolve references, convert images, apply special transformations for each
+    output format, and so on.  This class helps to implement these post transforms.
+ """
+ builders: tuple[str, ...] = ()
+ formats: tuple[str, ...] = ()
+
+ def apply(self, **kwargs: Any) -> None:
+ if self.is_supported():
+ self.run(**kwargs)
+
+ def is_supported(self) -> bool:
+ """Check this transform working for current builder."""
+ if self.builders and self.app.builder.name not in self.builders:
+ return False
+ if self.formats and self.app.builder.format not in self.formats:
+ return False
+
+ return True
+
+ def run(self, **kwargs: Any) -> None:
+ """Main method of post transforms.
+
+ Subclasses should override this method instead of ``apply()``.
+ """
+ raise NotImplementedError
+
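A post-transform subclass overrides ``run()`` and may restrict itself with the
``builders``/``formats`` filters; a minimal sketch (the class name is hypothetical):

    from typing import Any

    from sphinx.transforms.post_transforms import SphinxPostTransform

    class HtmlOnlyTweak(SphinxPostTransform):
        """Hypothetical post-transform that only runs for HTML output."""
        default_priority = 700
        formats = ('html',)  # run() is skipped for non-HTML builders

        def run(self, **kwargs: Any) -> None:
            pass  # restructure self.document for HTML output here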
+
+class ReferencesResolver(SphinxPostTransform):
+ """
+ Resolves cross-references on doctrees.
+ """
+
+ default_priority = 10
+
+ def run(self, **kwargs: Any) -> None:
+ for node in self.document.findall(addnodes.pending_xref):
+ content = self.find_pending_xref_condition(node, ("resolved", "*"))
+ if content:
+ contnode = cast(Element, content[0].deepcopy())
+ else:
+ contnode = cast(Element, node[0].deepcopy())
+
+ newnode = None
+
+ typ = node['reftype']
+ target = node['reftarget']
+ node.setdefault('refdoc', self.env.docname)
+ refdoc = node.get('refdoc')
+ domain = None
+
+ try:
+ if 'refdomain' in node and node['refdomain']:
+ # let the domain try to resolve the reference
+ try:
+ domain = self.env.domains[node['refdomain']]
+ except KeyError as exc:
+ raise NoUri(target, typ) from exc
+ newnode = domain.resolve_xref(self.env, refdoc, self.app.builder,
+ typ, target, node, contnode)
+ # really hardwired reference types
+ elif typ == 'any':
+ newnode = self.resolve_anyref(refdoc, node, contnode)
+ # no new node found? try the missing-reference event
+ if newnode is None:
+ newnode = self.app.emit_firstresult('missing-reference', self.env,
+ node, contnode,
+ allowed_exceptions=(NoUri,))
+ # still not found? warn if node wishes to be warned about or
+ # we are in nit-picky mode
+ if newnode is None:
+ self.warn_missing_reference(refdoc, typ, target, node, domain)
+ except NoUri:
+ newnode = None
+
+ if newnode:
+ newnodes: list[Node] = [newnode]
+ else:
+ newnodes = [contnode]
+ if newnode is None and isinstance(node[0], addnodes.pending_xref_condition):
+ matched = self.find_pending_xref_condition(node, ("*",))
+ if matched:
+ newnodes = matched
+ else:
+ logger.warning(__('Could not determine the fallback text for the '
+ 'cross-reference. Might be a bug.'), location=node)
+
+ node.replace_self(newnodes)
+
+ def resolve_anyref(
+ self, refdoc: str, node: pending_xref, contnode: Element,
+ ) -> Element | None:
+ """Resolve reference generated by the "any" role."""
+ stddomain = self.env.get_domain('std')
+ target = node['reftarget']
+ results: list[tuple[str, Element]] = []
+ # first, try resolving as :doc:
+ doc_ref = stddomain.resolve_xref(self.env, refdoc, self.app.builder,
+ 'doc', target, node, contnode)
+ if doc_ref:
+ results.append(('doc', doc_ref))
+ # next, do the standard domain (makes this a priority)
+ results.extend(stddomain.resolve_any_xref(self.env, refdoc, self.app.builder,
+ target, node, contnode))
+ for domain in self.env.domains.values():
+ if domain.name == 'std':
+ continue # we did this one already
+ try:
+ results.extend(domain.resolve_any_xref(self.env, refdoc, self.app.builder,
+ target, node, contnode))
+ except NotImplementedError:
+ # the domain doesn't yet support the new interface
+ # we have to manually collect possible references (SLOW)
+ for role in domain.roles:
+ res = domain.resolve_xref(self.env, refdoc, self.app.builder,
+ role, target, node, contnode)
+ if res and len(res) > 0 and isinstance(res[0], nodes.Element):
+ results.append((f'{domain.name}:{role}', res))
+ # now, see how many matches we got...
+ if not results:
+ return None
+ if len(results) > 1:
+ def stringify(name: str, node: Element) -> str:
+ reftitle = node.get('reftitle', node.astext())
+ return f':{name}:`{reftitle}`'
+ candidates = ' or '.join(stringify(name, role) for name, role in results)
+ logger.warning(__("more than one target found for 'any' cross-"
+ 'reference %r: could be %s'), target, candidates,
+ location=node)
+ res_role, newnode = results[0]
+ # Override "any" class with the actual role type to get the styling
+ # approximately correct.
+ res_domain = res_role.split(':')[0]
+ if (len(newnode) > 0 and
+ isinstance(newnode[0], nodes.Element) and
+ newnode[0].get('classes')):
+ newnode[0]['classes'].append(res_domain)
+ newnode[0]['classes'].append(res_role.replace(':', '-'))
+ return newnode
+
+ def warn_missing_reference(self, refdoc: str, typ: str, target: str,
+ node: pending_xref, domain: Domain | None) -> None:
+ warn = node.get('refwarn')
+ if self.config.nitpicky:
+ warn = True
+ dtype = f'{domain.name}:{typ}' if domain else typ
+ if self.config.nitpick_ignore:
+ if (dtype, target) in self.config.nitpick_ignore:
+ warn = False
+ # for "std" types also try without domain name
+ if (not domain or domain.name == 'std') and \
+ (typ, target) in self.config.nitpick_ignore:
+ warn = False
+ if self.config.nitpick_ignore_regex:
+ def matches_ignore(entry_type: str, entry_target: str) -> bool:
+ return any(
+ (
+ re.fullmatch(ignore_type, entry_type)
+ and re.fullmatch(ignore_target, entry_target)
+ )
+ for ignore_type, ignore_target
+ in self.config.nitpick_ignore_regex
+ )
+ if matches_ignore(dtype, target):
+ warn = False
+ # for "std" types also try without domain name
+ if (not domain or domain.name == 'std') and \
+ matches_ignore(typ, target):
+ warn = False
+ if not warn:
+ return
+
+ if self.app.emit_firstresult('warn-missing-reference', domain, node):
+ return
+ elif domain and typ in domain.dangling_warnings:
+ msg = domain.dangling_warnings[typ] % {'target': target}
+ elif node.get('refdomain', 'std') not in ('', 'std'):
+ msg = (__('%s:%s reference target not found: %s') %
+ (node['refdomain'], typ, target))
+ else:
+ msg = __('%r reference target not found: %s') % (typ, target)
+ logger.warning(msg, location=node, type='ref', subtype=typ)
+
+ def find_pending_xref_condition(self, node: pending_xref, conditions: Sequence[str],
+ ) -> list[Node] | None:
+ for condition in conditions:
+ matched = find_pending_xref_condition(node, condition)
+ if matched:
+ return matched.children
+ return None
+
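The nitpick filtering in ``warn_missing_reference`` above is driven from conf.py; for
instance (hypothetical targets):

    # conf.py (hypothetical entries)
    nitpicky = True
    nitpick_ignore = [('py:class', 'SomeVendorType')]     # exact (type, target) pairs
    nitpick_ignore_regex = [(r'py:.*', r'vendor\..*')]    # full-match patterns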
+
+class OnlyNodeTransform(SphinxPostTransform):
+ default_priority = 50
+
+ def run(self, **kwargs: Any) -> None:
+ # A comment on the comment() nodes being inserted: replacing by [] would
+ # result in a "Losing ids" exception if there is a target node before
+ # the only node, so we make sure docutils can transfer the id to
+ # something, even if it's just a comment and will lose the id anyway...
+ process_only_nodes(self.document, self.app.builder.tags)
+
+
+class SigElementFallbackTransform(SphinxPostTransform):
+ """Fallback various desc_* nodes to inline if translator does not support them."""
+ default_priority = 200
+
+ def run(self, **kwargs: Any) -> None:
+ def has_visitor(translator: type[nodes.NodeVisitor], node: type[Element]) -> bool:
+ return hasattr(translator, "visit_%s" % node.__name__)
+
+ try:
+ translator = self.app.builder.get_translator_class()
+ except AttributeError:
+ # do nothing if no translator class is specified (e.g., on a dummy builder)
+ return
+
+ if issubclass(translator, SphinxTranslator):
+ # subclass of SphinxTranslator supports desc_sig_element nodes automatically.
+ return
+
+ # for the leaf elements (desc_sig_element), the translator should support _all_,
+ # unless there exists a generic visit_desc_sig_element default visitor
+ if (not all(has_visitor(translator, node) for node in addnodes.SIG_ELEMENTS)
+ and not has_visitor(translator, addnodes.desc_sig_element)):
+ self.fallback(addnodes.desc_sig_element)
+
+ if not has_visitor(translator, addnodes.desc_inline):
+ self.fallback(addnodes.desc_inline)
+
+ def fallback(self, node_type: Any) -> None:
+ """Translate nodes of type *node_type* to docutils inline nodes.
+
+ The original node type name is stored as a string in a private
+        ``_sig_node_type`` attribute if that attribute is not already set.
+ """
+ for node in self.document.findall(node_type):
+ newnode = nodes.inline()
+ newnode.update_all_atts(node)
+ newnode.extend(node)
+ # Only set _sig_node_type if not defined by the user
+ newnode.setdefault('_sig_node_type', node.tagname)
+ node.replace_self(newnode)
+
+
+class PropagateDescDomain(SphinxPostTransform):
+ """Add the domain name of the parent node as a class in each desc_signature node."""
+ default_priority = 200
+
+ def run(self, **kwargs: Any) -> None:
+ for node in self.document.findall(addnodes.desc_signature):
+ if node.parent.get('domain'):
+ node['classes'].append(node.parent['domain'])
+
+
+def setup(app: Sphinx) -> dict[str, Any]:
+ app.add_post_transform(ReferencesResolver)
+ app.add_post_transform(OnlyNodeTransform)
+ app.add_post_transform(SigElementFallbackTransform)
+ app.add_post_transform(PropagateDescDomain)
+
+ return {
+ 'version': 'builtin',
+ 'parallel_read_safe': True,
+ 'parallel_write_safe': True,
+ }
diff --git a/sphinx/transforms/post_transforms/code.py b/sphinx/transforms/post_transforms/code.py
new file mode 100644
index 0000000..cd8abcc
--- /dev/null
+++ b/sphinx/transforms/post_transforms/code.py
@@ -0,0 +1,139 @@
+"""transforms for code-blocks."""
+
+from __future__ import annotations
+
+import sys
+from typing import TYPE_CHECKING, Any, NamedTuple
+
+from docutils import nodes
+from pygments.lexers import PythonConsoleLexer, guess_lexer
+
+from sphinx import addnodes
+from sphinx.ext import doctest
+from sphinx.transforms import SphinxTransform
+
+if TYPE_CHECKING:
+ from docutils.nodes import Node, TextElement
+
+ from sphinx.application import Sphinx
+
+
+class HighlightSetting(NamedTuple):
+ language: str
+ force: bool
+ lineno_threshold: int
+
+
+class HighlightLanguageTransform(SphinxTransform):
+ """
+ Apply highlight_language to all literal_block nodes.
+
+    This consults both the :confval:`highlight_language` setting and the
+    :rst:dir:`highlight` directive.  After processing, this transform
+    removes ``highlightlang`` nodes from the doctree.
+ """
+ default_priority = 400
+
+ def apply(self, **kwargs: Any) -> None:
+ visitor = HighlightLanguageVisitor(self.document,
+ self.config.highlight_language)
+ self.document.walkabout(visitor)
+
+ for node in list(self.document.findall(addnodes.highlightlang)):
+ node.parent.remove(node)
+
+
+class HighlightLanguageVisitor(nodes.NodeVisitor):
+ def __init__(self, document: nodes.document, default_language: str) -> None:
+ self.default_setting = HighlightSetting(default_language, False, sys.maxsize)
+ self.settings: list[HighlightSetting] = []
+ super().__init__(document)
+
+ def unknown_visit(self, node: Node) -> None:
+ pass
+
+ def unknown_departure(self, node: Node) -> None:
+ pass
+
+ def visit_document(self, node: Node) -> None:
+ self.settings.append(self.default_setting)
+
+ def depart_document(self, node: Node) -> None:
+ self.settings.pop()
+
+ def visit_start_of_file(self, node: Node) -> None:
+ self.settings.append(self.default_setting)
+
+ def depart_start_of_file(self, node: Node) -> None:
+ self.settings.pop()
+
+ def visit_highlightlang(self, node: addnodes.highlightlang) -> None:
+ self.settings[-1] = HighlightSetting(node['lang'],
+ node['force'],
+ node['linenothreshold'])
+
+ def visit_literal_block(self, node: nodes.literal_block) -> None:
+ setting = self.settings[-1]
+ if 'language' not in node:
+ node['language'] = setting.language
+ node['force'] = setting.force
+ if 'linenos' not in node:
+ lines = node.astext().count('\n')
+ node['linenos'] = (lines >= setting.lineno_threshold - 1)
+
+
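A small sketch of the threshold rule above: line numbers are switched on once a block
reaches ``linenothreshold`` lines (counting newlines yields one less than the number
of lines):

    >>> setting = HighlightSetting('python', force=False, lineno_threshold=5)
    >>> code = 'a = 1\nb = 2\nc = 3\nd = 4\ne = 5'   # 5 lines, 4 newlines
    >>> code.count('\n') >= setting.lineno_threshold - 1
    True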
+class TrimDoctestFlagsTransform(SphinxTransform):
+ """
+ Trim doctest flags like ``# doctest: +FLAG`` from python code-blocks.
+
+ see :confval:`trim_doctest_flags` for more information.
+ """
+ default_priority = HighlightLanguageTransform.default_priority + 1
+
+ def apply(self, **kwargs: Any) -> None:
+ for lbnode in self.document.findall(nodes.literal_block):
+ if self.is_pyconsole(lbnode):
+ self.strip_doctest_flags(lbnode)
+
+ for dbnode in self.document.findall(nodes.doctest_block):
+ self.strip_doctest_flags(dbnode)
+
+ def strip_doctest_flags(self, node: TextElement) -> None:
+ if not node.get('trim_flags', self.config.trim_doctest_flags):
+ return
+
+ source = node.rawsource
+ source = doctest.blankline_re.sub('', source)
+ source = doctest.doctestopt_re.sub('', source)
+ node.rawsource = source
+ node[:] = [nodes.Text(source)]
+
+ @staticmethod
+ def is_pyconsole(node: nodes.literal_block) -> bool:
+ if node.rawsource != node.astext():
+ return False # skip parsed-literal node
+
+ language = node.get('language')
+ if language in {'pycon', 'pycon3'}:
+ return True
+ elif language in {'py', 'python', 'py3', 'python3', 'default'}:
+ return node.rawsource.startswith('>>>')
+ elif language == 'guess':
+ try:
+ lexer = guess_lexer(node.rawsource)
+ return isinstance(lexer, PythonConsoleLexer)
+ except Exception:
+ pass
+
+ return False
+
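What the stripping removes, sketched with a console snippet (assuming the
``doctestopt_re`` pattern from sphinx.ext.doctest matches trailing ``# doctest:``
comments to the end of the line):

    source = '>>> 1 / 3  # doctest: +ELLIPSIS\n0.333...'
    stripped = doctest.doctestopt_re.sub('', source)
    # stripped == '>>> 1 / 3  \n0.333...'  (the flag comment is gone;
    # any whitespace before it is left in place)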
+
+def setup(app: Sphinx) -> dict[str, Any]:
+ app.add_post_transform(HighlightLanguageTransform)
+ app.add_post_transform(TrimDoctestFlagsTransform)
+
+ return {
+ 'version': 'builtin',
+ 'parallel_read_safe': True,
+ 'parallel_write_safe': True,
+ }
diff --git a/sphinx/transforms/post_transforms/images.py b/sphinx/transforms/post_transforms/images.py
new file mode 100644
index 0000000..e220df0
--- /dev/null
+++ b/sphinx/transforms/post_transforms/images.py
@@ -0,0 +1,280 @@
+"""Docutils transforms used by Sphinx."""
+
+from __future__ import annotations
+
+import os
+import re
+from hashlib import sha1
+from math import ceil
+from typing import TYPE_CHECKING, Any
+
+from docutils import nodes
+
+from sphinx.locale import __
+from sphinx.transforms import SphinxTransform
+from sphinx.util import logging, requests
+from sphinx.util.http_date import epoch_to_rfc1123, rfc1123_to_epoch
+from sphinx.util.images import get_image_extension, guess_mimetype, parse_data_uri
+from sphinx.util.osutil import ensuredir
+
+if TYPE_CHECKING:
+ from sphinx.application import Sphinx
+
+logger = logging.getLogger(__name__)
+
+MAX_FILENAME_LEN = 32
+CRITICAL_PATH_CHAR_RE = re.compile('[:;<>|*" ]')
+
+
+class BaseImageConverter(SphinxTransform):
+ def apply(self, **kwargs: Any) -> None:
+ for node in self.document.findall(nodes.image):
+ if self.match(node):
+ self.handle(node)
+
+ def match(self, node: nodes.image) -> bool:
+ return True
+
+ def handle(self, node: nodes.image) -> None:
+ pass
+
+ @property
+ def imagedir(self) -> str:
+ return os.path.join(self.app.doctreedir, 'images')
+
+
+class ImageDownloader(BaseImageConverter):
+ default_priority = 100
+
+ def match(self, node: nodes.image) -> bool:
+ if self.app.builder.supported_image_types == []:
+ return False
+ if self.app.builder.supported_remote_images:
+ return False
+ return '://' in node['uri']
+
+ def handle(self, node: nodes.image) -> None:
+ try:
+ basename = os.path.basename(node['uri'])
+ if '?' in basename:
+ basename = basename.split('?')[0]
+ if basename == '' or len(basename) > MAX_FILENAME_LEN:
+ filename, ext = os.path.splitext(node['uri'])
+ basename = sha1(filename.encode(), usedforsecurity=False).hexdigest() + ext
+ basename = re.sub(CRITICAL_PATH_CHAR_RE, "_", basename)
+
+ dirname = node['uri'].replace('://', '/').translate({ord("?"): "/",
+ ord("&"): "/"})
+ if len(dirname) > MAX_FILENAME_LEN:
+ dirname = sha1(dirname.encode(), usedforsecurity=False).hexdigest()
+ ensuredir(os.path.join(self.imagedir, dirname))
+ path = os.path.join(self.imagedir, dirname, basename)
+
+ headers = {}
+ if os.path.exists(path):
+ timestamp: float = ceil(os.stat(path).st_mtime)
+ headers['If-Modified-Since'] = epoch_to_rfc1123(timestamp)
+
+ r = requests.get(node['uri'], headers=headers)
+ if r.status_code >= 400:
+                logger.warning(__('Could not fetch remote image: %s [%d]'),
+                               node['uri'], r.status_code)
+ else:
+ self.app.env.original_image_uri[path] = node['uri']
+
+ if r.status_code == 200:
+ with open(path, 'wb') as f:
+ f.write(r.content)
+
+ last_modified = r.headers.get('last-modified')
+ if last_modified:
+ timestamp = rfc1123_to_epoch(last_modified)
+ os.utime(path, (timestamp, timestamp))
+
+ mimetype = guess_mimetype(path, default='*')
+ if mimetype != '*' and os.path.splitext(basename)[1] == '':
+                    # append a suffix if the URI does not contain one
+ ext = get_image_extension(mimetype)
+ newpath = os.path.join(self.imagedir, dirname, basename + ext)
+ os.replace(path, newpath)
+ self.app.env.original_image_uri.pop(path)
+ self.app.env.original_image_uri[newpath] = node['uri']
+ path = newpath
+ node['candidates'].pop('?')
+ node['candidates'][mimetype] = path
+ node['uri'] = path
+ self.app.env.images.add_file(self.env.docname, path)
+ except Exception as exc:
+            logger.warning(__('Could not fetch remote image: %s [%s]'), node['uri'], exc)
+
+
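The ``handle()`` method above is a plain HTTP conditional request: when a cached copy exists, its mtime is advertised via ``If-Modified-Since``; a ``304 Not Modified`` response keeps the cached file untouched, while a ``200`` rewrites it and back-dates the mtime from ``Last-Modified``. The same pattern in isolation (hypothetical URL and path; plain ``requests`` stands in for ``sphinx.util.requests``)::

    import os
    from math import ceil

    import requests

    from sphinx.util.http_date import epoch_to_rfc1123

    url, path = 'https://example.com/logo.png', '/tmp/logo.png'  # hypothetical

    headers = {}
    if os.path.exists(path):
        # advertise the timestamp of the cached copy
        headers['If-Modified-Since'] = epoch_to_rfc1123(ceil(os.stat(path).st_mtime))

    r = requests.get(url, headers=headers)
    if r.status_code == 200:  # modified (or no cache yet): overwrite
        with open(path, 'wb') as f:
            f.write(r.content)
    # on 304 Not Modified the cached file is simply kept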
+class DataURIExtractor(BaseImageConverter):
+ default_priority = 150
+
+ def match(self, node: nodes.image) -> bool:
+ if self.app.builder.supported_remote_images == []:
+ return False
+ if self.app.builder.supported_data_uri_images is True:
+ return False
+ return node['uri'].startswith('data:')
+
+ def handle(self, node: nodes.image) -> None:
+ image = parse_data_uri(node['uri'])
+ assert image is not None
+ ext = get_image_extension(image.mimetype)
+ if ext is None:
+ logger.warning(__('Unknown image format: %s...'), node['uri'][:32],
+ location=node)
+ return
+
+        ensuredir(os.path.join(self.imagedir, 'embedded'))
+        digest = sha1(image.data, usedforsecurity=False).hexdigest()
+        path = os.path.join(self.imagedir, 'embedded', digest + ext)
+ self.app.env.original_image_uri[path] = node['uri']
+
+ with open(path, 'wb') as f:
+ f.write(image.data)
+
+ node['candidates'].pop('?')
+ node['candidates'][image.mimetype] = path
+ node['uri'] = path
+ self.app.env.images.add_file(self.env.docname, path)
+
+
+def get_filename_for(filename: str, mimetype: str) -> str:
+ basename = os.path.basename(filename)
+ basename = re.sub(CRITICAL_PATH_CHAR_RE, "_", basename)
+ return os.path.splitext(basename)[0] + (get_image_extension(mimetype) or '')
+
+
+class ImageConverter(BaseImageConverter):
+ """A base class for image converters.
+
+    An image converter is a kind of Docutils transform module.  It is used
+    to convert image files which are not supported by a builder to a
+    format that builder can handle.
+
+    For example, the :py:class:`LaTeX builder <.LaTeXBuilder>` supports PDF,
+    PNG and JPEG as image formats, but not SVG.  In such a case, an image
+    converter can be used to embed the unsupported images in the document.
+    One of the bundled converters,
+    :ref:`sphinx.ext.imgconverter <sphinx.ext.imgconverter>`, converts
+    SVG images to PNG format using ImageMagick internally.
+
+    There are three steps to write a custom image converter (a minimal
+    sketch of all three follows this file's diff):
+
+    1. Make a subclass of the ``ImageConverter`` class
+    2. Override ``conversion_rules``, ``is_available()`` and ``convert()``
+    3. Register the image converter with Sphinx using
+       :py:meth:`.Sphinx.add_post_transform`
+ default_priority = 200
+
+    #: Whether the converter is available.  Filled at the first call of
+    #: the build; the result is shared within the same process.
+    #:
+    #: .. todo:: This should be refactored so that the state is not stored
+    #:           in a class variable.
+ available: bool | None = None
+
+    #: The conversion rules this image converter supports.
+    #: Represented as a list of pairs of source image format (mimetype)
+    #: and destination format::
+ #:
+ #: conversion_rules = [
+ #: ('image/svg+xml', 'image/png'),
+ #: ('image/gif', 'image/png'),
+ #: ('application/pdf', 'image/png'),
+ #: ]
+ conversion_rules: list[tuple[str, str]] = []
+
+ def __init__(self, *args: Any, **kwargs: Any) -> None:
+ super().__init__(*args, **kwargs)
+
+ def match(self, node: nodes.image) -> bool:
+ if not self.app.builder.supported_image_types:
+ return False
+ if '?' in node['candidates']:
+ return False
+ if set(self.guess_mimetypes(node)) & set(self.app.builder.supported_image_types):
+ # builder supports the image; no need to convert
+ return False
+ if self.available is None:
+ # store the value to the class variable to share it during the build
+ self.__class__.available = self.is_available()
+
+ if not self.available:
+ return False
+ else:
+ try:
+ self.get_conversion_rule(node)
+ except ValueError:
+ return False
+ else:
+ return True
+
+ def get_conversion_rule(self, node: nodes.image) -> tuple[str, str]:
+ for candidate in self.guess_mimetypes(node):
+ for supported in self.app.builder.supported_image_types:
+ rule = (candidate, supported)
+ if rule in self.conversion_rules:
+ return rule
+
+ msg = 'No conversion rule found'
+ raise ValueError(msg)
+
+ def is_available(self) -> bool:
+ """Return the image converter is available or not."""
+ raise NotImplementedError
+
+ def guess_mimetypes(self, node: nodes.image) -> list[str]:
+ if '?' in node['candidates']:
+ return []
+ elif '*' in node['candidates']:
+ guessed = guess_mimetype(node['uri'])
+ return [guessed] if guessed is not None else []
+ else:
+            return list(node['candidates'].keys())
+
+ def handle(self, node: nodes.image) -> None:
+ _from, _to = self.get_conversion_rule(node)
+
+ if _from in node['candidates']:
+ srcpath = node['candidates'][_from]
+ else:
+ srcpath = node['candidates']['*']
+
+ filename = self.env.images[srcpath][1]
+ filename = get_filename_for(filename, _to)
+ ensuredir(self.imagedir)
+ destpath = os.path.join(self.imagedir, filename)
+
+ abs_srcpath = os.path.join(self.app.srcdir, srcpath)
+ if self.convert(abs_srcpath, destpath):
+ if '*' in node['candidates']:
+ node['candidates']['*'] = destpath
+ else:
+ node['candidates'][_to] = destpath
+ node['uri'] = destpath
+
+ self.env.original_image_uri[destpath] = srcpath
+ self.env.images.add_file(self.env.docname, destpath)
+
+ def convert(self, _from: str, _to: str) -> bool:
+ """Convert an image file to the expected format.
+
+ *_from* is a path of the source image file, and *_to* is a path
+ of the destination file.
+ """
+ raise NotImplementedError
+
+
+def setup(app: Sphinx) -> dict[str, Any]:
+ app.add_post_transform(ImageDownloader)
+ app.add_post_transform(DataURIExtractor)
+
+ return {
+ 'version': 'builtin',
+ 'parallel_read_safe': True,
+ 'parallel_write_safe': True,
+ }
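Following the three steps listed in the ``ImageConverter`` docstring, a minimal converter might look like the sketch below. Everything here is illustrative: the ``svg2png`` command line tool and the extension itself are hypothetical, not part of Sphinx::

    import subprocess
    from shutil import which
    from typing import Any

    from sphinx.application import Sphinx
    from sphinx.transforms.post_transforms.images import ImageConverter


    class SVGToPNGConverter(ImageConverter):
        # step 2a: the conversion rules this converter supports
        conversion_rules = [
            ('image/svg+xml', 'image/png'),
        ]

        def is_available(self) -> bool:
            # step 2b: only claim availability if the external tool exists
            return which('svg2png') is not None

        def convert(self, _from: str, _to: str) -> bool:
            # step 2c: delegate the actual conversion to the external tool
            try:
                subprocess.run(['svg2png', _from, _to], check=True)
            except (OSError, subprocess.CalledProcessError):
                return False
            return True


    def setup(app: Sphinx) -> dict[str, Any]:
        # step 3: register the converter as a post-transform
        app.add_post_transform(SVGToPNGConverter)
        return {'version': '1.0', 'parallel_read_safe': True}

``sphinx.ext.imgconverter`` follows exactly this shape, shelling out to ImageMagick instead.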
diff --git a/sphinx/transforms/references.py b/sphinx/transforms/references.py
new file mode 100644
index 0000000..5de3a95
--- /dev/null
+++ b/sphinx/transforms/references.py
@@ -0,0 +1,47 @@
+"""Docutils transforms used by Sphinx."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from docutils.transforms.references import DanglingReferences
+
+from sphinx.transforms import SphinxTransform
+
+if TYPE_CHECKING:
+ from sphinx.application import Sphinx
+
+
+class SphinxDanglingReferences(DanglingReferences):
+ """DanglingReferences transform which does not output info messages."""
+
+    def apply(self, **kwargs: Any) -> None:
+        reporter = self.document.reporter
+        report_level = reporter.report_level
+        try:
+            # suppress INFO level messages for a while
+            reporter.report_level = max(reporter.WARNING_LEVEL, report_level)
+            super().apply()
+        finally:
+            reporter.report_level = report_level
+
+
+class SphinxDomains(SphinxTransform):
+ """Collect objects to Sphinx domains for cross references."""
+ default_priority = 850
+
+ def apply(self, **kwargs: Any) -> None:
+ for domain in self.env.domains.values():
+ domain.process_doc(self.env, self.env.docname, self.document)
+
+
+def setup(app: Sphinx) -> dict[str, Any]:
+ app.add_transform(SphinxDanglingReferences)
+ app.add_transform(SphinxDomains)
+
+ return {
+ 'version': 'builtin',
+ 'parallel_read_safe': True,
+ 'parallel_write_safe': True,
+ }
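For completeness: third-party extensions register read-phase transforms through the same ``Sphinx.add_transform()`` API used above, with ordering controlled by ``default_priority`` (lower priorities run earlier). A sketch with an invented transform name::

    from typing import Any

    from docutils import nodes

    from sphinx.transforms import SphinxTransform


    class MarkTodoParagraphs(SphinxTransform):
        # run after SphinxDomains (850) has collected objects
        default_priority = 900

        def apply(self, **kwargs: Any) -> None:
            for node in self.document.findall(nodes.paragraph):
                if node.astext().startswith('TODO:'):
                    node['classes'].append('todo')  # tag for later styling


    def setup(app):
        app.add_transform(MarkTodoParagraphs)
        return {'version': '1.0', 'parallel_read_safe': True}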