diff options
Diffstat (limited to 'sphinx/transforms')
-rw-r--r-- | sphinx/transforms/__init__.py                 | 516 |
-rw-r--r-- | sphinx/transforms/compact_bullet_list.py      |  91 |
-rw-r--r-- | sphinx/transforms/i18n.py                     | 624 |
-rw-r--r-- | sphinx/transforms/post_transforms/__init__.py | 297 |
-rw-r--r-- | sphinx/transforms/post_transforms/code.py     | 139 |
-rw-r--r-- | sphinx/transforms/post_transforms/images.py   | 280 |
-rw-r--r-- | sphinx/transforms/references.py               |  47 |
7 files changed, 1994 insertions, 0 deletions
diff --git a/sphinx/transforms/__init__.py b/sphinx/transforms/__init__.py new file mode 100644 index 0000000..8a806cd --- /dev/null +++ b/sphinx/transforms/__init__.py @@ -0,0 +1,516 @@ +"""Docutils transforms used by Sphinx when reading documents.""" + +from __future__ import annotations + +import re +import unicodedata +from typing import TYPE_CHECKING, Any, cast + +from docutils import nodes +from docutils.transforms import Transform, Transformer +from docutils.transforms.parts import ContentsFilter +from docutils.transforms.universal import SmartQuotes +from docutils.utils import normalize_language_tag +from docutils.utils.smartquotes import smartchars + +from sphinx import addnodes +from sphinx.locale import _, __ +from sphinx.util import logging +from sphinx.util.docutils import new_document +from sphinx.util.i18n import format_date +from sphinx.util.nodes import apply_source_workaround, is_smartquotable + +if TYPE_CHECKING: + from collections.abc import Generator + + from docutils.nodes import Node, Text + + from sphinx.application import Sphinx + from sphinx.config import Config + from sphinx.domains.std import StandardDomain + from sphinx.environment import BuildEnvironment + + +logger = logging.getLogger(__name__) + +default_substitutions = { + 'version', + 'release', + 'today', + 'translation progress', +} + + +class SphinxTransform(Transform): + """A base class of Transforms. + + Compared with ``docutils.transforms.Transform``, this class improves accessibility to + Sphinx APIs. + """ + + @property + def app(self) -> Sphinx: + """Reference to the :class:`.Sphinx` object.""" + return self.env.app + + @property + def env(self) -> BuildEnvironment: + """Reference to the :class:`.BuildEnvironment` object.""" + return self.document.settings.env + + @property + def config(self) -> Config: + """Reference to the :class:`.Config` object.""" + return self.env.config + + +class SphinxTransformer(Transformer): + """ + A transformer for Sphinx. 
+ """ + + document: nodes.document + env: BuildEnvironment | None = None + + def set_environment(self, env: BuildEnvironment) -> None: + self.env = env + + def apply_transforms(self) -> None: + if isinstance(self.document, nodes.document): + if not hasattr(self.document.settings, 'env') and self.env: + self.document.settings.env = self.env + + super().apply_transforms() + else: + # wrap the target node by document node during transforming + try: + document = new_document('') + if self.env: + document.settings.env = self.env + document += self.document + self.document = document + super().apply_transforms() + finally: + self.document = self.document[0] + + +class DefaultSubstitutions(SphinxTransform): + """ + Replace some substitutions if they aren't defined in the document. + """ + # run before the default Substitutions + default_priority = 210 + + def apply(self, **kwargs: Any) -> None: + # only handle those not otherwise defined in the document + to_handle = default_substitutions - set(self.document.substitution_defs) + for ref in self.document.findall(nodes.substitution_reference): + refname = ref['refname'] + if refname in to_handle: + if refname == 'translation progress': + # special handling: calculate translation progress + text = _calculate_translation_progress(self.document) + else: + text = self.config[refname] + if refname == 'today' and not text: + # special handling: can also specify a strftime format + text = format_date(self.config.today_fmt or _('%b %d, %Y'), + language=self.config.language) + ref.replace_self(nodes.Text(text)) + + +def _calculate_translation_progress(document: nodes.document) -> str: + try: + translation_progress = document['translation_progress'] + except KeyError: + return _('could not calculate translation progress!') + + total = translation_progress['total'] + translated = translation_progress['translated'] + if total <= 0: + return _('no translated elements!') + return f'{translated / total:.2%}' + + +class 
MoveModuleTargets(SphinxTransform): + """ + Move module targets that are the first thing in a section to the section + title. + + XXX Python specific + """ + default_priority = 210 + + def apply(self, **kwargs: Any) -> None: + for node in list(self.document.findall(nodes.target)): + if not node['ids']: + continue + if ( + 'ismod' in node + and type(node.parent) is nodes.section + # index 0: section title node + # index 1: index node + # index 2: target node + and node.parent.index(node) == 2 + ): + node.parent['ids'][0:0] = node['ids'] + node.parent.remove(node) + + +class HandleCodeBlocks(SphinxTransform): + """ + Several code block related transformations. + """ + default_priority = 210 + + def apply(self, **kwargs: Any) -> None: + # move doctest blocks out of blockquotes + for node in self.document.findall(nodes.block_quote): + if all(isinstance(child, nodes.doctest_block) for child + in node.children): + node.replace_self(node.children) + # combine successive doctest blocks + # for node in self.document.findall(nodes.doctest_block): + # if node not in node.parent.children: + # continue + # parindex = node.parent.index(node) + # while len(node.parent) > parindex+1 and \ + # isinstance(node.parent[parindex+1], nodes.doctest_block): + # node[0] = nodes.Text(node[0] + '\n\n' + + # node.parent[parindex+1][0]) + # del node.parent[parindex+1] + + +class AutoNumbering(SphinxTransform): + """ + Register IDs of tables, figures and literal_blocks to assign numbers. + """ + default_priority = 210 + + def apply(self, **kwargs: Any) -> None: + domain: StandardDomain = self.env.domains['std'] + + for node in self.document.findall(nodes.Element): + if (domain.is_enumerable_node(node) and + domain.get_numfig_title(node) is not None and + node['ids'] == []): + self.document.note_implicit_target(node) + + +class SortIds(SphinxTransform): + """ + Sort section IDs so that the "id[0-9]+" one comes last. 
+ """ + default_priority = 261 + + def apply(self, **kwargs: Any) -> None: + for node in self.document.findall(nodes.section): + if len(node['ids']) > 1 and node['ids'][0].startswith('id'): + node['ids'] = node['ids'][1:] + [node['ids'][0]] + + +TRANSLATABLE_NODES = { + 'literal-block': nodes.literal_block, + 'doctest-block': nodes.doctest_block, + 'raw': nodes.raw, + 'index': addnodes.index, + 'image': nodes.image, +} + + +class ApplySourceWorkaround(SphinxTransform): + """ + Update source and rawsource attributes + """ + default_priority = 10 + + def apply(self, **kwargs: Any) -> None: + for node in self.document.findall(): # type: Node + if isinstance(node, (nodes.TextElement, nodes.image, nodes.topic)): + apply_source_workaround(node) + + +class AutoIndexUpgrader(SphinxTransform): + """ + Detect old style (4 column based indices) and automatically upgrade to new style. + """ + default_priority = 210 + + def apply(self, **kwargs: Any) -> None: + for node in self.document.findall(addnodes.index): + if 'entries' in node and any(len(entry) == 4 for entry in node['entries']): + msg = __('4 column based index found. 
' + 'It might be a bug of extensions you use: %r') % node['entries'] + logger.warning(msg, location=node) + for i, entry in enumerate(node['entries']): + if len(entry) == 4: + node['entries'][i] = entry + (None,) + + +class ExtraTranslatableNodes(SphinxTransform): + """ + Make nodes translatable + """ + default_priority = 10 + + def apply(self, **kwargs: Any) -> None: + targets = self.config.gettext_additional_targets + target_nodes = [v for k, v in TRANSLATABLE_NODES.items() if k in targets] + if not target_nodes: + return + + def is_translatable_node(node: Node) -> bool: + return isinstance(node, tuple(target_nodes)) + + for node in self.document.findall(is_translatable_node): # type: nodes.Element + node['translatable'] = True + + +class UnreferencedFootnotesDetector(SphinxTransform): + """ + Detect unreferenced footnotes and emit warnings + """ + default_priority = 200 + + def apply(self, **kwargs: Any) -> None: + for node in self.document.footnotes: + if node['names'] == []: + # footnote having duplicated number. It is already warned at parser. 
+ pass + elif node['names'][0] not in self.document.footnote_refs: + logger.warning(__('Footnote [%s] is not referenced.'), node['names'][0], + type='ref', subtype='footnote', + location=node) + + for node in self.document.autofootnotes: + if not any(ref['auto'] == node['auto'] for ref in self.document.autofootnote_refs): + logger.warning(__('Footnote [#] is not referenced.'), + type='ref', subtype='footnote', + location=node) + + +class DoctestTransform(SphinxTransform): + """Set "doctest" style to each doctest_block node""" + default_priority = 500 + + def apply(self, **kwargs: Any) -> None: + for node in self.document.findall(nodes.doctest_block): + node['classes'].append('doctest') + + +class FilterSystemMessages(SphinxTransform): + """Filter system messages from a doctree.""" + default_priority = 999 + + def apply(self, **kwargs: Any) -> None: + filterlevel = 2 if self.config.keep_warnings else 5 + for node in list(self.document.findall(nodes.system_message)): + if node['level'] < filterlevel: + logger.debug('%s [filtered system message]', node.astext()) + node.parent.remove(node) + + +class SphinxContentsFilter(ContentsFilter): + """ + Used with BuildEnvironment.add_toc_from() to discard cross-file links + within table-of-contents link nodes. + """ + visit_pending_xref = ContentsFilter.ignore_node_but_process_children + + def visit_image(self, node: nodes.image) -> None: + raise nodes.SkipNode + + +class SphinxSmartQuotes(SmartQuotes, SphinxTransform): + """ + Customized SmartQuotes to avoid transform for some extra node types. 
+ + refs: sphinx.parsers.RSTParser + """ + default_priority = 750 + + def apply(self, **kwargs: Any) -> None: + if not self.is_available(): + return + + # override default settings with :confval:`smartquotes_action` + self.smartquotes_action = self.config.smartquotes_action + + super().apply() + + def is_available(self) -> bool: + builders = self.config.smartquotes_excludes.get('builders', []) + languages = self.config.smartquotes_excludes.get('languages', []) + + if self.document.settings.smart_quotes is False: + # disabled by 3rd party extension (workaround) + return False + if self.config.smartquotes is False: + # disabled by confval smartquotes + return False + if self.app.builder.name in builders: + # disabled by confval smartquotes_excludes['builders'] + return False + if self.config.language in languages: + # disabled by confval smartquotes_excludes['languages'] + return False + + # confirm selected language supports smart_quotes or not + language = self.env.settings['language_code'] + return any( + tag in smartchars.quotes + for tag in normalize_language_tag(language) + ) + + def get_tokens(self, txtnodes: list[Text]) -> Generator[tuple[str, str], None, None]: + # A generator that yields ``(texttype, nodetext)`` tuples for a list + # of "Text" nodes (interface to ``smartquotes.educate_tokens()``). 
+ for txtnode in txtnodes: + if is_smartquotable(txtnode): + # SmartQuotes uses backslash escapes instead of null-escapes + text = re.sub(r'(?<=\x00)([-\\\'".`])', r'\\\1', str(txtnode)) + yield 'plain', text + else: + # skip smart quotes + yield 'literal', txtnode.astext() + + +class DoctreeReadEvent(SphinxTransform): + """Emit :event:`doctree-read` event.""" + default_priority = 880 + + def apply(self, **kwargs: Any) -> None: + self.app.emit('doctree-read', self.document) + + +class ManpageLink(SphinxTransform): + """Find manpage section numbers and names""" + default_priority = 999 + + def apply(self, **kwargs: Any) -> None: + for node in self.document.findall(addnodes.manpage): + manpage = ' '.join([str(x) for x in node.children + if isinstance(x, nodes.Text)]) + pattern = r'^(?P<path>(?P<page>.+)[\(\.](?P<section>[1-9]\w*)?\)?)$' + info = {'path': manpage, + 'page': manpage, + 'section': ''} + r = re.match(pattern, manpage) + if r: + info = r.groupdict() + node.attributes.update(info) + + +class GlossarySorter(SphinxTransform): + """Sort glossaries that have the ``sorted`` flag.""" + # This must be done after i18n, therefore not right + # away in the glossary directive. + default_priority = 500 + + def apply(self, **kwargs: Any) -> None: + for glossary in self.document.findall(addnodes.glossary): + if glossary["sorted"]: + definition_list = cast(nodes.definition_list, glossary[0]) + definition_list[:] = sorted( + definition_list, + key=lambda item: unicodedata.normalize( + 'NFD', + cast(nodes.term, item)[0].astext().lower()), + ) + + +class ReorderConsecutiveTargetAndIndexNodes(SphinxTransform): + """Index nodes interspersed between target nodes prevent other + Transformations from combining those target nodes, + e.g. ``PropagateTargets``. 
This transformation reorders them: + + Given the following ``document`` as input:: + + <document> + <target ids="id1" ...> + <index entries="...1..."> + <target ids="id2" ...> + <target ids="id3" ...> + <index entries="...2..."> + <target ids="id4" ...> + + The transformed result will be:: + + <document> + <index entries="...1..."> + <index entries="...2..."> + <target ids="id1" ...> + <target ids="id2" ...> + <target ids="id3" ...> + <target ids="id4" ...> + """ + + # This transform MUST run before ``PropagateTargets``. + default_priority = 220 + + def apply(self, **kwargs: Any) -> None: + for target in self.document.findall(nodes.target): + _reorder_index_target_nodes(target) + + +def _reorder_index_target_nodes(start_node: nodes.target) -> None: + """Sort target and index nodes. + + Find all consecutive target and index nodes starting from ``start_node``, + and move all index nodes to before the first target node. + """ + nodes_to_reorder: list[nodes.target | addnodes.index] = [] + + # Note that we cannot use 'condition' to filter, + # as we want *consecutive* target & index nodes. + node: nodes.Node + for node in start_node.findall(descend=False, siblings=True): + if isinstance(node, (nodes.target, addnodes.index)): + nodes_to_reorder.append(node) + continue + break # must be a consecutive run of target or index nodes + + if len(nodes_to_reorder) < 2: + return # Nothing to reorder + + parent = nodes_to_reorder[0].parent + if parent == nodes_to_reorder[-1].parent: + first_idx = parent.index(nodes_to_reorder[0]) + last_idx = parent.index(nodes_to_reorder[-1]) + if first_idx + len(nodes_to_reorder) - 1 == last_idx: + parent[first_idx:last_idx + 1] = sorted(nodes_to_reorder, key=_sort_key) + + +def _sort_key(node: nodes.Node) -> int: + # Must be a stable sort. 
+ if isinstance(node, addnodes.index): + return 0 + if isinstance(node, nodes.target): + return 1 + msg = f'_sort_key called with unexpected node type {type(node)!r}' + raise ValueError(msg) + + +def setup(app: Sphinx) -> dict[str, Any]: + app.add_transform(ApplySourceWorkaround) + app.add_transform(ExtraTranslatableNodes) + app.add_transform(DefaultSubstitutions) + app.add_transform(MoveModuleTargets) + app.add_transform(HandleCodeBlocks) + app.add_transform(SortIds) + app.add_transform(DoctestTransform) + app.add_transform(AutoNumbering) + app.add_transform(AutoIndexUpgrader) + app.add_transform(FilterSystemMessages) + app.add_transform(UnreferencedFootnotesDetector) + app.add_transform(SphinxSmartQuotes) + app.add_transform(DoctreeReadEvent) + app.add_transform(ManpageLink) + app.add_transform(GlossarySorter) + app.add_transform(ReorderConsecutiveTargetAndIndexNodes) + + return { + 'version': 'builtin', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/sphinx/transforms/compact_bullet_list.py b/sphinx/transforms/compact_bullet_list.py new file mode 100644 index 0000000..149b5e0 --- /dev/null +++ b/sphinx/transforms/compact_bullet_list.py @@ -0,0 +1,91 @@ +"""Docutils transforms used by Sphinx when reading documents.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, cast + +from docutils import nodes + +from sphinx import addnodes +from sphinx.transforms import SphinxTransform + +if TYPE_CHECKING: + from docutils.nodes import Node + + from sphinx.application import Sphinx + + +class RefOnlyListChecker(nodes.GenericNodeVisitor): + """Raise `nodes.NodeFound` if non-simple list item is encountered. + + Here 'simple' means a list item containing only a paragraph with a + single reference in it. 
+ """ + + def default_visit(self, node: Node) -> None: + raise nodes.NodeFound + + def visit_bullet_list(self, node: nodes.bullet_list) -> None: + pass + + def visit_list_item(self, node: nodes.list_item) -> None: + children: list[Node] = [] + for child in node.children: + if not isinstance(child, nodes.Invisible): + children.append(child) + if len(children) != 1: + raise nodes.NodeFound + if not isinstance(children[0], nodes.paragraph): + raise nodes.NodeFound + para = children[0] + if len(para) != 1: + raise nodes.NodeFound + if not isinstance(para[0], addnodes.pending_xref): + raise nodes.NodeFound + raise nodes.SkipChildren + + def invisible_visit(self, node: Node) -> None: + """Invisible nodes should be ignored.""" + pass + + +class RefOnlyBulletListTransform(SphinxTransform): + """Change refonly bullet lists to use compact_paragraphs. + + Specifically implemented for 'Indices and Tables' section, which looks + odd when html_compact_lists is false. + """ + default_priority = 100 + + def apply(self, **kwargs: Any) -> None: + if self.config.html_compact_lists: + return + + def check_refonly_list(node: Node) -> bool: + """Check for list with only references in it.""" + visitor = RefOnlyListChecker(self.document) + try: + node.walk(visitor) + except nodes.NodeFound: + return False + else: + return True + + for node in self.document.findall(nodes.bullet_list): + if check_refonly_list(node): + for item in node.findall(nodes.list_item): + para = cast(nodes.paragraph, item[0]) + ref = cast(nodes.reference, para[0]) + compact_para = addnodes.compact_paragraph() + compact_para += ref + item.replace(para, compact_para) + + +def setup(app: Sphinx) -> dict[str, Any]: + app.add_transform(RefOnlyBulletListTransform) + + return { + 'version': 'builtin', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/sphinx/transforms/i18n.py b/sphinx/transforms/i18n.py new file mode 100644 index 0000000..d26c279 --- /dev/null +++ b/sphinx/transforms/i18n.py @@ 
-0,0 +1,624 @@ +"""Docutils transforms used by Sphinx when reading documents.""" + +from __future__ import annotations + +import contextlib +from os import path +from re import DOTALL, match +from textwrap import indent +from typing import TYPE_CHECKING, Any, TypeVar + +from docutils import nodes +from docutils.io import StringInput + +from sphinx import addnodes +from sphinx.domains.std import make_glossary_term, split_term_classifiers +from sphinx.errors import ConfigError +from sphinx.locale import __ +from sphinx.locale import init as init_locale +from sphinx.transforms import SphinxTransform +from sphinx.util import get_filetype, logging +from sphinx.util.i18n import docname_to_domain +from sphinx.util.index_entries import split_index_msg +from sphinx.util.nodes import ( + IMAGE_TYPE_NODES, + LITERAL_TYPE_NODES, + NodeMatcher, + extract_messages, + traverse_translatable_index, +) + +if TYPE_CHECKING: + from collections.abc import Sequence + + from sphinx.application import Sphinx + from sphinx.config import Config + + +logger = logging.getLogger(__name__) + +# The attributes not copied to the translated node +# +# * refexplict: For allow to give (or not to give) an explicit title +# to the pending_xref on translation +EXCLUDED_PENDING_XREF_ATTRIBUTES = ('refexplicit',) + + +N = TypeVar('N', bound=nodes.Node) + + +def publish_msgstr(app: Sphinx, source: str, source_path: str, source_line: int, + config: Config, settings: Any) -> nodes.Element: + """Publish msgstr (single line) into docutils document + + :param sphinx.application.Sphinx app: sphinx application + :param str source: source text + :param str source_path: source path for warning indication + :param source_line: source line for warning indication + :param sphinx.config.Config config: sphinx config + :param docutils.frontend.Values settings: docutils settings + :return: document + :rtype: docutils.nodes.document + """ + try: + # clear rst_prolog temporarily + rst_prolog = config.rst_prolog + 
config.rst_prolog = None # type: ignore[attr-defined] + + from sphinx.io import SphinxI18nReader + reader = SphinxI18nReader() + reader.setup(app) + filetype = get_filetype(config.source_suffix, source_path) + parser = app.registry.create_source_parser(app, filetype) + doc = reader.read( + source=StringInput(source=source, + source_path=f"{source_path}:{source_line}:<translated>"), + parser=parser, + settings=settings, + ) + with contextlib.suppress(IndexError): # empty node + return doc[0] # type: ignore[return-value] + return doc + finally: + config.rst_prolog = rst_prolog # type: ignore[attr-defined] + + +def parse_noqa(source: str) -> tuple[str, bool]: + m = match(r"(.*)(?<!\\)#\s*noqa\s*$", source, DOTALL) + if m: + return m.group(1), True + else: + return source, False + + +class PreserveTranslatableMessages(SphinxTransform): + """ + Preserve original translatable messages before translation + """ + default_priority = 10 # this MUST be invoked before Locale transform + + def apply(self, **kwargs: Any) -> None: + for node in self.document.findall(addnodes.translatable): + node.preserve_original_messages() + + +class _NodeUpdater: + """Contains logic for updating one node with the translated content.""" + + def __init__( + self, node: nodes.Element, patch: nodes.Element, document: nodes.document, noqa: bool, + ) -> None: + self.node: nodes.Element = node + self.patch: nodes.Element = patch + self.document: nodes.document = document + self.noqa: bool = noqa + + def compare_references(self, old_refs: Sequence[nodes.Element], + new_refs: Sequence[nodes.Element], + warning_msg: str) -> None: + """Warn about mismatches between references in original and translated content.""" + # FIXME: could use a smarter strategy than len(old_refs) == len(new_refs) + if not self.noqa and len(old_refs) != len(new_refs): + old_ref_rawsources = [ref.rawsource for ref in old_refs] + new_ref_rawsources = [ref.rawsource for ref in new_refs] + 
logger.warning(warning_msg.format(old_ref_rawsources, new_ref_rawsources), + location=self.node, type='i18n', subtype='inconsistent_references') + + def update_title_mapping(self) -> bool: + processed = False # skip flag + + # update title(section) target name-id mapping + if isinstance(self.node, nodes.title) and isinstance(self.node.parent, nodes.section): + section_node = self.node.parent + new_name = nodes.fully_normalize_name(self.patch.astext()) + old_name = nodes.fully_normalize_name(self.node.astext()) + + if old_name != new_name: + # if name would be changed, replace node names and + # document nameids mapping with new name. + names = section_node.setdefault('names', []) + names.append(new_name) + # Original section name (reference target name) should be kept to refer + # from other nodes which is still not translated or uses explicit target + # name like "`text to display <explicit target name_>`_".. + # So, `old_name` is still exist in `names`. + + _id = self.document.nameids.get(old_name, None) + explicit = self.document.nametypes.get(old_name, None) + + # * if explicit: _id is label. title node need another id. + # * if not explicit: + # + # * if _id is None: + # + # _id is None means: + # + # 1. _id was not provided yet. + # + # 2. _id was duplicated. + # + # old_name entry still exists in nameids and + # nametypes for another duplicated entry. + # + # * if _id is provided: below process + if _id: + if not explicit: + # _id was not duplicated. + # remove old_name entry from document ids database + # to reuse original _id. + self.document.nameids.pop(old_name, None) + self.document.nametypes.pop(old_name, None) + self.document.ids.pop(_id, None) + + # re-entry with new named section node. 
+ # + # Note: msgnode that is a second parameter of the + # `note_implicit_target` is not necessary here because + # section_node has been noted previously on rst parsing by + # `docutils.parsers.rst.states.RSTState.new_subsection()` + # and already has `system_message` if needed. + self.document.note_implicit_target(section_node) + + # replace target's refname to new target name + matcher = NodeMatcher(nodes.target, refname=old_name) + for old_target in self.document.findall(matcher): # type: nodes.target + old_target['refname'] = new_name + + processed = True + + return processed + + def update_autofootnote_references(self) -> None: + # auto-numbered foot note reference should use original 'ids'. + def list_replace_or_append(lst: list[N], old: N, new: N) -> None: + if old in lst: + lst[lst.index(old)] = new + else: + lst.append(new) + + is_autofootnote_ref = NodeMatcher(nodes.footnote_reference, auto=Any) + old_foot_refs: list[nodes.footnote_reference] = [ + *self.node.findall(is_autofootnote_ref)] + new_foot_refs: list[nodes.footnote_reference] = [ + *self.patch.findall(is_autofootnote_ref)] + self.compare_references(old_foot_refs, new_foot_refs, + __('inconsistent footnote references in translated message.' 
+ + ' original: {0}, translated: {1}')) + old_foot_namerefs: dict[str, list[nodes.footnote_reference]] = {} + for r in old_foot_refs: + old_foot_namerefs.setdefault(r.get('refname'), []).append(r) + for newf in new_foot_refs: + refname = newf.get('refname') + refs = old_foot_namerefs.get(refname, []) + if not refs: + newf.parent.remove(newf) + continue + + oldf = refs.pop(0) + newf['ids'] = oldf['ids'] + for id in newf['ids']: + self.document.ids[id] = newf + + if newf['auto'] == 1: + # autofootnote_refs + list_replace_or_append(self.document.autofootnote_refs, oldf, newf) + else: + # symbol_footnote_refs + list_replace_or_append(self.document.symbol_footnote_refs, oldf, newf) + + if refname: + footnote_refs = self.document.footnote_refs.setdefault(refname, []) + list_replace_or_append(footnote_refs, oldf, newf) + + refnames = self.document.refnames.setdefault(refname, []) + list_replace_or_append(refnames, oldf, newf) + + def update_refnamed_references(self) -> None: + # reference should use new (translated) 'refname'. + # * reference target ".. _Python: ..." is not translatable. + # * use translated refname for section refname. + # * inline reference "`Python <...>`_" has no 'refname'. + is_refnamed_ref = NodeMatcher(nodes.reference, refname=Any) + old_refs: list[nodes.reference] = [*self.node.findall(is_refnamed_ref)] + new_refs: list[nodes.reference] = [*self.patch.findall(is_refnamed_ref)] + self.compare_references(old_refs, new_refs, + __('inconsistent references in translated message.' + + ' original: {0}, translated: {1}')) + old_ref_names = [r['refname'] for r in old_refs] + new_ref_names = [r['refname'] for r in new_refs] + orphans = [*({*old_ref_names} - {*new_ref_names})] + for newr in new_refs: + if not self.document.has_name(newr['refname']): + # Maybe refname is translated but target is not translated. + # Note: multiple translated refnames break link ordering. 
+ if orphans: + newr['refname'] = orphans.pop(0) + else: + # orphan refnames is already empty! + # reference number is same in new_refs and old_refs. + pass + + self.document.note_refname(newr) + + def update_refnamed_footnote_references(self) -> None: + # refnamed footnote should use original 'ids'. + is_refnamed_footnote_ref = NodeMatcher(nodes.footnote_reference, refname=Any) + old_foot_refs: list[nodes.footnote_reference] = [*self.node.findall( + is_refnamed_footnote_ref)] + new_foot_refs: list[nodes.footnote_reference] = [*self.patch.findall( + is_refnamed_footnote_ref)] + refname_ids_map: dict[str, list[str]] = {} + self.compare_references(old_foot_refs, new_foot_refs, + __('inconsistent footnote references in translated message.' + + ' original: {0}, translated: {1}')) + for oldf in old_foot_refs: + refname_ids_map.setdefault(oldf["refname"], []).append(oldf["ids"]) + for newf in new_foot_refs: + refname = newf["refname"] + if refname_ids_map.get(refname): + newf["ids"] = refname_ids_map[refname].pop(0) + + def update_citation_references(self) -> None: + # citation should use original 'ids'. + is_citation_ref = NodeMatcher(nodes.citation_reference, refname=Any) + old_cite_refs: list[nodes.citation_reference] = [*self.node.findall(is_citation_ref)] + new_cite_refs: list[nodes.citation_reference] = [*self.patch.findall(is_citation_ref)] + self.compare_references(old_cite_refs, new_cite_refs, + __('inconsistent citation references in translated message.' + + ' original: {0}, translated: {1}')) + refname_ids_map: dict[str, list[str]] = {} + for oldc in old_cite_refs: + refname_ids_map.setdefault(oldc["refname"], []).append(oldc["ids"]) + for newc in new_cite_refs: + refname = newc["refname"] + if refname_ids_map.get(refname): + newc["ids"] = refname_ids_map[refname].pop() + + def update_pending_xrefs(self) -> None: + # Original pending_xref['reftarget'] contain not-translated + # target name, new pending_xref must use original one. 
+ # This code restricts to change ref-targets in the translation. + old_xrefs = [*self.node.findall(addnodes.pending_xref)] + new_xrefs = [*self.patch.findall(addnodes.pending_xref)] + self.compare_references(old_xrefs, new_xrefs, + __('inconsistent term references in translated message.' + + ' original: {0}, translated: {1}')) + + xref_reftarget_map: dict[tuple[str, str, str] | None, dict[str, Any]] = {} + + def get_ref_key(node: addnodes.pending_xref) -> tuple[str, str, str] | None: + case = node["refdomain"], node["reftype"] + if case == ('std', 'term'): + return None + else: + return ( + node["refdomain"], + node["reftype"], + node['reftarget'], + ) + + for old in old_xrefs: + key = get_ref_key(old) + if key: + xref_reftarget_map[key] = old.attributes + for new in new_xrefs: + key = get_ref_key(new) + # Copy attributes to keep original node behavior. Especially + # copying 'reftarget', 'py:module', 'py:class' are needed. + for k, v in xref_reftarget_map.get(key, {}).items(): + if k not in EXCLUDED_PENDING_XREF_ATTRIBUTES: + new[k] = v + + def update_leaves(self) -> None: + for child in self.patch.children: + child.parent = self.node + self.node.children = self.patch.children + + +class Locale(SphinxTransform): + """ + Replace translatable nodes with their translated doctree. 
+ """ + default_priority = 20 + + def apply(self, **kwargs: Any) -> None: + settings, source = self.document.settings, self.document['source'] + msgstr = '' + + textdomain = docname_to_domain(self.env.docname, self.config.gettext_compact) + + # fetch translations + dirs = [path.join(self.env.srcdir, directory) + for directory in self.config.locale_dirs] + catalog, has_catalog = init_locale(dirs, self.config.language, textdomain) + if not has_catalog: + return + + catalogues = [getattr(catalog, '_catalog', None)] + while (catalog := catalog._fallback) is not None: # type: ignore[attr-defined] + catalogues.append(getattr(catalog, '_catalog', None)) + merged: dict[str, str] = {} + for catalogue in filter(None, reversed(catalogues)): # type: dict[str, str] + merged |= catalogue + + # phase1: replace reference ids with translated names + for node, msg in extract_messages(self.document): + msgstr = merged.get(msg, '') + + # There is no point in having #noqa on literal blocks because + # they cannot contain references. Recognizing it would just + # completely prevent escaping the #noqa. Outside of literal + # blocks, one can always write \#noqa. + if not isinstance(node, LITERAL_TYPE_NODES): + msgstr, _ = parse_noqa(msgstr) + + if msgstr.strip() == '': + # as-of-yet untranslated + node['translated'] = False + continue + if msgstr == msg: + # identical source and translated messages + node['translated'] = True + continue + + # Avoid "Literal block expected; none found." warnings. + # If msgstr ends with '::' then it cause warning message at + # parser.parse() processing. + # literal-block-warning is only appear in avobe case. + if msgstr.strip().endswith('::'): + msgstr += '\n\n dummy literal' + # dummy literal node will discard by 'patch = patch[0]' + + # literalblock need literal block notation to avoid it become + # paragraph. 
+ if isinstance(node, LITERAL_TYPE_NODES): + msgstr = '::\n\n' + indent(msgstr, ' ' * 3) + + patch = publish_msgstr(self.app, msgstr, source, + node.line, self.config, settings) + # FIXME: no warnings about inconsistent references in this part + # XXX doctest and other block markup + if not isinstance(patch, nodes.paragraph): + continue # skip for now + + updater = _NodeUpdater(node, patch, self.document, noqa=False) + processed = updater.update_title_mapping() + + # glossary terms update refid + if isinstance(node, nodes.term): + for _id in node['ids']: + parts = split_term_classifiers(msgstr) + patch = publish_msgstr( + self.app, parts[0] or '', source, node.line, self.config, settings, + ) + updater.patch = make_glossary_term( + self.env, patch, parts[1] or '', source, node.line, _id, self.document, + ) + processed = True + + # update leaves with processed nodes + if processed: + updater.update_leaves() + node['translated'] = True # to avoid double translation + else: + node['translated'] = False + + # phase2: translation + for node, msg in extract_messages(self.document): + if node.setdefault('translated', False): # to avoid double translation + continue # skip if the node is already translated by phase1 + + msgstr = merged.get(msg, '') + noqa = False + + # See above. + if not isinstance(node, LITERAL_TYPE_NODES): + msgstr, noqa = parse_noqa(msgstr) + + if not msgstr or msgstr == msg: # as-of-yet untranslated + node['translated'] = False + continue + + # update translatable nodes + if isinstance(node, addnodes.translatable): + node.apply_translated_message(msg, msgstr) # type: ignore[attr-defined] + continue + + # update meta nodes + if isinstance(node, nodes.meta): # type: ignore[attr-defined] + node['content'] = msgstr + node['translated'] = True + continue + + if isinstance(node, nodes.image) and node.get('alt') == msg: + node['alt'] = msgstr + continue + + # Avoid "Literal block expected; none found." warnings. 
+ # If msgstr ends with '::' then it cause warning message at + # parser.parse() processing. + # literal-block-warning is only appear in avobe case. + if msgstr.strip().endswith('::'): + msgstr += '\n\n dummy literal' + # dummy literal node will discard by 'patch = patch[0]' + + # literalblock need literal block notation to avoid it become + # paragraph. + if isinstance(node, LITERAL_TYPE_NODES): + msgstr = '::\n\n' + indent(msgstr, ' ' * 3) + + # Structural Subelements phase1 + # There is a possibility that only the title node is created. + # see: https://docutils.sourceforge.io/docs/ref/doctree.html#structural-subelements + if isinstance(node, nodes.title): + # This generates: <section ...><title>msgstr</title></section> + msgstr = msgstr + '\n' + '=' * len(msgstr) * 2 + + patch = publish_msgstr(self.app, msgstr, source, + node.line, self.config, settings) + # Structural Subelements phase2 + if isinstance(node, nodes.title): + # get <title> node that placed as a first child + patch = patch.next_node() + + # ignore unexpected markups in translation message + unexpected: tuple[type[nodes.Element], ...] = ( + nodes.paragraph, # expected form of translation + nodes.title, # generated by above "Subelements phase2" + ) + + # following types are expected if + # config.gettext_additional_targets is configured + unexpected += LITERAL_TYPE_NODES + unexpected += IMAGE_TYPE_NODES + + if not isinstance(patch, unexpected): + continue # skip + + updater = _NodeUpdater(node, patch, self.document, noqa) + updater.update_autofootnote_references() + updater.update_refnamed_references() + updater.update_refnamed_footnote_references() + updater.update_citation_references() + updater.update_pending_xrefs() + updater.update_leaves() + + # for highlighting that expects .rawsource and .astext() are same. 
+ if isinstance(node, LITERAL_TYPE_NODES): + node.rawsource = node.astext() + + if isinstance(node, nodes.image) and node.get('alt') != msg: + node['uri'] = patch['uri'] + node['translated'] = False + continue # do not mark translated + + node['translated'] = True # to avoid double translation + + if 'index' in self.config.gettext_additional_targets: + # Extract and translate messages for index entries. + for node, entries in traverse_translatable_index(self.document): + new_entries: list[tuple[str, str, str, str, str | None]] = [] + for entry_type, value, target_id, main, _category_key in entries: + msg_parts = split_index_msg(entry_type, value) + msgstr_parts = [] + for part in msg_parts: + msgstr = merged.get(part, '') + if not msgstr: + msgstr = part + msgstr_parts.append(msgstr) + + new_entry = entry_type, ';'.join(msgstr_parts), target_id, main, None + new_entries.append(new_entry) + + node['raw_entries'] = entries + node['entries'] = new_entries + + +class TranslationProgressTotaliser(SphinxTransform): + """ + Calculate the number of translated and untranslated nodes. + """ + default_priority = 25 # MUST happen after Locale + + def apply(self, **kwargs: Any) -> None: + from sphinx.builders.gettext import MessageCatalogBuilder + if isinstance(self.app.builder, MessageCatalogBuilder): + return + + total = translated = 0 + for node in self.document.findall(NodeMatcher(translated=Any)): # type: nodes.Element + total += 1 + if node['translated']: + translated += 1 + + self.document['translation_progress'] = { + 'total': total, + 'translated': translated, + } + + +class AddTranslationClasses(SphinxTransform): + """ + Add ``translated`` or ``untranslated`` classes to indicate translation status. 
+ """ + default_priority = 950 + + def apply(self, **kwargs: Any) -> None: + from sphinx.builders.gettext import MessageCatalogBuilder + if isinstance(self.app.builder, MessageCatalogBuilder): + return + + if not self.config.translation_progress_classes: + return + + if self.config.translation_progress_classes is True: + add_translated = add_untranslated = True + elif self.config.translation_progress_classes == 'translated': + add_translated = True + add_untranslated = False + elif self.config.translation_progress_classes == 'untranslated': + add_translated = False + add_untranslated = True + else: + msg = ('translation_progress_classes must be ' + 'True, False, "translated" or "untranslated"') + raise ConfigError(msg) + + for node in self.document.findall(NodeMatcher(translated=Any)): # type: nodes.Element + if node['translated']: + if add_translated: + node.setdefault('classes', []).append('translated') + else: + if add_untranslated: + node.setdefault('classes', []).append('untranslated') + + +class RemoveTranslatableInline(SphinxTransform): + """ + Remove inline nodes used for translation as placeholders. 
+ """ + default_priority = 999 + + def apply(self, **kwargs: Any) -> None: + from sphinx.builders.gettext import MessageCatalogBuilder + if isinstance(self.app.builder, MessageCatalogBuilder): + return + + matcher = NodeMatcher(nodes.inline, translatable=Any) + for inline in list(self.document.findall(matcher)): # type: nodes.inline + inline.parent.remove(inline) + inline.parent += inline.children + + +def setup(app: Sphinx) -> dict[str, Any]: + app.add_transform(PreserveTranslatableMessages) + app.add_transform(Locale) + app.add_transform(TranslationProgressTotaliser) + app.add_transform(AddTranslationClasses) + app.add_transform(RemoveTranslatableInline) + + return { + 'version': 'builtin', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/sphinx/transforms/post_transforms/__init__.py b/sphinx/transforms/post_transforms/__init__.py new file mode 100644 index 0000000..485f1f1 --- /dev/null +++ b/sphinx/transforms/post_transforms/__init__.py @@ -0,0 +1,297 @@ +"""Docutils transforms used by Sphinx.""" + +from __future__ import annotations + +import re +from typing import TYPE_CHECKING, Any, cast + +from docutils import nodes +from docutils.nodes import Element, Node + +from sphinx import addnodes +from sphinx.errors import NoUri +from sphinx.locale import __ +from sphinx.transforms import SphinxTransform +from sphinx.util import logging +from sphinx.util.docutils import SphinxTranslator +from sphinx.util.nodes import find_pending_xref_condition, process_only_nodes + +if TYPE_CHECKING: + from collections.abc import Sequence + + from sphinx.addnodes import pending_xref + from sphinx.application import Sphinx + from sphinx.domains import Domain + +logger = logging.getLogger(__name__) + + +class SphinxPostTransform(SphinxTransform): + """A base class of post-transforms. + + Post transforms are invoked to modify the document to restructure it for outputting. 
+    They resolve references, convert images, perform special transformations
+    for each output format and so on.  This class helps to implement these
+    post transforms.
+    """
+    builders: tuple[str, ...] = ()
+    formats: tuple[str, ...] = ()
+
+    def apply(self, **kwargs: Any) -> None:
+        if self.is_supported():
+            self.run(**kwargs)
+
+    def is_supported(self) -> bool:
+        """Check whether this transform is applicable to the current builder."""
+        if self.builders and self.app.builder.name not in self.builders:
+            return False
+        if self.formats and self.app.builder.format not in self.formats:
+            return False
+
+        return True
+
+    def run(self, **kwargs: Any) -> None:
+        """Main method of post transforms.
+
+        Subclasses should override this method instead of ``apply()``.
+        """
+        raise NotImplementedError
+
+
+class ReferencesResolver(SphinxPostTransform):
+    """
+    Resolves cross-references on doctrees.
+    """
+
+    default_priority = 10
+
+    def run(self, **kwargs: Any) -> None:
+        for node in self.document.findall(addnodes.pending_xref):
+            # prefer the pre-resolved content variant if one was recorded
+            content = self.find_pending_xref_condition(node, ("resolved", "*"))
+            if content:
+                contnode = cast(Element, content[0].deepcopy())
+            else:
+                contnode = cast(Element, node[0].deepcopy())
+
+            newnode = None
+
+            typ = node['reftype']
+            target = node['reftarget']
+            node.setdefault('refdoc', self.env.docname)
+            refdoc = node.get('refdoc')
+            domain = None
+
+            try:
+                if 'refdomain' in node and node['refdomain']:
+                    # let the domain try to resolve the reference
+                    try:
+                        domain = self.env.domains[node['refdomain']]
+                    except KeyError as exc:
+                        raise NoUri(target, typ) from exc
+                    newnode = domain.resolve_xref(self.env, refdoc, self.app.builder,
+                                                  typ, target, node, contnode)
+                # really hardwired reference types
+                elif typ == 'any':
+                    newnode = self.resolve_anyref(refdoc, node, contnode)
+                # no new node found?
 try the missing-reference event
+                if newnode is None:
+                    newnode = self.app.emit_firstresult('missing-reference', self.env,
+                                                        node, contnode,
+                                                        allowed_exceptions=(NoUri,))
+                    # still not found? warn if the node wishes to be warned about
+                    # or we are in nit-picky mode
+                    if newnode is None:
+                        self.warn_missing_reference(refdoc, typ, target, node, domain)
+            except NoUri:
+                newnode = None
+
+            if newnode:
+                newnodes: list[Node] = [newnode]
+            else:
+                # unresolved: fall back to the content node, or to the "*"
+                # pending_xref_condition variant if one exists
+                newnodes = [contnode]
+                if newnode is None and isinstance(node[0], addnodes.pending_xref_condition):
+                    matched = self.find_pending_xref_condition(node, ("*",))
+                    if matched:
+                        newnodes = matched
+                    else:
+                        logger.warning(__('Could not determine the fallback text for the '
+                                          'cross-reference. Might be a bug.'), location=node)
+
+            node.replace_self(newnodes)
+
+    def resolve_anyref(
+        self, refdoc: str, node: pending_xref, contnode: Element,
+    ) -> Element | None:
+        """Resolve a reference generated by the "any" role."""
+        stddomain = self.env.get_domain('std')
+        target = node['reftarget']
+        results: list[tuple[str, Element]] = []
+        # first, try resolving as :doc:
+        doc_ref = stddomain.resolve_xref(self.env, refdoc, self.app.builder,
+                                         'doc', target, node, contnode)
+        if doc_ref:
+            results.append(('doc', doc_ref))
+        # next, do the standard domain (makes this a priority)
+        results.extend(stddomain.resolve_any_xref(self.env, refdoc, self.app.builder,
+                                                  target, node, contnode))
+        for domain in self.env.domains.values():
+            if domain.name == 'std':
+                continue  # we did this one already
+            try:
+                results.extend(domain.resolve_any_xref(self.env, refdoc, self.app.builder,
+                                                       target, node, contnode))
+            except NotImplementedError:
+                # the domain doesn't yet support the new interface
+                # we have to manually collect possible references (SLOW)
+                for role in domain.roles:
+                    res = domain.resolve_xref(self.env, refdoc, self.app.builder,
+                                              role, target, node, contnode)
+                    if res and len(res) > 0 and isinstance(res[0], nodes.Element):
+
                        results.append((f'{domain.name}:{role}', res))
+        # now, see how many matches we got...
+        if not results:
+            return None
+        if len(results) > 1:
+            def stringify(name: str, node: Element) -> str:
+                reftitle = node.get('reftitle', node.astext())
+                return f':{name}:`{reftitle}`'
+            candidates = ' or '.join(stringify(name, role) for name, role in results)
+            logger.warning(__("more than one target found for 'any' cross-"
+                              'reference %r: could be %s'), target, candidates,
+                           location=node)
+        # ambiguous matches: warn above, then take the first result
+        res_role, newnode = results[0]
+        # Override "any" class with the actual role type to get the styling
+        # approximately correct.
+        res_domain = res_role.split(':')[0]
+        if (len(newnode) > 0 and
+                isinstance(newnode[0], nodes.Element) and
+                newnode[0].get('classes')):
+            newnode[0]['classes'].append(res_domain)
+            newnode[0]['classes'].append(res_role.replace(':', '-'))
+        return newnode
+
+    def warn_missing_reference(self, refdoc: str, typ: str, target: str,
+                               node: pending_xref, domain: Domain | None) -> None:
+        """Emit a warning for an unresolved reference, honouring the nitpick
+        settings and the ``warn-missing-reference`` event."""
+        warn = node.get('refwarn')
+        if self.config.nitpicky:
+            warn = True
+        dtype = f'{domain.name}:{typ}' if domain else typ
+        if self.config.nitpick_ignore:
+            if (dtype, target) in self.config.nitpick_ignore:
+                warn = False
+            # for "std" types also try without domain name
+            if (not domain or domain.name == 'std') and \
+               (typ, target) in self.config.nitpick_ignore:
+                warn = False
+        if self.config.nitpick_ignore_regex:
+            def matches_ignore(entry_type: str, entry_target: str) -> bool:
+                return any(
+                    (
+                        re.fullmatch(ignore_type, entry_type)
+                        and re.fullmatch(ignore_target, entry_target)
+                    )
+                    for ignore_type, ignore_target
+                    in self.config.nitpick_ignore_regex
+                )
+            if matches_ignore(dtype, target):
+                warn = False
+            # for "std" types also try without domain name
+            if (not domain or domain.name == 'std') and \
+               matches_ignore(typ, target):
+                warn = False
+        if not warn:
+            return
+
+        if self.app.emit_firstresult('warn-missing-reference', domain, node):
+            return
+        elif domain and typ in
 domain.dangling_warnings:
+            msg = domain.dangling_warnings[typ] % {'target': target}
+        elif node.get('refdomain', 'std') not in ('', 'std'):
+            msg = (__('%s:%s reference target not found: %s') %
+                   (node['refdomain'], typ, target))
+        else:
+            msg = __('%r reference target not found: %s') % (typ, target)
+        logger.warning(msg, location=node, type='ref', subtype=typ)
+
+    def find_pending_xref_condition(self, node: pending_xref, conditions: Sequence[str],
+                                    ) -> list[Node] | None:
+        """Return the children of the first matching ``pending_xref_condition``
+        for *conditions*, or ``None`` if none matches."""
+        for condition in conditions:
+            matched = find_pending_xref_condition(node, condition)
+            if matched:
+                return matched.children
+        return None
+
+
+class OnlyNodeTransform(SphinxPostTransform):
+    default_priority = 50
+
+    def run(self, **kwargs: Any) -> None:
+        # A comment on the comment() nodes being inserted: replacing by [] would
+        # result in a "Losing ids" exception if there is a target node before
+        # the only node, so we make sure docutils can transfer the id to
+        # something, even if it's just a comment and will lose the id anyway...
+        process_only_nodes(self.document, self.app.builder.tags)
+
+
+class SigElementFallbackTransform(SphinxPostTransform):
+    """Fallback various desc_* nodes to inline if translator does not support them."""
+    default_priority = 200
+
+    def run(self, **kwargs: Any) -> None:
+        def has_visitor(translator: type[nodes.NodeVisitor], node: type[Element]) -> bool:
+            return hasattr(translator, "visit_%s" % node.__name__)
+
+        try:
+            translator = self.app.builder.get_translator_class()
+        except AttributeError:
+            # do nothing if no translator class is specified (e.g., on a dummy builder)
+            return
+
+        if issubclass(translator, SphinxTranslator):
+            # subclass of SphinxTranslator supports desc_sig_element nodes automatically.
+            return
+
+        # for the leaf elements (desc_sig_element), the translator should support _all_,
+        # unless there exists a generic visit_desc_sig_element default visitor
+        if (not all(has_visitor(translator, node) for node in addnodes.SIG_ELEMENTS)
+                and not has_visitor(translator, addnodes.desc_sig_element)):
+            self.fallback(addnodes.desc_sig_element)
+
+        if not has_visitor(translator, addnodes.desc_inline):
+            self.fallback(addnodes.desc_inline)
+
+    def fallback(self, node_type: Any) -> None:
+        """Translate nodes of type *node_type* to docutils inline nodes.
+
+        The original node type name is stored as a string in a private
+        ``_sig_node_type`` attribute if the latter did not exist.
+        """
+        for node in self.document.findall(node_type):
+            newnode = nodes.inline()
+            newnode.update_all_atts(node)
+            newnode.extend(node)
+            # Only set _sig_node_type if not defined by the user
+            newnode.setdefault('_sig_node_type', node.tagname)
+            node.replace_self(newnode)
+
+
+class PropagateDescDomain(SphinxPostTransform):
+    """Add the domain name of the parent node as a class in each desc_signature node."""
+    default_priority = 200
+
+    def run(self, **kwargs: Any) -> None:
+        for node in self.document.findall(addnodes.desc_signature):
+            if node.parent.get('domain'):
+                node['classes'].append(node.parent['domain'])
+
+
+def setup(app: Sphinx) -> dict[str, Any]:
+    app.add_post_transform(ReferencesResolver)
+    app.add_post_transform(OnlyNodeTransform)
+    app.add_post_transform(SigElementFallbackTransform)
+    app.add_post_transform(PropagateDescDomain)
+
+    return {
+        'version': 'builtin',
+        'parallel_read_safe': True,
+        'parallel_write_safe': True,
+    }
diff --git a/sphinx/transforms/post_transforms/code.py b/sphinx/transforms/post_transforms/code.py
new file mode 100644
index 0000000..cd8abcc
--- /dev/null
+++ b/sphinx/transforms/post_transforms/code.py
@@ -0,0 +1,139 @@
+"""Transforms for code-blocks."""
+
+from __future__ import annotations
+
+import sys
+from typing import TYPE_CHECKING, Any,
 NamedTuple
+
+from docutils import nodes
+from pygments.lexers import PythonConsoleLexer, guess_lexer
+
+from sphinx import addnodes
+from sphinx.ext import doctest
+from sphinx.transforms import SphinxTransform
+
+if TYPE_CHECKING:
+    from docutils.nodes import Node, TextElement
+
+    from sphinx.application import Sphinx
+
+
+class HighlightSetting(NamedTuple):
+    language: str          # highlight language name
+    force: bool            # force highlighting even on lexing errors
+    lineno_threshold: int  # emit line numbers for blocks of at least this many lines
+
+
+class HighlightLanguageTransform(SphinxTransform):
+    """
+    Apply highlight_language to all literal_block nodes.
+
+    This refers to both the :confval:`highlight_language` setting and the
+    :rst:dir:`highlight` directive.  After processing, this transform
+    removes the ``highlightlang`` nodes from the doctree.
+    """
+    default_priority = 400
+
+    def apply(self, **kwargs: Any) -> None:
+        visitor = HighlightLanguageVisitor(self.document,
+                                           self.config.highlight_language)
+        self.document.walkabout(visitor)
+
+        for node in list(self.document.findall(addnodes.highlightlang)):
+            node.parent.remove(node)
+
+
+class HighlightLanguageVisitor(nodes.NodeVisitor):
+    """Visitor that pushes/pops highlight settings per (sub)document and
+    stamps each literal_block with the currently active setting."""
+
+    def __init__(self, document: nodes.document, default_language: str) -> None:
+        self.default_setting = HighlightSetting(default_language, False, sys.maxsize)
+        self.settings: list[HighlightSetting] = []
+        super().__init__(document)
+
+    def unknown_visit(self, node: Node) -> None:
+        pass
+
+    def unknown_departure(self, node: Node) -> None:
+        pass
+
+    def visit_document(self, node: Node) -> None:
+        self.settings.append(self.default_setting)
+
+    def depart_document(self, node: Node) -> None:
+        self.settings.pop()
+
+    def visit_start_of_file(self, node: Node) -> None:
+        # each included file starts from the default setting again
+        self.settings.append(self.default_setting)
+
+    def depart_start_of_file(self, node: Node) -> None:
+        self.settings.pop()
+
+    def visit_highlightlang(self, node: addnodes.highlightlang) -> None:
+        self.settings[-1] = HighlightSetting(node['lang'],
+                                             node['force'],
+                                             node['linenothreshold'])
+
+    def visit_literal_block(self, node: nodes.literal_block) -> None:
+
        setting = self.settings[-1]
+        if 'language' not in node:
+            node['language'] = setting.language
+            node['force'] = setting.force
+        if 'linenos' not in node:
+            # count('\n') is one less than the number of lines,
+            # hence the comparison against threshold - 1
+            lines = node.astext().count('\n')
+            node['linenos'] = (lines >= setting.lineno_threshold - 1)
+
+
+class TrimDoctestFlagsTransform(SphinxTransform):
+    """
+    Trim doctest flags like ``# doctest: +FLAG`` from python code-blocks.
+
+    See :confval:`trim_doctest_flags` for more information.
+    """
+    default_priority = HighlightLanguageTransform.default_priority + 1
+
+    def apply(self, **kwargs: Any) -> None:
+        for lbnode in self.document.findall(nodes.literal_block):
+            if self.is_pyconsole(lbnode):
+                self.strip_doctest_flags(lbnode)
+
+        for dbnode in self.document.findall(nodes.doctest_block):
+            self.strip_doctest_flags(dbnode)
+
+    def strip_doctest_flags(self, node: TextElement) -> None:
+        # per-node 'trim_flags' overrides the global config value
+        if not node.get('trim_flags', self.config.trim_doctest_flags):
+            return
+
+        source = node.rawsource
+        source = doctest.blankline_re.sub('', source)
+        source = doctest.doctestopt_re.sub('', source)
+        node.rawsource = source
+        node[:] = [nodes.Text(source)]
+
+    @staticmethod
+    def is_pyconsole(node: nodes.literal_block) -> bool:
+        if node.rawsource != node.astext():
+            return False  # skip parsed-literal node
+
+        language = node.get('language')
+        if language in {'pycon', 'pycon3'}:
+            return True
+        elif language in {'py', 'python', 'py3', 'python3', 'default'}:
+            return node.rawsource.startswith('>>>')
+        elif language == 'guess':
+            try:
+                lexer = guess_lexer(node.rawsource)
+                return isinstance(lexer, PythonConsoleLexer)
+            except Exception:
+                # best-effort guessing; any lexer failure means "not a console"
+                pass
+
+        return False
+
+
+def setup(app: Sphinx) -> dict[str, Any]:
+    app.add_post_transform(HighlightLanguageTransform)
+    app.add_post_transform(TrimDoctestFlagsTransform)
+
+    return {
+        'version': 'builtin',
+        'parallel_read_safe': True,
+        'parallel_write_safe': True,
+    }
diff --git a/sphinx/transforms/post_transforms/images.py b/sphinx/transforms/post_transforms/images.py
new file mode 100644
index 0000000..e220df0
--- /dev/null
+++ b/sphinx/transforms/post_transforms/images.py
@@ -0,0 +1,280 @@
+"""Docutils transforms used by Sphinx."""
+
+from __future__ import annotations
+
+import os
+import re
+from hashlib import sha1
+from math import ceil
+from typing import TYPE_CHECKING, Any
+
+from docutils import nodes
+
+from sphinx.locale import __
+from sphinx.transforms import SphinxTransform
+from sphinx.util import logging, requests
+from sphinx.util.http_date import epoch_to_rfc1123, rfc1123_to_epoch
+from sphinx.util.images import get_image_extension, guess_mimetype, parse_data_uri
+from sphinx.util.osutil import ensuredir
+
+if TYPE_CHECKING:
+    from sphinx.application import Sphinx
+
+logger = logging.getLogger(__name__)
+
+# file/dir names longer than this are replaced by a SHA-1 digest below
+MAX_FILENAME_LEN = 32
+# characters replaced by '_' when building on-disk image file names
+CRITICAL_PATH_CHAR_RE = re.compile('[:;<>|*" ]')
+
+
+class BaseImageConverter(SphinxTransform):
+    def apply(self, **kwargs: Any) -> None:
+        for node in self.document.findall(nodes.image):
+            if self.match(node):
+                self.handle(node)
+
+    def match(self, node: nodes.image) -> bool:
+        """Return True if *node* should be handled by this converter."""
+        return True
+
+    def handle(self, node: nodes.image) -> None:
+        """Process a matched image node; overridden by subclasses."""
+        pass
+
+    @property
+    def imagedir(self) -> str:
+        # working directory for downloaded/converted images
+        return os.path.join(self.app.doctreedir, 'images')
+
+
+class ImageDownloader(BaseImageConverter):
+    default_priority = 100
+
+    def match(self, node: nodes.image) -> bool:
+        if self.app.builder.supported_image_types == []:
+            return False
+        if self.app.builder.supported_remote_images:
+            # the builder can use the remote URI directly; no download needed
+            return False
+        return '://' in node['uri']
+
+    def handle(self, node: nodes.image) -> None:
+        try:
+            basename = os.path.basename(node['uri'])
+            if '?'
 in basename:
+                basename = basename.split('?')[0]
+            if basename == '' or len(basename) > MAX_FILENAME_LEN:
+                filename, ext = os.path.splitext(node['uri'])
+                basename = sha1(filename.encode(), usedforsecurity=False).hexdigest() + ext
+            basename = re.sub(CRITICAL_PATH_CHAR_RE, "_", basename)
+
+            # derive a per-URI cache directory from the URI itself
+            dirname = node['uri'].replace('://', '/').translate({ord("?"): "/",
+                                                                 ord("&"): "/"})
+            if len(dirname) > MAX_FILENAME_LEN:
+                dirname = sha1(dirname.encode(), usedforsecurity=False).hexdigest()
+            ensuredir(os.path.join(self.imagedir, dirname))
+            path = os.path.join(self.imagedir, dirname, basename)
+
+            # conditional GET: only re-download if the remote copy is newer
+            headers = {}
+            if os.path.exists(path):
+                timestamp: float = ceil(os.stat(path).st_mtime)
+                headers['If-Modified-Since'] = epoch_to_rfc1123(timestamp)
+
+            r = requests.get(node['uri'], headers=headers)
+            if r.status_code >= 400:
+                logger.warning(__('Could not fetch remote image: %s [%d]') %
+                               (node['uri'], r.status_code))
+            else:
+                self.app.env.original_image_uri[path] = node['uri']
+
+                # 200: fresh content; any other success (e.g. 304) keeps the cached file
+                if r.status_code == 200:
+                    with open(path, 'wb') as f:
+                        f.write(r.content)
+
+                last_modified = r.headers.get('last-modified')
+                if last_modified:
+                    timestamp = rfc1123_to_epoch(last_modified)
+                    os.utime(path, (timestamp, timestamp))
+
+                mimetype = guess_mimetype(path, default='*')
+                if mimetype != '*' and os.path.splitext(basename)[1] == '':
+                    # append a suffix if URI does not contain suffix
+                    ext = get_image_extension(mimetype)
+                    newpath = os.path.join(self.imagedir, dirname, basename + ext)
+                    os.replace(path, newpath)
+                    self.app.env.original_image_uri.pop(path)
+                    self.app.env.original_image_uri[newpath] = node['uri']
+                    path = newpath
+                node['candidates'].pop('?')
+                node['candidates'][mimetype] = path
+                node['uri'] = path
+                self.app.env.images.add_file(self.env.docname, path)
+        except Exception as exc:
+            # best-effort: a failed download must not abort the build
+            logger.warning(__('Could not fetch remote image: %s [%s]') % (node['uri'], exc))
+
+
+class DataURIExtractor(BaseImageConverter):
+    default_priority = 150
+
+    def match(self, node: nodes.image)
 -> bool:
+        if self.app.builder.supported_remote_images == []:
+            return False
+        if self.app.builder.supported_data_uri_images is True:
+            # the builder can embed data: URIs directly; nothing to extract
+            return False
+        return node['uri'].startswith('data:')
+
+    def handle(self, node: nodes.image) -> None:
+        image = parse_data_uri(node['uri'])
+        assert image is not None
+        ext = get_image_extension(image.mimetype)
+        if ext is None:
+            logger.warning(__('Unknown image format: %s...'), node['uri'][:32],
+                           location=node)
+            return
+
+        # NOTE(review): 'embeded' is misspelled but is an on-disk cache path;
+        # renaming it would orphan existing caches -- confirm before changing.
+        ensuredir(os.path.join(self.imagedir, 'embeded'))
+        digest = sha1(image.data, usedforsecurity=False).hexdigest()
+        path = os.path.join(self.imagedir, 'embeded', digest + ext)
+        self.app.env.original_image_uri[path] = node['uri']
+
+        with open(path, 'wb') as f:
+            f.write(image.data)
+
+        node['candidates'].pop('?')
+        node['candidates'][image.mimetype] = path
+        node['uri'] = path
+        self.app.env.images.add_file(self.env.docname, path)
+
+
+def get_filename_for(filename: str, mimetype: str) -> str:
+    """Return a sanitized basename for *filename* with the extension
+    appropriate for *mimetype*."""
+    basename = os.path.basename(filename)
+    basename = re.sub(CRITICAL_PATH_CHAR_RE, "_", basename)
+    return os.path.splitext(basename)[0] + (get_image_extension(mimetype) or '')
+
+
+class ImageConverter(BaseImageConverter):
+    """A base class for image converters.
+
+    An image converter is a kind of Docutils transform module.  It is used to
+    convert image files which are not supported by a builder to the
+    appropriate format for that builder.
+
+    For example, the :py:class:`LaTeX builder <.LaTeXBuilder>` supports PDF,
+    PNG and JPEG as image formats.  However it does not support SVG images.
+    In such a case, using image converters allows embedding these
+    unsupported images into the document.  One of the image converters,
+    :ref:`sphinx.ext.imgconverter <sphinx.ext.imgconverter>`, can convert
+    an SVG image to PNG format using ImageMagick internally.
+
+    There are three steps to make your custom image converter:
+
+    1. Make a subclass of the ``ImageConverter`` class
+    2.
 Override ``conversion_rules``, ``is_available()`` and ``convert()``
+    3. Register your image converter to Sphinx using
+       :py:meth:`.Sphinx.add_post_transform`
+    """
+    default_priority = 200
+
+    #: Whether the converter is available.  Will be filled at the first call
+    #: of the build.  The result is shared in the same process.
+    #:
+    #: .. todo:: This should be refactored so that the state is not stored in
+    #:           a class variable.
+    available: bool | None = None
+
+    #: The conversion rules the image converter supports.
+    #: It is represented as a list of pairs of source image format (mimetype)
+    #: and destination one::
+    #:
+    #:     conversion_rules = [
+    #:         ('image/svg+xml', 'image/png'),
+    #:         ('image/gif', 'image/png'),
+    #:         ('application/pdf', 'image/png'),
+    #:     ]
+    conversion_rules: list[tuple[str, str]] = []
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+
+    def match(self, node: nodes.image) -> bool:
+        if not self.app.builder.supported_image_types:
+            return False
+        if '?'
 in node['candidates']:
+            return False
+        if set(self.guess_mimetypes(node)) & set(self.app.builder.supported_image_types):
+            # builder supports the image; no need to convert
+            return False
+        if self.available is None:
+            # store the value to the class variable to share it during the build
+            self.__class__.available = self.is_available()
+
+        if not self.available:
+            return False
+        else:
+            try:
+                self.get_conversion_rule(node)
+            except ValueError:
+                return False
+            else:
+                return True
+
+    def get_conversion_rule(self, node: nodes.image) -> tuple[str, str]:
+        """Return the first (source, destination) mimetype pair applicable to
+        *node*; raise ValueError if no rule matches."""
+        for candidate in self.guess_mimetypes(node):
+            for supported in self.app.builder.supported_image_types:
+                rule = (candidate, supported)
+                if rule in self.conversion_rules:
+                    return rule
+
+        msg = 'No conversion rule found'
+        raise ValueError(msg)
+
+    def is_available(self) -> bool:
+        """Return whether the image converter is available."""
+        raise NotImplementedError
+
+    def guess_mimetypes(self, node: nodes.image) -> list[str]:
+        if '?' in node['candidates']:
+            return []
+        elif '*' in node['candidates']:
+            guessed = guess_mimetype(node['uri'])
+            return [guessed] if guessed is not None else []
+        else:
+            # NOTE(review): this branch returns a dict view, not a list,
+            # despite the annotation -- callers only iterate it, but confirm.
+            return node['candidates'].keys()
+
+    def handle(self, node: nodes.image) -> None:
+        _from, _to = self.get_conversion_rule(node)
+
+        if _from in node['candidates']:
+            srcpath = node['candidates'][_from]
+        else:
+            srcpath = node['candidates']['*']
+
+        filename = self.env.images[srcpath][1]
+        filename = get_filename_for(filename, _to)
+        ensuredir(self.imagedir)
+        destpath = os.path.join(self.imagedir, filename)
+
+        abs_srcpath = os.path.join(self.app.srcdir, srcpath)
+        if self.convert(abs_srcpath, destpath):
+            if '*' in node['candidates']:
+                node['candidates']['*'] = destpath
+            else:
+                node['candidates'][_to] = destpath
+            node['uri'] = destpath
+
+            self.env.original_image_uri[destpath] = srcpath
+            self.env.images.add_file(self.env.docname, destpath)
+
+    def convert(self, _from: str, _to: str) -> bool:
+        """Convert an image
 file to the expected format.
+
+        *_from* is a path of the source image file, and *_to* is a path
+        of the destination file.
+        """
+        raise NotImplementedError
+
+
+def setup(app: Sphinx) -> dict[str, Any]:
+    app.add_post_transform(ImageDownloader)
+    app.add_post_transform(DataURIExtractor)
+
+    return {
+        'version': 'builtin',
+        'parallel_read_safe': True,
+        'parallel_write_safe': True,
+    }
diff --git a/sphinx/transforms/references.py b/sphinx/transforms/references.py
new file mode 100644
index 0000000..5de3a95
--- /dev/null
+++ b/sphinx/transforms/references.py
@@ -0,0 +1,47 @@
+"""Docutils transforms used by Sphinx."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from docutils.transforms.references import DanglingReferences
+
+from sphinx.transforms import SphinxTransform
+
+if TYPE_CHECKING:
+    from sphinx.application import Sphinx
+
+
+class SphinxDanglingReferences(DanglingReferences):
+    """DanglingReferences transform which does not output info messages."""
+
+    def apply(self, **kwargs: Any) -> None:
+        # NOTE(review): if reading `self.document.reporter` raised, `reporter`
+        # and `report_level` would be unbound in the finally block.
+        try:
+            reporter = self.document.reporter
+            report_level = reporter.report_level
+
+            # suppress INFO level messages for a while
+            reporter.report_level = max(reporter.WARNING_LEVEL, reporter.report_level)
+            super().apply()
+        finally:
+            reporter.report_level = report_level
+
+
+class SphinxDomains(SphinxTransform):
+    """Collect objects to Sphinx domains for cross references."""
+    default_priority = 850
+
+    def apply(self, **kwargs: Any) -> None:
+        for domain in self.env.domains.values():
+            domain.process_doc(self.env, self.env.docname, self.document)
+
+
+def setup(app: Sphinx) -> dict[str, Any]:
+    app.add_transform(SphinxDanglingReferences)
+    app.add_transform(SphinxDomains)
+
+    return {
+        'version': 'builtin',
+        'parallel_read_safe': True,
+        'parallel_write_safe': True,
+    }