"""Docutils transforms used by Sphinx when reading documents.""" from __future__ import annotations import contextlib from os import path from re import DOTALL, match from textwrap import indent from typing import TYPE_CHECKING, Any, TypeVar from docutils import nodes from docutils.io import StringInput from sphinx import addnodes from sphinx.domains.std import make_glossary_term, split_term_classifiers from sphinx.errors import ConfigError from sphinx.locale import __ from sphinx.locale import init as init_locale from sphinx.transforms import SphinxTransform from sphinx.util import get_filetype, logging from sphinx.util.i18n import docname_to_domain from sphinx.util.index_entries import split_index_msg from sphinx.util.nodes import ( IMAGE_TYPE_NODES, LITERAL_TYPE_NODES, NodeMatcher, extract_messages, traverse_translatable_index, ) if TYPE_CHECKING: from collections.abc import Sequence from sphinx.application import Sphinx from sphinx.config import Config logger = logging.getLogger(__name__) # The attributes not copied to the translated node # # * refexplict: For allow to give (or not to give) an explicit title # to the pending_xref on translation EXCLUDED_PENDING_XREF_ATTRIBUTES = ('refexplicit',) N = TypeVar('N', bound=nodes.Node) def publish_msgstr(app: Sphinx, source: str, source_path: str, source_line: int, config: Config, settings: Any) -> nodes.Element: """Publish msgstr (single line) into docutils document :param sphinx.application.Sphinx app: sphinx application :param str source: source text :param str source_path: source path for warning indication :param source_line: source line for warning indication :param sphinx.config.Config config: sphinx config :param docutils.frontend.Values settings: docutils settings :return: document :rtype: docutils.nodes.document """ try: # clear rst_prolog temporarily rst_prolog = config.rst_prolog config.rst_prolog = None # type: ignore[attr-defined] from sphinx.io import SphinxI18nReader reader = SphinxI18nReader() reader.setup(app) filetype = get_filetype(config.source_suffix, source_path) parser = app.registry.create_source_parser(app, filetype) doc = reader.read( source=StringInput(source=source, source_path=f"{source_path}:{source_line}:"), parser=parser, settings=settings, ) with contextlib.suppress(IndexError): # empty node return doc[0] # type: ignore[return-value] return doc finally: config.rst_prolog = rst_prolog # type: ignore[attr-defined] def parse_noqa(source: str) -> tuple[str, bool]: m = match(r"(.*)(? None: for node in self.document.findall(addnodes.translatable): node.preserve_original_messages() class _NodeUpdater: """Contains logic for updating one node with the translated content.""" def __init__( self, node: nodes.Element, patch: nodes.Element, document: nodes.document, noqa: bool, ) -> None: self.node: nodes.Element = node self.patch: nodes.Element = patch self.document: nodes.document = document self.noqa: bool = noqa def compare_references(self, old_refs: Sequence[nodes.Element], new_refs: Sequence[nodes.Element], warning_msg: str) -> None: """Warn about mismatches between references in original and translated content.""" # FIXME: could use a smarter strategy than len(old_refs) == len(new_refs) if not self.noqa and len(old_refs) != len(new_refs): old_ref_rawsources = [ref.rawsource for ref in old_refs] new_ref_rawsources = [ref.rawsource for ref in new_refs] logger.warning(warning_msg.format(old_ref_rawsources, new_ref_rawsources), location=self.node, type='i18n', subtype='inconsistent_references') def update_title_mapping(self) -> bool: processed = False # skip flag # update title(section) target name-id mapping if isinstance(self.node, nodes.title) and isinstance(self.node.parent, nodes.section): section_node = self.node.parent new_name = nodes.fully_normalize_name(self.patch.astext()) old_name = nodes.fully_normalize_name(self.node.astext()) if old_name != new_name: # if name would be changed, replace node names and # document nameids mapping with new name. names = section_node.setdefault('names', []) names.append(new_name) # Original section name (reference target name) should be kept to refer # from other nodes which is still not translated or uses explicit target # name like "`text to display `_".. # So, `old_name` is still exist in `names`. _id = self.document.nameids.get(old_name, None) explicit = self.document.nametypes.get(old_name, None) # * if explicit: _id is label. title node need another id. # * if not explicit: # # * if _id is None: # # _id is None means: # # 1. _id was not provided yet. # # 2. _id was duplicated. # # old_name entry still exists in nameids and # nametypes for another duplicated entry. # # * if _id is provided: below process if _id: if not explicit: # _id was not duplicated. # remove old_name entry from document ids database # to reuse original _id. self.document.nameids.pop(old_name, None) self.document.nametypes.pop(old_name, None) self.document.ids.pop(_id, None) # re-entry with new named section node. # # Note: msgnode that is a second parameter of the # `note_implicit_target` is not necessary here because # section_node has been noted previously on rst parsing by # `docutils.parsers.rst.states.RSTState.new_subsection()` # and already has `system_message` if needed. self.document.note_implicit_target(section_node) # replace target's refname to new target name matcher = NodeMatcher(nodes.target, refname=old_name) for old_target in self.document.findall(matcher): # type: nodes.target old_target['refname'] = new_name processed = True return processed def update_autofootnote_references(self) -> None: # auto-numbered foot note reference should use original 'ids'. def list_replace_or_append(lst: list[N], old: N, new: N) -> None: if old in lst: lst[lst.index(old)] = new else: lst.append(new) is_autofootnote_ref = NodeMatcher(nodes.footnote_reference, auto=Any) old_foot_refs: list[nodes.footnote_reference] = [ *self.node.findall(is_autofootnote_ref)] new_foot_refs: list[nodes.footnote_reference] = [ *self.patch.findall(is_autofootnote_ref)] self.compare_references(old_foot_refs, new_foot_refs, __('inconsistent footnote references in translated message.' + ' original: {0}, translated: {1}')) old_foot_namerefs: dict[str, list[nodes.footnote_reference]] = {} for r in old_foot_refs: old_foot_namerefs.setdefault(r.get('refname'), []).append(r) for newf in new_foot_refs: refname = newf.get('refname') refs = old_foot_namerefs.get(refname, []) if not refs: newf.parent.remove(newf) continue oldf = refs.pop(0) newf['ids'] = oldf['ids'] for id in newf['ids']: self.document.ids[id] = newf if newf['auto'] == 1: # autofootnote_refs list_replace_or_append(self.document.autofootnote_refs, oldf, newf) else: # symbol_footnote_refs list_replace_or_append(self.document.symbol_footnote_refs, oldf, newf) if refname: footnote_refs = self.document.footnote_refs.setdefault(refname, []) list_replace_or_append(footnote_refs, oldf, newf) refnames = self.document.refnames.setdefault(refname, []) list_replace_or_append(refnames, oldf, newf) def update_refnamed_references(self) -> None: # reference should use new (translated) 'refname'. # * reference target ".. _Python: ..." is not translatable. # * use translated refname for section refname. # * inline reference "`Python <...>`_" has no 'refname'. is_refnamed_ref = NodeMatcher(nodes.reference, refname=Any) old_refs: list[nodes.reference] = [*self.node.findall(is_refnamed_ref)] new_refs: list[nodes.reference] = [*self.patch.findall(is_refnamed_ref)] self.compare_references(old_refs, new_refs, __('inconsistent references in translated message.' + ' original: {0}, translated: {1}')) old_ref_names = [r['refname'] for r in old_refs] new_ref_names = [r['refname'] for r in new_refs] orphans = [*({*old_ref_names} - {*new_ref_names})] for newr in new_refs: if not self.document.has_name(newr['refname']): # Maybe refname is translated but target is not translated. # Note: multiple translated refnames break link ordering. if orphans: newr['refname'] = orphans.pop(0) else: # orphan refnames is already empty! # reference number is same in new_refs and old_refs. pass self.document.note_refname(newr) def update_refnamed_footnote_references(self) -> None: # refnamed footnote should use original 'ids'. is_refnamed_footnote_ref = NodeMatcher(nodes.footnote_reference, refname=Any) old_foot_refs: list[nodes.footnote_reference] = [*self.node.findall( is_refnamed_footnote_ref)] new_foot_refs: list[nodes.footnote_reference] = [*self.patch.findall( is_refnamed_footnote_ref)] refname_ids_map: dict[str, list[str]] = {} self.compare_references(old_foot_refs, new_foot_refs, __('inconsistent footnote references in translated message.' + ' original: {0}, translated: {1}')) for oldf in old_foot_refs: refname_ids_map.setdefault(oldf["refname"], []).append(oldf["ids"]) for newf in new_foot_refs: refname = newf["refname"] if refname_ids_map.get(refname): newf["ids"] = refname_ids_map[refname].pop(0) def update_citation_references(self) -> None: # citation should use original 'ids'. is_citation_ref = NodeMatcher(nodes.citation_reference, refname=Any) old_cite_refs: list[nodes.citation_reference] = [*self.node.findall(is_citation_ref)] new_cite_refs: list[nodes.citation_reference] = [*self.patch.findall(is_citation_ref)] self.compare_references(old_cite_refs, new_cite_refs, __('inconsistent citation references in translated message.' + ' original: {0}, translated: {1}')) refname_ids_map: dict[str, list[str]] = {} for oldc in old_cite_refs: refname_ids_map.setdefault(oldc["refname"], []).append(oldc["ids"]) for newc in new_cite_refs: refname = newc["refname"] if refname_ids_map.get(refname): newc["ids"] = refname_ids_map[refname].pop() def update_pending_xrefs(self) -> None: # Original pending_xref['reftarget'] contain not-translated # target name, new pending_xref must use original one. # This code restricts to change ref-targets in the translation. old_xrefs = [*self.node.findall(addnodes.pending_xref)] new_xrefs = [*self.patch.findall(addnodes.pending_xref)] self.compare_references(old_xrefs, new_xrefs, __('inconsistent term references in translated message.' + ' original: {0}, translated: {1}')) xref_reftarget_map: dict[tuple[str, str, str] | None, dict[str, Any]] = {} def get_ref_key(node: addnodes.pending_xref) -> tuple[str, str, str] | None: case = node["refdomain"], node["reftype"] if case == ('std', 'term'): return None else: return ( node["refdomain"], node["reftype"], node['reftarget'], ) for old in old_xrefs: key = get_ref_key(old) if key: xref_reftarget_map[key] = old.attributes for new in new_xrefs: key = get_ref_key(new) # Copy attributes to keep original node behavior. Especially # copying 'reftarget', 'py:module', 'py:class' are needed. for k, v in xref_reftarget_map.get(key, {}).items(): if k not in EXCLUDED_PENDING_XREF_ATTRIBUTES: new[k] = v def update_leaves(self) -> None: for child in self.patch.children: child.parent = self.node self.node.children = self.patch.children class Locale(SphinxTransform): """ Replace translatable nodes with their translated doctree. """ default_priority = 20 def apply(self, **kwargs: Any) -> None: settings, source = self.document.settings, self.document['source'] msgstr = '' textdomain = docname_to_domain(self.env.docname, self.config.gettext_compact) # fetch translations dirs = [path.join(self.env.srcdir, directory) for directory in self.config.locale_dirs] catalog, has_catalog = init_locale(dirs, self.config.language, textdomain) if not has_catalog: return catalogues = [getattr(catalog, '_catalog', None)] while (catalog := catalog._fallback) is not None: # type: ignore[attr-defined] catalogues.append(getattr(catalog, '_catalog', None)) merged: dict[str, str] = {} for catalogue in filter(None, reversed(catalogues)): # type: dict[str, str] merged |= catalogue # phase1: replace reference ids with translated names for node, msg in extract_messages(self.document): msgstr = merged.get(msg, '') # There is no point in having #noqa on literal blocks because # they cannot contain references. Recognizing it would just # completely prevent escaping the #noqa. Outside of literal # blocks, one can always write \#noqa. if not isinstance(node, LITERAL_TYPE_NODES): msgstr, _ = parse_noqa(msgstr) if msgstr.strip() == '': # as-of-yet untranslated node['translated'] = False continue if msgstr == msg: # identical source and translated messages node['translated'] = True continue # Avoid "Literal block expected; none found." warnings. # If msgstr ends with '::' then it cause warning message at # parser.parse() processing. # literal-block-warning is only appear in avobe case. if msgstr.strip().endswith('::'): msgstr += '\n\n dummy literal' # dummy literal node will discard by 'patch = patch[0]' # literalblock need literal block notation to avoid it become # paragraph. if isinstance(node, LITERAL_TYPE_NODES): msgstr = '::\n\n' + indent(msgstr, ' ' * 3) patch = publish_msgstr(self.app, msgstr, source, node.line, self.config, settings) # FIXME: no warnings about inconsistent references in this part # XXX doctest and other block markup if not isinstance(patch, nodes.paragraph): continue # skip for now updater = _NodeUpdater(node, patch, self.document, noqa=False) processed = updater.update_title_mapping() # glossary terms update refid if isinstance(node, nodes.term): for _id in node['ids']: parts = split_term_classifiers(msgstr) patch = publish_msgstr( self.app, parts[0] or '', source, node.line, self.config, settings, ) updater.patch = make_glossary_term( self.env, patch, parts[1] or '', source, node.line, _id, self.document, ) processed = True # update leaves with processed nodes if processed: updater.update_leaves() node['translated'] = True # to avoid double translation else: node['translated'] = False # phase2: translation for node, msg in extract_messages(self.document): if node.setdefault('translated', False): # to avoid double translation continue # skip if the node is already translated by phase1 msgstr = merged.get(msg, '') noqa = False # See above. if not isinstance(node, LITERAL_TYPE_NODES): msgstr, noqa = parse_noqa(msgstr) if not msgstr or msgstr == msg: # as-of-yet untranslated node['translated'] = False continue # update translatable nodes if isinstance(node, addnodes.translatable): node.apply_translated_message(msg, msgstr) # type: ignore[attr-defined] continue # update meta nodes if isinstance(node, nodes.meta): # type: ignore[attr-defined] node['content'] = msgstr node['translated'] = True continue if isinstance(node, nodes.image) and node.get('alt') == msg: node['alt'] = msgstr continue # Avoid "Literal block expected; none found." warnings. # If msgstr ends with '::' then it cause warning message at # parser.parse() processing. # literal-block-warning is only appear in avobe case. if msgstr.strip().endswith('::'): msgstr += '\n\n dummy literal' # dummy literal node will discard by 'patch = patch[0]' # literalblock need literal block notation to avoid it become # paragraph. if isinstance(node, LITERAL_TYPE_NODES): msgstr = '::\n\n' + indent(msgstr, ' ' * 3) # Structural Subelements phase1 # There is a possibility that only the title node is created. # see: https://docutils.sourceforge.io/docs/ref/doctree.html#structural-subelements if isinstance(node, nodes.title): # This generates:
msgstr
msgstr = msgstr + '\n' + '=' * len(msgstr) * 2 patch = publish_msgstr(self.app, msgstr, source, node.line, self.config, settings) # Structural Subelements phase2 if isinstance(node, nodes.title): # get node that placed as a first child patch = patch.next_node() # ignore unexpected markups in translation message unexpected: tuple[type[nodes.Element], ...] = ( nodes.paragraph, # expected form of translation nodes.title, # generated by above "Subelements phase2" ) # following types are expected if # config.gettext_additional_targets is configured unexpected += LITERAL_TYPE_NODES unexpected += IMAGE_TYPE_NODES if not isinstance(patch, unexpected): continue # skip updater = _NodeUpdater(node, patch, self.document, noqa) updater.update_autofootnote_references() updater.update_refnamed_references() updater.update_refnamed_footnote_references() updater.update_citation_references() updater.update_pending_xrefs() updater.update_leaves() # for highlighting that expects .rawsource and .astext() are same. if isinstance(node, LITERAL_TYPE_NODES): node.rawsource = node.astext() if isinstance(node, nodes.image) and node.get('alt') != msg: node['uri'] = patch['uri'] node['translated'] = False continue # do not mark translated node['translated'] = True # to avoid double translation if 'index' in self.config.gettext_additional_targets: # Extract and translate messages for index entries. for node, entries in traverse_translatable_index(self.document): new_entries: list[tuple[str, str, str, str, str | None]] = [] for entry_type, value, target_id, main, _category_key in entries: msg_parts = split_index_msg(entry_type, value) msgstr_parts = [] for part in msg_parts: msgstr = merged.get(part, '') if not msgstr: msgstr = part msgstr_parts.append(msgstr) new_entry = entry_type, ';'.join(msgstr_parts), target_id, main, None new_entries.append(new_entry) node['raw_entries'] = entries node['entries'] = new_entries class TranslationProgressTotaliser(SphinxTransform): """ Calculate the number of translated and untranslated nodes. """ default_priority = 25 # MUST happen after Locale def apply(self, **kwargs: Any) -> None: from sphinx.builders.gettext import MessageCatalogBuilder if isinstance(self.app.builder, MessageCatalogBuilder): return total = translated = 0 for node in self.document.findall(NodeMatcher(translated=Any)): # type: nodes.Element total += 1 if node['translated']: translated += 1 self.document['translation_progress'] = { 'total': total, 'translated': translated, } class AddTranslationClasses(SphinxTransform): """ Add ``translated`` or ``untranslated`` classes to indicate translation status. """ default_priority = 950 def apply(self, **kwargs: Any) -> None: from sphinx.builders.gettext import MessageCatalogBuilder if isinstance(self.app.builder, MessageCatalogBuilder): return if not self.config.translation_progress_classes: return if self.config.translation_progress_classes is True: add_translated = add_untranslated = True elif self.config.translation_progress_classes == 'translated': add_translated = True add_untranslated = False elif self.config.translation_progress_classes == 'untranslated': add_translated = False add_untranslated = True else: msg = ('translation_progress_classes must be ' 'True, False, "translated" or "untranslated"') raise ConfigError(msg) for node in self.document.findall(NodeMatcher(translated=Any)): # type: nodes.Element if node['translated']: if add_translated: node.setdefault('classes', []).append('translated') else: if add_untranslated: node.setdefault('classes', []).append('untranslated') class RemoveTranslatableInline(SphinxTransform): """ Remove inline nodes used for translation as placeholders. """ default_priority = 999 def apply(self, **kwargs: Any) -> None: from sphinx.builders.gettext import MessageCatalogBuilder if isinstance(self.app.builder, MessageCatalogBuilder): return matcher = NodeMatcher(nodes.inline, translatable=Any) for inline in list(self.document.findall(matcher)): # type: nodes.inline inline.parent.remove(inline) inline.parent += inline.children def setup(app: Sphinx) -> dict[str, Any]: app.add_transform(PreserveTranslatableMessages) app.add_transform(Locale) app.add_transform(TranslationProgressTotaliser) app.add_transform(AddTranslationClasses) app.add_transform(RemoveTranslatableInline) return { 'version': 'builtin', 'parallel_read_safe': True, 'parallel_write_safe': True, }