summaryrefslogtreecommitdiffstats
path: root/sphinx/transforms/i18n.py
diff options
context:
space:
mode:
Diffstat (limited to 'sphinx/transforms/i18n.py')
-rw-r--r--sphinx/transforms/i18n.py624
1 files changed, 624 insertions, 0 deletions
diff --git a/sphinx/transforms/i18n.py b/sphinx/transforms/i18n.py
new file mode 100644
index 0000000..d26c279
--- /dev/null
+++ b/sphinx/transforms/i18n.py
@@ -0,0 +1,624 @@
+"""Docutils transforms used by Sphinx when reading documents."""
+
+from __future__ import annotations
+
+import contextlib
+from os import path
+from re import DOTALL, match
+from textwrap import indent
+from typing import TYPE_CHECKING, Any, TypeVar
+
+from docutils import nodes
+from docutils.io import StringInput
+
+from sphinx import addnodes
+from sphinx.domains.std import make_glossary_term, split_term_classifiers
+from sphinx.errors import ConfigError
+from sphinx.locale import __
+from sphinx.locale import init as init_locale
+from sphinx.transforms import SphinxTransform
+from sphinx.util import get_filetype, logging
+from sphinx.util.i18n import docname_to_domain
+from sphinx.util.index_entries import split_index_msg
+from sphinx.util.nodes import (
+ IMAGE_TYPE_NODES,
+ LITERAL_TYPE_NODES,
+ NodeMatcher,
+ extract_messages,
+ traverse_translatable_index,
+)
+
+if TYPE_CHECKING:
+ from collections.abc import Sequence
+
+ from sphinx.application import Sphinx
+ from sphinx.config import Config
+
+
+logger = logging.getLogger(__name__)
+
+# The attributes not copied to the translated node
+#
+# * refexplicit: left out so that a translation may give (or not give)
+# an explicit title to the pending_xref
+EXCLUDED_PENDING_XREF_ATTRIBUTES = ('refexplicit',)
+
+
+N = TypeVar('N', bound=nodes.Node)
+
+
def publish_msgstr(app: Sphinx, source: str, source_path: str, source_line: int,
                   config: Config, settings: Any) -> nodes.Element:
    """Parse a translated message (msgstr) into a docutils node.

    ``config.rst_prolog`` is set to ``None`` while the message is parsed and
    is put back afterwards, including when parsing raises.

    :param sphinx.application.Sphinx app: sphinx application
    :param str source: source text
    :param str source_path: source path for warning indication
    :param source_line: source line for warning indication
    :param sphinx.config.Config config: sphinx config
    :param docutils.frontend.Values settings: docutils settings
    :return: the first child of the parsed document, or the document itself
        when it has no children
    :rtype: docutils.nodes.Element
    """
    try:
        # clear rst_prolog temporarily
        saved_prolog = config.rst_prolog
        config.rst_prolog = None  # type: ignore[attr-defined]

        from sphinx.io import SphinxI18nReader

        i18n_reader = SphinxI18nReader()
        i18n_reader.setup(app)
        source_parser = app.registry.create_source_parser(
            app, get_filetype(config.source_suffix, source_path),
        )
        # The pseudo path carries file and line so warnings point at the message.
        string_input = StringInput(
            source=source,
            source_path=f"{source_path}:{source_line}:<translated>",
        )
        doc = i18n_reader.read(
            source=string_input,
            parser=source_parser,
            settings=settings,
        )
        try:
            return doc[0]  # type: ignore[return-value]
        except IndexError:  # empty node
            return doc
    finally:
        config.rst_prolog = saved_prolog  # type: ignore[attr-defined]
+
+
def parse_noqa(source: str) -> tuple[str, bool]:
    """Split a trailing ``#noqa`` marker off *source*.

    Returns ``(text, True)`` with the text that precedes the marker when
    *source* ends in ``# noqa`` (optional whitespace after ``#`` and after
    ``noqa``) and the ``#`` is not preceded by a backslash; otherwise returns
    ``(source, False)`` with *source* unchanged.
    """
    found = match(r"(.*)(?<!\\)#\s*noqa\s*$", source, DOTALL)
    if found is None:
        return source, False
    return found.group(1), True
+
+
class PreserveTranslatableMessages(SphinxTransform):
    """
    Ask every ``addnodes.translatable`` node in the document to preserve its
    original messages, before any translation is applied.
    """
    default_priority = 10  # this MUST be invoked before Locale transform

    def apply(self, **kwargs: Any) -> None:
        translatable_nodes = self.document.findall(addnodes.translatable)
        for translatable in translatable_nodes:
            translatable.preserve_original_messages()
+
+
class _NodeUpdater:
    """Contains logic for updating one node with the translated content.

    ``node`` is the original node in ``document``; ``patch`` is the node that
    was parsed from the translated message.  The ``update_*`` methods copy
    identifying attributes from ``node`` onto ``patch`` and
    :meth:`update_leaves` finally moves the children of ``patch`` into
    ``node``.  When ``noqa`` is true, reference-count mismatches are not
    reported.
    """

    def __init__(
        self, node: nodes.Element, patch: nodes.Element, document: nodes.document, noqa: bool,
    ) -> None:
        self.node: nodes.Element = node
        self.patch: nodes.Element = patch
        self.document: nodes.document = document
        self.noqa: bool = noqa

    def compare_references(self, old_refs: Sequence[nodes.Element],
                           new_refs: Sequence[nodes.Element],
                           warning_msg: str) -> None:
        """Warn about mismatches between references in original and translated content.

        ``warning_msg`` is formatted with the raw sources of the original
        references as ``{0}`` and of the translated references as ``{1}``.
        Nothing is logged when ``self.noqa`` is set or the counts agree.
        """
        # FIXME: could use a smarter strategy than len(old_refs) == len(new_refs)
        if not self.noqa and len(old_refs) != len(new_refs):
            old_ref_rawsources = [ref.rawsource for ref in old_refs]
            new_ref_rawsources = [ref.rawsource for ref in new_refs]
            logger.warning(warning_msg.format(old_ref_rawsources, new_ref_rawsources),
                           location=self.node, type='i18n', subtype='inconsistent_references')

    def update_title_mapping(self) -> bool:
        """Register the translated section title as a target name.

        Only acts when ``self.node`` is a title whose parent is a section and
        the normalized translated title differs from the original one.

        :return: ``True`` if the name mapping was updated, ``False`` otherwise
        """
        processed = False  # skip flag

        # update title(section) target name-id mapping
        if isinstance(self.node, nodes.title) and isinstance(self.node.parent, nodes.section):
            section_node = self.node.parent
            new_name = nodes.fully_normalize_name(self.patch.astext())
            old_name = nodes.fully_normalize_name(self.node.astext())

            if old_name != new_name:
                # if name would be changed, replace node names and
                # document nameids mapping with new name.
                names = section_node.setdefault('names', [])
                names.append(new_name)
                # The original section name (reference target name) is kept so
                # that it can still be referred to from nodes that are not yet
                # translated or that use an explicit target name like
                # "`text to display <explicit target name_>`_".
                # So `old_name` still exists in `names`.

                _id = self.document.nameids.get(old_name, None)
                explicit = self.document.nametypes.get(old_name, None)

                # * if explicit: _id is label. title node need another id.
                # * if not explicit:
                #
                #   * if _id is None:
                #
                #     _id is None means:
                #
                #     1. _id was not provided yet.
                #
                #     2. _id was duplicated.
                #
                #        old_name entry still exists in nameids and
                #        nametypes for another duplicated entry.
                #
                #   * if _id is provided: below process
                if _id:
                    if not explicit:
                        # _id was not duplicated.
                        # remove old_name entry from document ids database
                        # to reuse original _id.
                        self.document.nameids.pop(old_name, None)
                        self.document.nametypes.pop(old_name, None)
                        self.document.ids.pop(_id, None)

                    # re-entry with new named section node.
                    #
                    # Note: msgnode that is a second parameter of the
                    # `note_implicit_target` is not necessary here because
                    # section_node has been noted previously on rst parsing by
                    # `docutils.parsers.rst.states.RSTState.new_subsection()`
                    # and already has `system_message` if needed.
                    self.document.note_implicit_target(section_node)

                # replace target's refname to new target name
                matcher = NodeMatcher(nodes.target, refname=old_name)
                for old_target in self.document.findall(matcher):  # type: nodes.target
                    old_target['refname'] = new_name

                processed = True

        return processed

    def update_autofootnote_references(self) -> None:
        """Make auto-numbered footnote references in the patch reuse the original 'ids'.

        Translated references are paired with original ones by ``refname`` in
        document order.  A translated reference without an original
        counterpart is removed from its parent.  The document's footnote
        reference registries are updated to point at the translated nodes.
        """
        def list_replace_or_append(lst: list[N], old: N, new: N) -> None:
            # Swap ``old`` for ``new`` in place; append ``new`` if ``old`` is absent.
            if old in lst:
                lst[lst.index(old)] = new
            else:
                lst.append(new)

        is_autofootnote_ref = NodeMatcher(nodes.footnote_reference, auto=Any)
        old_foot_refs: list[nodes.footnote_reference] = [
            *self.node.findall(is_autofootnote_ref)]
        new_foot_refs: list[nodes.footnote_reference] = [
            *self.patch.findall(is_autofootnote_ref)]
        self.compare_references(old_foot_refs, new_foot_refs,
                                __('inconsistent footnote references in translated message.' +
                                   ' original: {0}, translated: {1}'))
        old_foot_namerefs: dict[str, list[nodes.footnote_reference]] = {}
        for r in old_foot_refs:
            old_foot_namerefs.setdefault(r.get('refname'), []).append(r)
        for newf in new_foot_refs:
            refname = newf.get('refname')
            refs = old_foot_namerefs.get(refname, [])
            if not refs:
                newf.parent.remove(newf)
                continue

            oldf = refs.pop(0)
            newf['ids'] = oldf['ids']
            for node_id in newf['ids']:
                self.document.ids[node_id] = newf

            if newf['auto'] == 1:
                # autofootnote_refs
                list_replace_or_append(self.document.autofootnote_refs, oldf, newf)
            else:
                # symbol_footnote_refs
                list_replace_or_append(self.document.symbol_footnote_refs, oldf, newf)

            if refname:
                footnote_refs = self.document.footnote_refs.setdefault(refname, [])
                list_replace_or_append(footnote_refs, oldf, newf)

                refnames = self.document.refnames.setdefault(refname, [])
                list_replace_or_append(refnames, oldf, newf)

    def update_refnamed_references(self) -> None:
        """Resolve the 'refname' of each named reference in the patch.

        A translated refname the document does not know is replaced by an
        original refname that no translated reference uses (an "orphan").
        Every translated reference is then noted in the document.
        """
        # reference should use new (translated) 'refname'.
        # * reference target ".. _Python: ..." is not translatable.
        # * use translated refname for section refname.
        # * inline reference "`Python <...>`_" has no 'refname'.
        is_refnamed_ref = NodeMatcher(nodes.reference, refname=Any)
        old_refs: list[nodes.reference] = [*self.node.findall(is_refnamed_ref)]
        new_refs: list[nodes.reference] = [*self.patch.findall(is_refnamed_ref)]
        self.compare_references(old_refs, new_refs,
                                __('inconsistent references in translated message.' +
                                   ' original: {0}, translated: {1}'))
        # Orphans are collected in document order (duplicates dropped) rather
        # than via a set difference, so that the assignment below does not
        # depend on string hash ordering.
        translated_names = {r['refname'] for r in new_refs}
        orphans = [*dict.fromkeys(
            r['refname'] for r in old_refs if r['refname'] not in translated_names
        )]
        for newr in new_refs:
            if not self.document.has_name(newr['refname']):
                # Maybe refname is translated but target is not translated.
                # Note: multiple translated refnames break link ordering.
                if orphans:
                    newr['refname'] = orphans.pop(0)
                else:
                    # orphan refnames is already empty!
                    # reference number is same in new_refs and old_refs.
                    pass

            self.document.note_refname(newr)

    def update_refnamed_footnote_references(self) -> None:
        """Make named footnote references in the patch reuse the original 'ids'.

        References are paired by ``refname``, in document order.
        """
        # refnamed footnote should use original 'ids'.
        is_refnamed_footnote_ref = NodeMatcher(nodes.footnote_reference, refname=Any)
        old_foot_refs: list[nodes.footnote_reference] = [*self.node.findall(
            is_refnamed_footnote_ref)]
        new_foot_refs: list[nodes.footnote_reference] = [*self.patch.findall(
            is_refnamed_footnote_ref)]
        # refname -> one 'ids' value per original reference
        refname_ids_map: dict[str, list[list[str]]] = {}
        self.compare_references(old_foot_refs, new_foot_refs,
                                __('inconsistent footnote references in translated message.' +
                                   ' original: {0}, translated: {1}'))
        for oldf in old_foot_refs:
            refname_ids_map.setdefault(oldf["refname"], []).append(oldf["ids"])
        for newf in new_foot_refs:
            refname = newf["refname"]
            if refname_ids_map.get(refname):
                newf["ids"] = refname_ids_map[refname].pop(0)

    def update_citation_references(self) -> None:
        """Make citation references in the patch reuse the original 'ids'.

        References are paired by ``refname``, in document order, the same way
        as in :meth:`update_refnamed_footnote_references`.
        """
        # citation should use original 'ids'.
        is_citation_ref = NodeMatcher(nodes.citation_reference, refname=Any)
        old_cite_refs: list[nodes.citation_reference] = [*self.node.findall(is_citation_ref)]
        new_cite_refs: list[nodes.citation_reference] = [*self.patch.findall(is_citation_ref)]
        self.compare_references(old_cite_refs, new_cite_refs,
                                __('inconsistent citation references in translated message.' +
                                   ' original: {0}, translated: {1}'))
        # refname -> one 'ids' value per original reference
        refname_ids_map: dict[str, list[list[str]]] = {}
        for oldc in old_cite_refs:
            refname_ids_map.setdefault(oldc["refname"], []).append(oldc["ids"])
        for newc in new_cite_refs:
            refname = newc["refname"]
            if refname_ids_map.get(refname):
                # pop(0): hand the ids out first-to-first so that repeated
                # citations keep their ids in document order.
                newc["ids"] = refname_ids_map[refname].pop(0)

    def update_pending_xrefs(self) -> None:
        """Copy attributes of the original pending_xref nodes onto the translated ones.

        Nodes are matched on ``(refdomain, reftype, reftarget)``; ``std:term``
        references are never matched.  Attributes listed in
        ``EXCLUDED_PENDING_XREF_ATTRIBUTES`` are not copied.
        """
        # Original pending_xref['reftarget'] contain not-translated
        # target name, new pending_xref must use original one.
        # This code restricts to change ref-targets in the translation.
        old_xrefs = [*self.node.findall(addnodes.pending_xref)]
        new_xrefs = [*self.patch.findall(addnodes.pending_xref)]
        self.compare_references(old_xrefs, new_xrefs,
                                __('inconsistent term references in translated message.' +
                                   ' original: {0}, translated: {1}'))

        xref_reftarget_map: dict[tuple[str, str, str] | None, dict[str, Any]] = {}

        def get_ref_key(node: addnodes.pending_xref) -> tuple[str, str, str] | None:
            case = node["refdomain"], node["reftype"]
            if case == ('std', 'term'):
                return None
            else:
                return (
                    node["refdomain"],
                    node["reftype"],
                    node['reftarget'],
                )

        for old in old_xrefs:
            key = get_ref_key(old)
            if key:
                xref_reftarget_map[key] = old.attributes
        for new in new_xrefs:
            key = get_ref_key(new)
            # Copy attributes to keep original node behavior. Especially
            # copying 'reftarget', 'py:module', 'py:class' are needed.
            for k, v in xref_reftarget_map.get(key, {}).items():
                if k not in EXCLUDED_PENDING_XREF_ATTRIBUTES:
                    new[k] = v

    def update_leaves(self) -> None:
        """Move the children of ``self.patch`` into ``self.node``."""
        for child in self.patch.children:
            child.parent = self.node
        self.node.children = self.patch.children
+
+
class Locale(SphinxTransform):
    """
    Replace translatable nodes with their translated doctree.
    """
    default_priority = 20

    def apply(self, **kwargs: Any) -> None:
        """Translate the document using the message catalog of its text domain.

        Does nothing when no catalog is found.  Otherwise the work is done in
        three steps:

        * phase1 handles messages whose translation parses to a paragraph and
          that are section titles or glossary terms, updating name/id mappings;
        * phase2 translates every remaining message and carries reference
          attributes over from the original nodes;
        * index entries are translated when ``'index'`` is listed in
          ``gettext_additional_targets``.

        Processed nodes get a boolean ``'translated'`` attribute.
        """
        settings, source = self.document.settings, self.document['source']

        textdomain = docname_to_domain(self.env.docname, self.config.gettext_compact)

        # fetch translations
        dirs = [path.join(self.env.srcdir, directory)
                for directory in self.config.locale_dirs]
        catalog, has_catalog = init_locale(dirs, self.config.language, textdomain)
        if not has_catalog:
            return

        # Walk the fallback chain and merge it back to front, so that entries
        # of a catalog override those of its fallbacks.
        catalogues = [getattr(catalog, '_catalog', None)]
        while (catalog := catalog._fallback) is not None:  # type: ignore[attr-defined]
            catalogues.append(getattr(catalog, '_catalog', None))
        merged: dict[str, str] = {}
        for catalogue in filter(None, reversed(catalogues)):  # type: dict[str, str]
            merged |= catalogue

        # phase1: replace reference ids with translated names
        for node, msg in extract_messages(self.document):
            msgstr = merged.get(msg, '')

            # There is no point in having #noqa on literal blocks because
            # they cannot contain references.  Recognizing it would just
            # completely prevent escaping the #noqa.  Outside of literal
            # blocks, one can always write \#noqa.
            if not isinstance(node, LITERAL_TYPE_NODES):
                msgstr, _ = parse_noqa(msgstr)

            if msgstr.strip() == '':
                # as-of-yet untranslated
                node['translated'] = False
                continue
            if msgstr == msg:
                # identical source and translated messages
                node['translated'] = True
                continue

            # Avoid "Literal block expected; none found." warnings.
            # If msgstr ends with '::' then it causes a warning message during
            # parser.parse() processing.
            # The literal-block warning only appears in the above case.
            if msgstr.strip().endswith('::'):
                msgstr += '\n\n dummy literal'
                # dummy literal node will discard by 'patch = patch[0]'

            # literalblock need literal block notation to avoid it become
            # paragraph.
            if isinstance(node, LITERAL_TYPE_NODES):
                msgstr = '::\n\n' + indent(msgstr, ' ' * 3)

            patch = publish_msgstr(self.app, msgstr, source,
                                   node.line, self.config, settings)
            # FIXME: no warnings about inconsistent references in this part
            # XXX doctest and other block markup
            if not isinstance(patch, nodes.paragraph):
                continue  # skip for now

            updater = _NodeUpdater(node, patch, self.document, noqa=False)
            processed = updater.update_title_mapping()

            # glossary terms update refid
            if isinstance(node, nodes.term):
                for _id in node['ids']:
                    parts = split_term_classifiers(msgstr)
                    patch = publish_msgstr(
                        self.app, parts[0] or '', source, node.line, self.config, settings,
                    )
                    updater.patch = make_glossary_term(
                        self.env, patch, parts[1] or '', source, node.line, _id, self.document,
                    )
                    processed = True

            # update leaves with processed nodes
            if processed:
                updater.update_leaves()
                node['translated'] = True  # to avoid double translation
            else:
                node['translated'] = False

        # phase2: translation
        for node, msg in extract_messages(self.document):
            if node.setdefault('translated', False):  # to avoid double translation
                continue  # skip if the node is already translated by phase1

            msgstr = merged.get(msg, '')
            noqa = False

            # See above.
            if not isinstance(node, LITERAL_TYPE_NODES):
                msgstr, noqa = parse_noqa(msgstr)

            if not msgstr or msgstr == msg:  # as-of-yet untranslated
                node['translated'] = False
                continue

            # update translatable nodes
            if isinstance(node, addnodes.translatable):
                node.apply_translated_message(msg, msgstr)  # type: ignore[attr-defined]
                continue

            # update meta nodes
            if isinstance(node, nodes.meta):  # type: ignore[attr-defined]
                node['content'] = msgstr
                node['translated'] = True
                continue

            if isinstance(node, nodes.image) and node.get('alt') == msg:
                node['alt'] = msgstr
                continue

            # Avoid "Literal block expected; none found." warnings.
            # If msgstr ends with '::' then it causes a warning message during
            # parser.parse() processing.
            # The literal-block warning only appears in the above case.
            if msgstr.strip().endswith('::'):
                msgstr += '\n\n dummy literal'
                # dummy literal node will discard by 'patch = patch[0]'

            # literalblock need literal block notation to avoid it become
            # paragraph.
            if isinstance(node, LITERAL_TYPE_NODES):
                msgstr = '::\n\n' + indent(msgstr, ' ' * 3)

            # Structural Subelements phase1
            # There is a possibility that only the title node is created.
            # see: https://docutils.sourceforge.io/docs/ref/doctree.html#structural-subelements
            if isinstance(node, nodes.title):
                # This generates: <section ...><title>msgstr</title></section>
                msgstr = msgstr + '\n' + '=' * len(msgstr) * 2

            patch = publish_msgstr(self.app, msgstr, source,
                                   node.line, self.config, settings)
            # Structural Subelements phase2
            if isinstance(node, nodes.title):
                # get <title> node that placed as a first child
                patch = patch.next_node()

            # Node types accepted as the parsed form of a translation; any
            # other markup in the translation message is ignored.
            expected_types: tuple[type[nodes.Element], ...] = (
                nodes.paragraph,  # expected form of translation
                nodes.title,  # generated by above "Subelements phase2"
            )

            # following types are expected if
            # config.gettext_additional_targets is configured
            expected_types += LITERAL_TYPE_NODES
            expected_types += IMAGE_TYPE_NODES

            if not isinstance(patch, expected_types):
                continue  # skip

            updater = _NodeUpdater(node, patch, self.document, noqa)
            updater.update_autofootnote_references()
            updater.update_refnamed_references()
            updater.update_refnamed_footnote_references()
            updater.update_citation_references()
            updater.update_pending_xrefs()
            updater.update_leaves()

            # for highlighting that expects .rawsource and .astext() are same.
            if isinstance(node, LITERAL_TYPE_NODES):
                node.rawsource = node.astext()

            if isinstance(node, nodes.image) and node.get('alt') != msg:
                node['uri'] = patch['uri']
                node['translated'] = False
                continue  # do not mark translated

            node['translated'] = True  # to avoid double translation

        if 'index' in self.config.gettext_additional_targets:
            # Extract and translate messages for index entries.
            for node, entries in traverse_translatable_index(self.document):
                new_entries: list[tuple[str, str, str, str, str | None]] = []
                for entry_type, value, target_id, main, _category_key in entries:
                    msg_parts = split_index_msg(entry_type, value)
                    # A missing or empty translation falls back to the original part.
                    msgstr_parts = [merged.get(part) or part for part in msg_parts]

                    new_entry = entry_type, ';'.join(msgstr_parts), target_id, main, None
                    new_entries.append(new_entry)

                node['raw_entries'] = entries
                node['entries'] = new_entries
+
+
class TranslationProgressTotaliser(SphinxTransform):
    """
    Count the nodes carrying a ``translated`` attribute and how many of them
    are translated; store both in ``document['translation_progress']``.
    """
    default_priority = 25  # MUST happen after Locale

    def apply(self, **kwargs: Any) -> None:
        from sphinx.builders.gettext import MessageCatalogBuilder

        # Nothing is recorded when building message catalogs.
        if isinstance(self.app.builder, MessageCatalogBuilder):
            return

        has_translated_attr = NodeMatcher(translated=Any)
        flags = [
            bool(node['translated'])
            for node in self.document.findall(has_translated_attr)  # type: nodes.Element
        ]

        self.document['translation_progress'] = {
            'total': len(flags),
            'translated': sum(flags),
        }
+
+
class AddTranslationClasses(SphinxTransform):
    """
    Append a ``translated`` or ``untranslated`` class to every node that
    carries a ``translated`` attribute, as selected by the
    ``translation_progress_classes`` config value.
    """
    default_priority = 950

    def apply(self, **kwargs: Any) -> None:
        from sphinx.builders.gettext import MessageCatalogBuilder

        # No classes are added when building message catalogs.
        if isinstance(self.app.builder, MessageCatalogBuilder):
            return

        setting = self.config.translation_progress_classes
        if not setting:
            return

        # Map the translation status of a node to the class it receives.
        class_for_status: dict[bool, str]
        if setting is True:
            class_for_status = {True: 'translated', False: 'untranslated'}
        elif setting == 'translated':
            class_for_status = {True: 'translated'}
        elif setting == 'untranslated':
            class_for_status = {False: 'untranslated'}
        else:
            msg = ('translation_progress_classes must be '
                   'True, False, "translated" or "untranslated"')
            raise ConfigError(msg)

        has_translated_attr = NodeMatcher(translated=Any)
        for node in self.document.findall(has_translated_attr):  # type: nodes.Element
            css_class = class_for_status.get(bool(node['translated']))
            if css_class is not None:
                node.setdefault('classes', []).append(css_class)
+
+
class RemoveTranslatableInline(SphinxTransform):
    """
    Remove the inline nodes that carry a ``translatable`` attribute and add
    their children to the former parent.
    """
    default_priority = 999

    def apply(self, **kwargs: Any) -> None:
        from sphinx.builders.gettext import MessageCatalogBuilder

        # The placeholders are kept when building message catalogs.
        if isinstance(self.app.builder, MessageCatalogBuilder):
            return

        is_placeholder = NodeMatcher(nodes.inline, translatable=Any)
        # Collect first: the tree is modified while the matches are processed.
        placeholders = [*self.document.findall(is_placeholder)]
        for placeholder in placeholders:  # type: nodes.inline
            placeholder.parent.remove(placeholder)
            placeholder.parent += placeholder.children
+
+
def setup(app: Sphinx) -> dict[str, Any]:
    """Register the i18n transforms on *app* and return the extension metadata."""
    # Registered in this order; each transform carries its own default_priority.
    i18n_transforms = (
        PreserveTranslatableMessages,
        Locale,
        TranslationProgressTotaliser,
        AddTranslationClasses,
        RemoveTranslatableInline,
    )
    for transform in i18n_transforms:
        app.add_transform(transform)

    metadata: dict[str, Any] = {'version': 'builtin'}
    metadata['parallel_read_safe'] = True
    metadata['parallel_write_safe'] = True
    return metadata