diff options
Diffstat (limited to 'sphinx/transforms/i18n.py')
-rw-r--r-- | sphinx/transforms/i18n.py | 624 |
1 files changed, 624 insertions, 0 deletions
diff --git a/sphinx/transforms/i18n.py b/sphinx/transforms/i18n.py new file mode 100644 index 0000000..d26c279 --- /dev/null +++ b/sphinx/transforms/i18n.py @@ -0,0 +1,624 @@ +"""Docutils transforms used by Sphinx when reading documents.""" + +from __future__ import annotations + +import contextlib +from os import path +from re import DOTALL, match +from textwrap import indent +from typing import TYPE_CHECKING, Any, TypeVar + +from docutils import nodes +from docutils.io import StringInput + +from sphinx import addnodes +from sphinx.domains.std import make_glossary_term, split_term_classifiers +from sphinx.errors import ConfigError +from sphinx.locale import __ +from sphinx.locale import init as init_locale +from sphinx.transforms import SphinxTransform +from sphinx.util import get_filetype, logging +from sphinx.util.i18n import docname_to_domain +from sphinx.util.index_entries import split_index_msg +from sphinx.util.nodes import ( + IMAGE_TYPE_NODES, + LITERAL_TYPE_NODES, + NodeMatcher, + extract_messages, + traverse_translatable_index, +) + +if TYPE_CHECKING: + from collections.abc import Sequence + + from sphinx.application import Sphinx + from sphinx.config import Config + + +logger = logging.getLogger(__name__) + +# The attributes not copied to the translated node +# +# * refexplict: For allow to give (or not to give) an explicit title +# to the pending_xref on translation +EXCLUDED_PENDING_XREF_ATTRIBUTES = ('refexplicit',) + + +N = TypeVar('N', bound=nodes.Node) + + +def publish_msgstr(app: Sphinx, source: str, source_path: str, source_line: int, + config: Config, settings: Any) -> nodes.Element: + """Publish msgstr (single line) into docutils document + + :param sphinx.application.Sphinx app: sphinx application + :param str source: source text + :param str source_path: source path for warning indication + :param source_line: source line for warning indication + :param sphinx.config.Config config: sphinx config + :param docutils.frontend.Values settings: docutils settings + :return: document + :rtype: docutils.nodes.document + """ + try: + # clear rst_prolog temporarily + rst_prolog = config.rst_prolog + config.rst_prolog = None # type: ignore[attr-defined] + + from sphinx.io import SphinxI18nReader + reader = SphinxI18nReader() + reader.setup(app) + filetype = get_filetype(config.source_suffix, source_path) + parser = app.registry.create_source_parser(app, filetype) + doc = reader.read( + source=StringInput(source=source, + source_path=f"{source_path}:{source_line}:<translated>"), + parser=parser, + settings=settings, + ) + with contextlib.suppress(IndexError): # empty node + return doc[0] # type: ignore[return-value] + return doc + finally: + config.rst_prolog = rst_prolog # type: ignore[attr-defined] + + +def parse_noqa(source: str) -> tuple[str, bool]: + m = match(r"(.*)(?<!\\)#\s*noqa\s*$", source, DOTALL) + if m: + return m.group(1), True + else: + return source, False + + +class PreserveTranslatableMessages(SphinxTransform): + """ + Preserve original translatable messages before translation + """ + default_priority = 10 # this MUST be invoked before Locale transform + + def apply(self, **kwargs: Any) -> None: + for node in self.document.findall(addnodes.translatable): + node.preserve_original_messages() + + +class _NodeUpdater: + """Contains logic for updating one node with the translated content.""" + + def __init__( + self, node: nodes.Element, patch: nodes.Element, document: nodes.document, noqa: bool, + ) -> None: + self.node: nodes.Element = node + self.patch: nodes.Element = patch + self.document: nodes.document = document + self.noqa: bool = noqa + + def compare_references(self, old_refs: Sequence[nodes.Element], + new_refs: Sequence[nodes.Element], + warning_msg: str) -> None: + """Warn about mismatches between references in original and translated content.""" + # FIXME: could use a smarter strategy than len(old_refs) == len(new_refs) + if not self.noqa and len(old_refs) != len(new_refs): + old_ref_rawsources = [ref.rawsource for ref in old_refs] + new_ref_rawsources = [ref.rawsource for ref in new_refs] + logger.warning(warning_msg.format(old_ref_rawsources, new_ref_rawsources), + location=self.node, type='i18n', subtype='inconsistent_references') + + def update_title_mapping(self) -> bool: + processed = False # skip flag + + # update title(section) target name-id mapping + if isinstance(self.node, nodes.title) and isinstance(self.node.parent, nodes.section): + section_node = self.node.parent + new_name = nodes.fully_normalize_name(self.patch.astext()) + old_name = nodes.fully_normalize_name(self.node.astext()) + + if old_name != new_name: + # if name would be changed, replace node names and + # document nameids mapping with new name. + names = section_node.setdefault('names', []) + names.append(new_name) + # Original section name (reference target name) should be kept to refer + # from other nodes which is still not translated or uses explicit target + # name like "`text to display <explicit target name_>`_".. + # So, `old_name` is still exist in `names`. + + _id = self.document.nameids.get(old_name, None) + explicit = self.document.nametypes.get(old_name, None) + + # * if explicit: _id is label. title node need another id. + # * if not explicit: + # + # * if _id is None: + # + # _id is None means: + # + # 1. _id was not provided yet. + # + # 2. _id was duplicated. + # + # old_name entry still exists in nameids and + # nametypes for another duplicated entry. + # + # * if _id is provided: below process + if _id: + if not explicit: + # _id was not duplicated. + # remove old_name entry from document ids database + # to reuse original _id. + self.document.nameids.pop(old_name, None) + self.document.nametypes.pop(old_name, None) + self.document.ids.pop(_id, None) + + # re-entry with new named section node. + # + # Note: msgnode that is a second parameter of the + # `note_implicit_target` is not necessary here because + # section_node has been noted previously on rst parsing by + # `docutils.parsers.rst.states.RSTState.new_subsection()` + # and already has `system_message` if needed. + self.document.note_implicit_target(section_node) + + # replace target's refname to new target name + matcher = NodeMatcher(nodes.target, refname=old_name) + for old_target in self.document.findall(matcher): # type: nodes.target + old_target['refname'] = new_name + + processed = True + + return processed + + def update_autofootnote_references(self) -> None: + # auto-numbered foot note reference should use original 'ids'. + def list_replace_or_append(lst: list[N], old: N, new: N) -> None: + if old in lst: + lst[lst.index(old)] = new + else: + lst.append(new) + + is_autofootnote_ref = NodeMatcher(nodes.footnote_reference, auto=Any) + old_foot_refs: list[nodes.footnote_reference] = [ + *self.node.findall(is_autofootnote_ref)] + new_foot_refs: list[nodes.footnote_reference] = [ + *self.patch.findall(is_autofootnote_ref)] + self.compare_references(old_foot_refs, new_foot_refs, + __('inconsistent footnote references in translated message.' + + ' original: {0}, translated: {1}')) + old_foot_namerefs: dict[str, list[nodes.footnote_reference]] = {} + for r in old_foot_refs: + old_foot_namerefs.setdefault(r.get('refname'), []).append(r) + for newf in new_foot_refs: + refname = newf.get('refname') + refs = old_foot_namerefs.get(refname, []) + if not refs: + newf.parent.remove(newf) + continue + + oldf = refs.pop(0) + newf['ids'] = oldf['ids'] + for id in newf['ids']: + self.document.ids[id] = newf + + if newf['auto'] == 1: + # autofootnote_refs + list_replace_or_append(self.document.autofootnote_refs, oldf, newf) + else: + # symbol_footnote_refs + list_replace_or_append(self.document.symbol_footnote_refs, oldf, newf) + + if refname: + footnote_refs = self.document.footnote_refs.setdefault(refname, []) + list_replace_or_append(footnote_refs, oldf, newf) + + refnames = self.document.refnames.setdefault(refname, []) + list_replace_or_append(refnames, oldf, newf) + + def update_refnamed_references(self) -> None: + # reference should use new (translated) 'refname'. + # * reference target ".. _Python: ..." is not translatable. + # * use translated refname for section refname. + # * inline reference "`Python <...>`_" has no 'refname'. + is_refnamed_ref = NodeMatcher(nodes.reference, refname=Any) + old_refs: list[nodes.reference] = [*self.node.findall(is_refnamed_ref)] + new_refs: list[nodes.reference] = [*self.patch.findall(is_refnamed_ref)] + self.compare_references(old_refs, new_refs, + __('inconsistent references in translated message.' + + ' original: {0}, translated: {1}')) + old_ref_names = [r['refname'] for r in old_refs] + new_ref_names = [r['refname'] for r in new_refs] + orphans = [*({*old_ref_names} - {*new_ref_names})] + for newr in new_refs: + if not self.document.has_name(newr['refname']): + # Maybe refname is translated but target is not translated. + # Note: multiple translated refnames break link ordering. + if orphans: + newr['refname'] = orphans.pop(0) + else: + # orphan refnames is already empty! + # reference number is same in new_refs and old_refs. + pass + + self.document.note_refname(newr) + + def update_refnamed_footnote_references(self) -> None: + # refnamed footnote should use original 'ids'. + is_refnamed_footnote_ref = NodeMatcher(nodes.footnote_reference, refname=Any) + old_foot_refs: list[nodes.footnote_reference] = [*self.node.findall( + is_refnamed_footnote_ref)] + new_foot_refs: list[nodes.footnote_reference] = [*self.patch.findall( + is_refnamed_footnote_ref)] + refname_ids_map: dict[str, list[str]] = {} + self.compare_references(old_foot_refs, new_foot_refs, + __('inconsistent footnote references in translated message.' + + ' original: {0}, translated: {1}')) + for oldf in old_foot_refs: + refname_ids_map.setdefault(oldf["refname"], []).append(oldf["ids"]) + for newf in new_foot_refs: + refname = newf["refname"] + if refname_ids_map.get(refname): + newf["ids"] = refname_ids_map[refname].pop(0) + + def update_citation_references(self) -> None: + # citation should use original 'ids'. + is_citation_ref = NodeMatcher(nodes.citation_reference, refname=Any) + old_cite_refs: list[nodes.citation_reference] = [*self.node.findall(is_citation_ref)] + new_cite_refs: list[nodes.citation_reference] = [*self.patch.findall(is_citation_ref)] + self.compare_references(old_cite_refs, new_cite_refs, + __('inconsistent citation references in translated message.' + + ' original: {0}, translated: {1}')) + refname_ids_map: dict[str, list[str]] = {} + for oldc in old_cite_refs: + refname_ids_map.setdefault(oldc["refname"], []).append(oldc["ids"]) + for newc in new_cite_refs: + refname = newc["refname"] + if refname_ids_map.get(refname): + newc["ids"] = refname_ids_map[refname].pop() + + def update_pending_xrefs(self) -> None: + # Original pending_xref['reftarget'] contain not-translated + # target name, new pending_xref must use original one. + # This code restricts to change ref-targets in the translation. + old_xrefs = [*self.node.findall(addnodes.pending_xref)] + new_xrefs = [*self.patch.findall(addnodes.pending_xref)] + self.compare_references(old_xrefs, new_xrefs, + __('inconsistent term references in translated message.' + + ' original: {0}, translated: {1}')) + + xref_reftarget_map: dict[tuple[str, str, str] | None, dict[str, Any]] = {} + + def get_ref_key(node: addnodes.pending_xref) -> tuple[str, str, str] | None: + case = node["refdomain"], node["reftype"] + if case == ('std', 'term'): + return None + else: + return ( + node["refdomain"], + node["reftype"], + node['reftarget'], + ) + + for old in old_xrefs: + key = get_ref_key(old) + if key: + xref_reftarget_map[key] = old.attributes + for new in new_xrefs: + key = get_ref_key(new) + # Copy attributes to keep original node behavior. Especially + # copying 'reftarget', 'py:module', 'py:class' are needed. + for k, v in xref_reftarget_map.get(key, {}).items(): + if k not in EXCLUDED_PENDING_XREF_ATTRIBUTES: + new[k] = v + + def update_leaves(self) -> None: + for child in self.patch.children: + child.parent = self.node + self.node.children = self.patch.children + + +class Locale(SphinxTransform): + """ + Replace translatable nodes with their translated doctree. + """ + default_priority = 20 + + def apply(self, **kwargs: Any) -> None: + settings, source = self.document.settings, self.document['source'] + msgstr = '' + + textdomain = docname_to_domain(self.env.docname, self.config.gettext_compact) + + # fetch translations + dirs = [path.join(self.env.srcdir, directory) + for directory in self.config.locale_dirs] + catalog, has_catalog = init_locale(dirs, self.config.language, textdomain) + if not has_catalog: + return + + catalogues = [getattr(catalog, '_catalog', None)] + while (catalog := catalog._fallback) is not None: # type: ignore[attr-defined] + catalogues.append(getattr(catalog, '_catalog', None)) + merged: dict[str, str] = {} + for catalogue in filter(None, reversed(catalogues)): # type: dict[str, str] + merged |= catalogue + + # phase1: replace reference ids with translated names + for node, msg in extract_messages(self.document): + msgstr = merged.get(msg, '') + + # There is no point in having #noqa on literal blocks because + # they cannot contain references. Recognizing it would just + # completely prevent escaping the #noqa. Outside of literal + # blocks, one can always write \#noqa. + if not isinstance(node, LITERAL_TYPE_NODES): + msgstr, _ = parse_noqa(msgstr) + + if msgstr.strip() == '': + # as-of-yet untranslated + node['translated'] = False + continue + if msgstr == msg: + # identical source and translated messages + node['translated'] = True + continue + + # Avoid "Literal block expected; none found." warnings. + # If msgstr ends with '::' then it cause warning message at + # parser.parse() processing. + # literal-block-warning is only appear in avobe case. + if msgstr.strip().endswith('::'): + msgstr += '\n\n dummy literal' + # dummy literal node will discard by 'patch = patch[0]' + + # literalblock need literal block notation to avoid it become + # paragraph. + if isinstance(node, LITERAL_TYPE_NODES): + msgstr = '::\n\n' + indent(msgstr, ' ' * 3) + + patch = publish_msgstr(self.app, msgstr, source, + node.line, self.config, settings) + # FIXME: no warnings about inconsistent references in this part + # XXX doctest and other block markup + if not isinstance(patch, nodes.paragraph): + continue # skip for now + + updater = _NodeUpdater(node, patch, self.document, noqa=False) + processed = updater.update_title_mapping() + + # glossary terms update refid + if isinstance(node, nodes.term): + for _id in node['ids']: + parts = split_term_classifiers(msgstr) + patch = publish_msgstr( + self.app, parts[0] or '', source, node.line, self.config, settings, + ) + updater.patch = make_glossary_term( + self.env, patch, parts[1] or '', source, node.line, _id, self.document, + ) + processed = True + + # update leaves with processed nodes + if processed: + updater.update_leaves() + node['translated'] = True # to avoid double translation + else: + node['translated'] = False + + # phase2: translation + for node, msg in extract_messages(self.document): + if node.setdefault('translated', False): # to avoid double translation + continue # skip if the node is already translated by phase1 + + msgstr = merged.get(msg, '') + noqa = False + + # See above. + if not isinstance(node, LITERAL_TYPE_NODES): + msgstr, noqa = parse_noqa(msgstr) + + if not msgstr or msgstr == msg: # as-of-yet untranslated + node['translated'] = False + continue + + # update translatable nodes + if isinstance(node, addnodes.translatable): + node.apply_translated_message(msg, msgstr) # type: ignore[attr-defined] + continue + + # update meta nodes + if isinstance(node, nodes.meta): # type: ignore[attr-defined] + node['content'] = msgstr + node['translated'] = True + continue + + if isinstance(node, nodes.image) and node.get('alt') == msg: + node['alt'] = msgstr + continue + + # Avoid "Literal block expected; none found." warnings. + # If msgstr ends with '::' then it cause warning message at + # parser.parse() processing. + # literal-block-warning is only appear in avobe case. + if msgstr.strip().endswith('::'): + msgstr += '\n\n dummy literal' + # dummy literal node will discard by 'patch = patch[0]' + + # literalblock need literal block notation to avoid it become + # paragraph. + if isinstance(node, LITERAL_TYPE_NODES): + msgstr = '::\n\n' + indent(msgstr, ' ' * 3) + + # Structural Subelements phase1 + # There is a possibility that only the title node is created. + # see: https://docutils.sourceforge.io/docs/ref/doctree.html#structural-subelements + if isinstance(node, nodes.title): + # This generates: <section ...><title>msgstr</title></section> + msgstr = msgstr + '\n' + '=' * len(msgstr) * 2 + + patch = publish_msgstr(self.app, msgstr, source, + node.line, self.config, settings) + # Structural Subelements phase2 + if isinstance(node, nodes.title): + # get <title> node that placed as a first child + patch = patch.next_node() + + # ignore unexpected markups in translation message + unexpected: tuple[type[nodes.Element], ...] = ( + nodes.paragraph, # expected form of translation + nodes.title, # generated by above "Subelements phase2" + ) + + # following types are expected if + # config.gettext_additional_targets is configured + unexpected += LITERAL_TYPE_NODES + unexpected += IMAGE_TYPE_NODES + + if not isinstance(patch, unexpected): + continue # skip + + updater = _NodeUpdater(node, patch, self.document, noqa) + updater.update_autofootnote_references() + updater.update_refnamed_references() + updater.update_refnamed_footnote_references() + updater.update_citation_references() + updater.update_pending_xrefs() + updater.update_leaves() + + # for highlighting that expects .rawsource and .astext() are same. + if isinstance(node, LITERAL_TYPE_NODES): + node.rawsource = node.astext() + + if isinstance(node, nodes.image) and node.get('alt') != msg: + node['uri'] = patch['uri'] + node['translated'] = False + continue # do not mark translated + + node['translated'] = True # to avoid double translation + + if 'index' in self.config.gettext_additional_targets: + # Extract and translate messages for index entries. + for node, entries in traverse_translatable_index(self.document): + new_entries: list[tuple[str, str, str, str, str | None]] = [] + for entry_type, value, target_id, main, _category_key in entries: + msg_parts = split_index_msg(entry_type, value) + msgstr_parts = [] + for part in msg_parts: + msgstr = merged.get(part, '') + if not msgstr: + msgstr = part + msgstr_parts.append(msgstr) + + new_entry = entry_type, ';'.join(msgstr_parts), target_id, main, None + new_entries.append(new_entry) + + node['raw_entries'] = entries + node['entries'] = new_entries + + +class TranslationProgressTotaliser(SphinxTransform): + """ + Calculate the number of translated and untranslated nodes. + """ + default_priority = 25 # MUST happen after Locale + + def apply(self, **kwargs: Any) -> None: + from sphinx.builders.gettext import MessageCatalogBuilder + if isinstance(self.app.builder, MessageCatalogBuilder): + return + + total = translated = 0 + for node in self.document.findall(NodeMatcher(translated=Any)): # type: nodes.Element + total += 1 + if node['translated']: + translated += 1 + + self.document['translation_progress'] = { + 'total': total, + 'translated': translated, + } + + +class AddTranslationClasses(SphinxTransform): + """ + Add ``translated`` or ``untranslated`` classes to indicate translation status. + """ + default_priority = 950 + + def apply(self, **kwargs: Any) -> None: + from sphinx.builders.gettext import MessageCatalogBuilder + if isinstance(self.app.builder, MessageCatalogBuilder): + return + + if not self.config.translation_progress_classes: + return + + if self.config.translation_progress_classes is True: + add_translated = add_untranslated = True + elif self.config.translation_progress_classes == 'translated': + add_translated = True + add_untranslated = False + elif self.config.translation_progress_classes == 'untranslated': + add_translated = False + add_untranslated = True + else: + msg = ('translation_progress_classes must be ' + 'True, False, "translated" or "untranslated"') + raise ConfigError(msg) + + for node in self.document.findall(NodeMatcher(translated=Any)): # type: nodes.Element + if node['translated']: + if add_translated: + node.setdefault('classes', []).append('translated') + else: + if add_untranslated: + node.setdefault('classes', []).append('untranslated') + + +class RemoveTranslatableInline(SphinxTransform): + """ + Remove inline nodes used for translation as placeholders. + """ + default_priority = 999 + + def apply(self, **kwargs: Any) -> None: + from sphinx.builders.gettext import MessageCatalogBuilder + if isinstance(self.app.builder, MessageCatalogBuilder): + return + + matcher = NodeMatcher(nodes.inline, translatable=Any) + for inline in list(self.document.findall(matcher)): # type: nodes.inline + inline.parent.remove(inline) + inline.parent += inline.children + + +def setup(app: Sphinx) -> dict[str, Any]: + app.add_transform(PreserveTranslatableMessages) + app.add_transform(Locale) + app.add_transform(TranslationProgressTotaliser) + app.add_transform(AddTranslationClasses) + app.add_transform(RemoveTranslatableInline) + + return { + 'version': 'builtin', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } |