summary refs log tree commit diff stats
path: root/sphinx/builders/gettext.py
diff options
context:
space:
mode:
Diffstat (limited to 'sphinx/builders/gettext.py')
-rw-r--r-- sphinx/builders/gettext.py 306
1 files changed, 306 insertions, 0 deletions
diff --git a/sphinx/builders/gettext.py b/sphinx/builders/gettext.py
new file mode 100644
index 0000000..0b2bede
--- /dev/null
+++ b/sphinx/builders/gettext.py
@@ -0,0 +1,306 @@
+"""The MessageCatalogBuilder class."""
+
+from __future__ import annotations
+
+import time
+from codecs import open
+from collections import defaultdict
+from os import getenv, path, walk
+from typing import TYPE_CHECKING, Any
+from uuid import uuid4
+
+from docutils import nodes
+
+from sphinx import addnodes, package_dir
+from sphinx.builders import Builder
+from sphinx.errors import ThemeError
+from sphinx.locale import __
+from sphinx.util import logging
+from sphinx.util.console import bold # type: ignore[attr-defined]
+from sphinx.util.display import status_iterator
+from sphinx.util.i18n import CatalogInfo, docname_to_domain
+from sphinx.util.index_entries import split_index_msg
+from sphinx.util.nodes import extract_messages, traverse_translatable_index
+from sphinx.util.osutil import canon_path, ensuredir, relpath
+from sphinx.util.tags import Tags
+from sphinx.util.template import SphinxRenderer
+
+if TYPE_CHECKING:
+ import os
+ from collections.abc import Generator, Iterable
+
+ from docutils.nodes import Element
+
+ from sphinx.application import Sphinx
+
+logger = logging.getLogger(__name__)
+
+
class Message:
    """One translatable message plus the places it was collected from.

    Attributes are stored exactly as passed in:

    * ``text`` -- the message string,
    * ``locations`` -- ``(source, line)`` pairs,
    * ``uuids`` -- identifiers of the origins that produced the message.
    """

    def __init__(self, text: str, locations: list[tuple[str, int]], uuids: list[str]):
        self.text, self.locations, self.uuids = text, locations, uuids
+
+
class Catalog:
    """Ordered collection of translatable messages and where they occur."""

    def __init__(self) -> None:
        # message strings in first-seen order
        self.messages: list[str] = []

        # message string -> one (file, line, uid) tuple per occurrence
        self.metadata: dict[str, list[tuple[str, int, str]]] = {}

    def add(self, msg: str, origin: Element | MsgOrigin) -> None:
        """Record one occurrence of *msg* found at *origin*.

        Origins that have no ``uid`` attribute are ignored.  A missing line
        number (``None``) is stored as ``-1``.
        """
        if not hasattr(origin, 'uid'):
            # Replicated nodes (e.g. todo) carry no uid; they do not need
            # i18n either, so there is nothing to record.
            return

        occurrences = self.metadata.get(msg)
        if occurrences is None:
            # First sighting: remember the insertion order and start the
            # occurrence list for this message.
            self.messages.append(msg)
            occurrences = self.metadata[msg] = []

        lineno = origin.line
        if lineno is None:
            lineno = -1
        occurrences.append((origin.source, lineno, origin.uid))

    def __iter__(self) -> Generator[Message, None, None]:
        """Yield a ``Message`` per recorded string, in insertion order.

        Locations are de-duplicated and sorted; uids keep one entry per
        occurrence, in the order the occurrences were added.
        """
        for text in self.messages:
            occurrences = self.metadata[text]
            locations = sorted({(src, lineno) for src, lineno, _uid in occurrences})
            uids = [uid for _src, _lineno, uid in occurrences]
            yield Message(text, locations, uids)
+
+
class MsgOrigin:
    """
    Lightweight origin record for a Catalog message.

    Carries the ``source``, ``line`` and ``uid`` attributes of an origin;
    ``uid`` is a freshly generated random hex string.
    """

    def __init__(self, source: str, line: int) -> None:
        self.source, self.line = source, line
        # every origin gets its own identifier
        self.uid = uuid4().hex
+
+
class GettextRenderer(SphinxRenderer):
    """Template renderer used for message catalog output.

    Registers a string-escaping filter as ``e`` / ``escape`` and adds a
    ``relpath`` helper to every render context.
    """

    def __init__(
        self, template_path: list[str | os.PathLike[str]] | None = None,
        outdir: str | os.PathLike[str] | None = None,
    ) -> None:
        self.outdir = outdir
        if template_path is None:
            # fall back to the gettext templates shipped inside the package
            template_path = [path.join(package_dir, 'templates', 'gettext')]
        super().__init__(template_path)

        def escape(s: str) -> str:
            # Backslashes go first so that the ones introduced for quotes and
            # newlines below are not escaped a second time.
            escaped = s.replace('\\', r'\\').replace('"', r'\"')
            # A newline becomes a literal "\n", then the quoted string is
            # closed and a new one is opened on the following line.
            return escaped.replace('\n', '\\n"\n"')

        # install the escaping above under both filter names
        for filter_name in ('e', 'escape'):
            self.env.filters[filter_name] = escape

    def render(self, filename: str, context: dict[str, Any]) -> str:
        """Render *filename* with *context*.

        *context* is modified in place: a ``relpath`` callable is added that
        maps a path to its canonical form relative to ``self.outdir``.
        """
        def relative_to_outdir(s: str) -> str:
            rel = relpath(s, self.outdir)
            return canon_path(rel)

        context['relpath'] = relative_to_outdir
        return super().render(filename, context)
+
+
class I18nTags(Tags):
    """Stand-in tags object for I18nBuilder.

    Every condition evaluates to True, whether or not any tags are defined,
    so that all text inside ``only`` nodes gets translated.
    """

    def eval_condition(self, condition: Any) -> bool:
        """Accept *condition* unconditionally."""
        return True
+
+
class I18nBuilder(Builder):
    """
    Base i18n builder: gathers translatable messages into per-domain catalogs.
    """
    name = 'i18n'
    versioning_method = 'text'
    use_message_catalog = False

    def init(self) -> None:
        """Set up versioning, the always-true tags object and the catalog map."""
        super().init()
        self.env.set_versioning_method(self.versioning_method,
                                       self.env.config.gettext_uuid)
        self.tags = I18nTags()
        # text domain -> Catalog, created on first access
        self.catalogs: defaultdict[str, Catalog] = defaultdict(Catalog)

    def get_target_uri(self, docname: str, typ: str | None = None) -> str:
        """Return an empty URI for every document."""
        return ''

    def get_outdated_docs(self) -> set[str]:
        """Report every found document as outdated."""
        return self.env.found_docs

    def prepare_writing(self, docnames: set[str]) -> None:
        """Nothing to prepare."""
        return

    def compile_catalogs(self, catalogs: set[CatalogInfo], message: str) -> None:
        """Do nothing; this builder does not compile catalogs."""
        return

    def write_doc(self, docname: str, doctree: nodes.document) -> None:
        """Add the translatable messages of *docname* to its domain's catalog."""
        domain = docname_to_domain(docname, self.config.gettext_compact)
        catalog = self.catalogs[domain]

        # messages that live in the document's toctree entries
        for toctree in self.env.tocs[docname].findall(addnodes.toctree):
            for node, msg in extract_messages(toctree):
                node.uid = ''  # type: ignore[attr-defined]  # Hack UUID model
                catalog.add(msg, node)

        # messages from the document body
        for node, msg in extract_messages(doctree):
            if _is_node_in_substitution_definition(node):
                # Do not extract messages from within substitution definitions.
                continue
            catalog.add(msg, node)

        if 'index' not in self.env.config.gettext_additional_targets:
            return

        # Extract translatable messages from index entries.
        for node, entries in traverse_translatable_index(doctree):
            for entry in entries:
                entry_type, value, _target_id, _main, _category_key = entry
                for part in split_index_msg(entry_type, value):
                    catalog.add(part, node)
+
+
# The creation timestamp is computed once at import time, so it stays the
# same for the whole build (and is unaffected by DST changes during it).
# When SOURCE_DATE_EPOCH is set, it overrides the wall clock:
# https://reproducible-builds.org/specs/source-date-epoch/
source_date_epoch = getenv('SOURCE_DATE_EPOCH')
if source_date_epoch is None:
    timestamp = time.localtime()
else:
    timestamp = time.gmtime(float(source_date_epoch))
ctime = time.strftime('%Y-%m-%d %H:%M%z', timestamp)
+
+
def should_write(filepath: str, new_content: str) -> bool:
    """Decide whether *new_content* has to be written to *filepath*.

    Returns False only when the file exists and matches *new_content* in both
    the text before the ``POT-Creation-Date`` header and the text from the
    ``PO-Revision-Date`` header onwards; whatever lies between those two
    markers is not compared.  A missing file, or a marker that cannot be
    found in either text, yields True.
    """
    if not path.exists(filepath):
        return True

    creation_marker = '"POT-Creation-Date:'
    revision_marker = '"PO-Revision-Date:'
    try:
        with open(filepath, encoding='utf-8') as oldpot:
            old_content = oldpot.read()

        # str.index raises ValueError when a marker is absent
        old_head = old_content[:old_content.index(creation_marker)]
        new_head = new_content[:new_content.index(creation_marker)]
        old_tail = old_content[old_content.index(revision_marker):]
        new_tail = new_content[new_content.index(revision_marker):]
    except ValueError:
        # cannot compare -> rewrite to be safe
        return True

    return not (old_head == new_head and new_tail == old_tail)
+
+
def _is_node_in_substitution_definition(node: nodes.Node) -> bool:
    """Tell whether *node* is, or sits below, a substitution definition.

    Walks from *node* up through its ``parent`` links; a node that has no
    parent ends the walk and is itself not examined.
    """
    current = node
    while current.parent:
        if isinstance(current, nodes.substitution_definition):
            return True
        current = current.parent
    return False
+
+
class MessageCatalogBuilder(I18nBuilder):
    """
    Builds gettext-style message catalogs (.pot files).
    """
    name = 'gettext'
    epilog = __('The message catalogs are in %(outdir)s.')

    def init(self) -> None:
        """Initialise the base builder, then the template bridge."""
        super().init()
        self.create_template_bridge()
        self.templates.init(self)

    def _collect_templates(self) -> set[str]:
        """Return the canonical paths of all ``.html`` files found below the
        configured ``templates_path`` directories (resolved against srcdir).
        """
        template_files = set()
        for template_path in self.config.templates_path:
            tmpl_abs_path = path.join(self.app.srcdir, template_path)
            for dirpath, _dirs, files in walk(tmpl_abs_path):
                template_files.update(
                    canon_path(path.join(dirpath, fn))
                    for fn in files
                    if fn.endswith('.html')
                )
        return template_files

    def _extract_from_template(self) -> None:
        """Add the translatable strings of all templates to the 'sphinx' catalog.

        Raises:
            ThemeError: if reading a template or extracting from it fails;
                the message names the template and the original exception.
        """
        # sorted so that templates are processed in a stable order
        files = sorted(self._collect_templates())
        logger.info(bold(__('building [%s]: ') % self.name), nonl=True)
        logger.info(__('targets for %d template files'), len(files))

        extract_translations = self.templates.environment.extract_translations

        for template in status_iterator(files, __('reading templates... '), "purple",
                                        len(files), self.app.verbosity):
            try:
                with open(template, encoding='utf-8') as f:
                    context = f.read()
                for line, _meth, msg in extract_translations(context):
                    origin = MsgOrigin(template, line)
                    self.catalogs['sphinx'].add(msg, origin)
            except Exception as exc:
                # separate name: do not clobber the ``msg`` loop variable
                error = f'{template}: {exc!r}'
                raise ThemeError(error) from exc

    def build(
        self,
        docnames: Iterable[str] | None,
        summary: str | None = None,
        method: str = 'update',
    ) -> None:
        """Extract template messages first, then run the regular build."""
        self._extract_from_template()
        super().build(docnames, summary, method)

    def finish(self) -> None:
        """Render every collected catalog to ``<outdir>/<textdomain>.pot``.

        A file is only rewritten when ``should_write`` reports a difference
        from what is already on disk.
        """
        super().finish()
        context = {
            'version': self.config.version,
            'copyright': self.config.copyright,
            'project': self.config.project,
            'last_translator': self.config.gettext_last_translator,
            'language_team': self.config.gettext_language_team,
            'ctime': ctime,
            'display_location': self.config.gettext_location,
            'display_uuid': self.config.gettext_uuid,
        }
        # The renderer does not depend on the text domain, so it is built
        # once and shared by all catalogs instead of once per catalog.
        renderer = GettextRenderer(outdir=self.outdir)
        for textdomain, catalog in status_iterator(self.catalogs.items(),
                                                   __("writing message catalogs... "),
                                                   "darkgreen", len(self.catalogs),
                                                   self.app.verbosity,
                                                   lambda textdomain__: textdomain__[0]):
            # noop if config.gettext_compact is set
            ensuredir(path.join(self.outdir, path.dirname(textdomain)))

            # only the message list changes between catalogs
            context['messages'] = list(catalog)
            content = renderer.render('message.pot_t', context)

            pofn = path.join(self.outdir, textdomain + '.pot')
            if should_write(pofn, content):
                with open(pofn, 'w', encoding='utf-8') as pofile:
                    pofile.write(content)
+
+
def setup(app: Sphinx) -> dict[str, Any]:
    """Register the gettext builder and its config values on *app*.

    Returns the extension metadata dictionary.
    """
    app.add_builder(MessageCatalogBuilder)

    # Argument tuples for app.add_config_value, passed positionally and
    # registered in exactly this order.
    config_values: tuple[tuple[Any, ...], ...] = (
        ('gettext_compact', True, 'gettext', {bool, str}),
        ('gettext_location', True, 'gettext'),
        ('gettext_uuid', False, 'gettext'),
        ('gettext_auto_build', True, 'env'),
        ('gettext_additional_targets', [], 'env'),
        ('gettext_last_translator', 'FULL NAME <EMAIL@ADDRESS>', 'gettext'),
        ('gettext_language_team', 'LANGUAGE <LL@li.org>', 'gettext'),
    )
    for args in config_values:
        app.add_config_value(*args)

    metadata: dict[str, Any] = {
        'version': 'builtin',
        'parallel_read_safe': True,
        'parallel_write_safe': True,
    }
    return metadata