From cf7da1843c45a4c2df7a749f7886a2d2ba0ee92a Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 15 Apr 2024 19:25:40 +0200 Subject: Adding upstream version 7.2.6. Signed-off-by: Daniel Baumann --- sphinx/builders/__init__.py | 667 +++++++++++++++++ sphinx/builders/_epub_base.py | 710 ++++++++++++++++++ sphinx/builders/changes.py | 161 ++++ sphinx/builders/dirhtml.py | 53 ++ sphinx/builders/dummy.py | 48 ++ sphinx/builders/epub3.py | 301 ++++++++ sphinx/builders/gettext.py | 306 ++++++++ sphinx/builders/html/__init__.py | 1399 +++++++++++++++++++++++++++++++++++ sphinx/builders/html/_assets.py | 146 ++++ sphinx/builders/html/transforms.py | 86 +++ sphinx/builders/latex/__init__.py | 551 ++++++++++++++ sphinx/builders/latex/constants.py | 210 ++++++ sphinx/builders/latex/nodes.py | 37 + sphinx/builders/latex/theming.py | 135 ++++ sphinx/builders/latex/transforms.py | 642 ++++++++++++++++ sphinx/builders/latex/util.py | 48 ++ sphinx/builders/linkcheck.py | 641 ++++++++++++++++ sphinx/builders/manpage.py | 127 ++++ sphinx/builders/singlehtml.py | 202 +++++ sphinx/builders/texinfo.py | 229 ++++++ sphinx/builders/text.py | 94 +++ sphinx/builders/xml.py | 123 +++ 22 files changed, 6916 insertions(+) create mode 100644 sphinx/builders/__init__.py create mode 100644 sphinx/builders/_epub_base.py create mode 100644 sphinx/builders/changes.py create mode 100644 sphinx/builders/dirhtml.py create mode 100644 sphinx/builders/dummy.py create mode 100644 sphinx/builders/epub3.py create mode 100644 sphinx/builders/gettext.py create mode 100644 sphinx/builders/html/__init__.py create mode 100644 sphinx/builders/html/_assets.py create mode 100644 sphinx/builders/html/transforms.py create mode 100644 sphinx/builders/latex/__init__.py create mode 100644 sphinx/builders/latex/constants.py create mode 100644 sphinx/builders/latex/nodes.py create mode 100644 sphinx/builders/latex/theming.py create mode 100644 sphinx/builders/latex/transforms.py create mode 100644 sphinx/builders/latex/util.py create mode 100644 sphinx/builders/linkcheck.py create mode 100644 sphinx/builders/manpage.py create mode 100644 sphinx/builders/singlehtml.py create mode 100644 sphinx/builders/texinfo.py create mode 100644 sphinx/builders/text.py create mode 100644 sphinx/builders/xml.py (limited to 'sphinx/builders') diff --git a/sphinx/builders/__init__.py b/sphinx/builders/__init__.py new file mode 100644 index 0000000..805ee13 --- /dev/null +++ b/sphinx/builders/__init__.py @@ -0,0 +1,667 @@ +"""Builder superclass for all builders.""" + +from __future__ import annotations + +import codecs +import pickle +import time +from os import path +from typing import TYPE_CHECKING, Any + +from docutils import nodes +from docutils.utils import DependencyList + +from sphinx.environment import CONFIG_CHANGED_REASON, CONFIG_OK, BuildEnvironment +from sphinx.environment.adapters.asset import ImageAdapter +from sphinx.errors import SphinxError +from sphinx.locale import __ +from sphinx.util import UnicodeDecodeErrorHandler, get_filetype, import_object, logging, rst +from sphinx.util.build_phase import BuildPhase +from sphinx.util.console import bold # type: ignore[attr-defined] +from sphinx.util.display import progress_message, status_iterator +from sphinx.util.docutils import sphinx_domains +from sphinx.util.i18n import CatalogInfo, CatalogRepository, docname_to_domain +from sphinx.util.osutil import SEP, ensuredir, relative_uri, relpath +from sphinx.util.parallel import ParallelTasks, SerialTasks, make_chunks, parallel_available + +# side 
effect: registers roles and directives +from sphinx import directives # noqa: F401 isort:skip +from sphinx import roles # noqa: F401 isort:skip + +if TYPE_CHECKING: + from collections.abc import Iterable, Sequence + + from docutils.nodes import Node + + from sphinx.application import Sphinx + from sphinx.config import Config + from sphinx.events import EventManager + from sphinx.util.tags import Tags + from sphinx.util.typing import NoneType + + +logger = logging.getLogger(__name__) + + +class Builder: + """ + Builds target formats from the reST sources. + """ + + #: The builder's name, for the -b command line option. + name = '' + #: The builder's output format, or '' if no document output is produced. + format = '' + #: The message emitted upon successful build completion. This can be a + #: printf-style template string with the following keys: ``outdir``, + #: ``project`` + epilog = '' + + #: default translator class for the builder. This can be overridden by + #: :py:meth:`~sphinx.application.Sphinx.set_translator`. + default_translator_class: type[nodes.NodeVisitor] + # doctree versioning method + versioning_method = 'none' + versioning_compare = False + #: allow parallel write_doc() calls + allow_parallel = False + # support translation + use_message_catalog = True + + #: The list of MIME types of image formats supported by the builder. + #: Image files are searched in the order in which they appear here. + supported_image_types: list[str] = [] + #: The builder supports remote images or not. + supported_remote_images = False + #: The builder supports data URIs or not. + supported_data_uri_images = False + + def __init__(self, app: Sphinx, env: BuildEnvironment) -> None: + self.srcdir = app.srcdir + self.confdir = app.confdir + self.outdir = app.outdir + self.doctreedir = app.doctreedir + ensuredir(self.doctreedir) + + self.app: Sphinx = app + self.env: BuildEnvironment = env + self.env.set_versioning_method(self.versioning_method, + self.versioning_compare) + self.events: EventManager = app.events + self.config: Config = app.config + self.tags: Tags = app.tags + self.tags.add(self.format) + self.tags.add(self.name) + self.tags.add("format_%s" % self.format) + self.tags.add("builder_%s" % self.name) + + # images that need to be copied over (source -> dest) + self.images: dict[str, str] = {} + # basename of images directory + self.imagedir = "" + # relative path to image directory from current docname (used at writing docs) + self.imgpath = "" + + # these get set later + self.parallel_ok = False + self.finish_tasks: Any = None + + def get_translator_class(self, *args: Any) -> type[nodes.NodeVisitor]: + """Return a class of translator.""" + return self.app.registry.get_translator_class(self) + + def create_translator(self, *args: Any) -> nodes.NodeVisitor: + """Return an instance of translator. + + This method returns an instance of ``default_translator_class`` by default. + Users can replace the translator class with ``app.set_translator()`` API. + """ + return self.app.registry.create_translator(self, *args) + + # helper methods + def init(self) -> None: + """Load necessary templates and perform initialization. The default + implementation does nothing. 
+ """ + pass + + def create_template_bridge(self) -> None: + """Return the template bridge configured.""" + if self.config.template_bridge: + self.templates = import_object(self.config.template_bridge, + 'template_bridge setting')() + else: + from sphinx.jinja2glue import BuiltinTemplateLoader + self.templates = BuiltinTemplateLoader() + + def get_target_uri(self, docname: str, typ: str | None = None) -> str: + """Return the target URI for a document name. + + *typ* can be used to qualify the link characteristic for individual + builders. + """ + raise NotImplementedError + + def get_relative_uri(self, from_: str, to: str, typ: str | None = None) -> str: + """Return a relative URI between two source filenames. + + May raise environment.NoUri if there's no way to return a sensible URI. + """ + return relative_uri(self.get_target_uri(from_), + self.get_target_uri(to, typ)) + + def get_outdated_docs(self) -> str | Iterable[str]: + """Return an iterable of output files that are outdated, or a string + describing what an update build will build. + + If the builder does not output individual files corresponding to + source files, return a string here. If it does, return an iterable + of those files that need to be written. + """ + raise NotImplementedError + + def get_asset_paths(self) -> list[str]: + """Return list of paths for assets (ex. templates, CSS, etc.).""" + return [] + + def post_process_images(self, doctree: Node) -> None: + """Pick the best candidate for all image URIs.""" + images = ImageAdapter(self.env) + for node in doctree.findall(nodes.image): + if '?' in node['candidates']: + # don't rewrite nonlocal image URIs + continue + if '*' not in node['candidates']: + for imgtype in self.supported_image_types: + candidate = node['candidates'].get(imgtype, None) + if candidate: + break + else: + mimetypes = sorted(node['candidates']) + image_uri = images.get_original_image_uri(node['uri']) + if mimetypes: + logger.warning(__('a suitable image for %s builder not found: ' + '%s (%s)'), + self.name, mimetypes, image_uri, location=node) + else: + logger.warning(__('a suitable image for %s builder not found: %s'), + self.name, image_uri, location=node) + continue + node['uri'] = candidate + else: + candidate = node['uri'] + if candidate not in self.env.images: + # non-existing URI; let it alone + continue + self.images[candidate] = self.env.images[candidate][1] + + # compile po methods + + def compile_catalogs(self, catalogs: set[CatalogInfo], message: str) -> None: + if not self.config.gettext_auto_build: + return + + def cat2relpath(cat: CatalogInfo) -> str: + return relpath(cat.mo_path, self.env.srcdir).replace(path.sep, SEP) + + logger.info(bold(__('building [mo]: ')) + message) + for catalog in status_iterator(catalogs, __('writing output... 
'), "darkgreen",
+                                       len(catalogs), self.app.verbosity,
+                                       stringify_func=cat2relpath):
+            catalog.write_mo(self.config.language,
+                             self.config.gettext_allow_fuzzy_translations)
+
+    def compile_all_catalogs(self) -> None:
+        repo = CatalogRepository(self.srcdir, self.config.locale_dirs,
+                                 self.config.language, self.config.source_encoding)
+        message = __('all of %d po files') % len(list(repo.catalogs))
+        self.compile_catalogs(set(repo.catalogs), message)
+
+    def compile_specific_catalogs(self, specified_files: list[str]) -> None:
+        def to_domain(fpath: str) -> str | None:
+            docname = self.env.path2doc(path.abspath(fpath))
+            if docname:
+                return docname_to_domain(docname, self.config.gettext_compact)
+            else:
+                return None
+
+        catalogs = set()
+        domains = set(map(to_domain, specified_files))
+        repo = CatalogRepository(self.srcdir, self.config.locale_dirs,
+                                 self.config.language, self.config.source_encoding)
+        for catalog in repo.catalogs:
+            if catalog.domain in domains and catalog.is_outdated():
+                catalogs.add(catalog)
+        message = __('targets for %d po files that are specified') % len(catalogs)
+        self.compile_catalogs(catalogs, message)
+
+    # TODO(stephenfin): This would make more sense as 'compile_outdated_catalogs'
+    def compile_update_catalogs(self) -> None:
+        repo = CatalogRepository(self.srcdir, self.config.locale_dirs,
+                                 self.config.language, self.config.source_encoding)
+        catalogs = {c for c in repo.catalogs if c.is_outdated()}
+        message = __('targets for %d po files that are out of date') % len(catalogs)
+        self.compile_catalogs(catalogs, message)
+
+    # build methods
+
+    def build_all(self) -> None:
+        """Build all source files."""
+        self.compile_all_catalogs()
+
+        self.build(None, summary=__('all source files'), method='all')
+
+    def build_specific(self, filenames: list[str]) -> None:
+        """Only rebuild as much as needed for changes in the *filenames*."""
+        docnames: list[str] = []
+
+        for filename in filenames:
+            filename = path.normpath(path.abspath(filename))
+
+            if not path.isfile(filename):
+                logger.warning(__('file %r given on command line does not exist, '
+                                  'ignoring'), filename)
+                continue
+
+            if not filename.startswith(str(self.srcdir)):
+                logger.warning(__('file %r given on command line is not under the '
+                                  'source directory, ignoring'), filename)
+                continue
+
+            docname = self.env.path2doc(filename)
+            if not docname:
+                logger.warning(__('file %r given on command line is not a valid '
+                                  'document, ignoring'), filename)
+                continue
+
+            docnames.append(docname)
+
+        self.compile_specific_catalogs(filenames)
+
+        self.build(docnames, method='specific',
+                   summary=__('%d source files given on command line') % len(docnames))
+
+    def build_update(self) -> None:
+        """Only rebuild what was changed or added since last build."""
+        self.compile_update_catalogs()
+
+        to_build = self.get_outdated_docs()
+        if isinstance(to_build, str):
+            self.build(['__all__'], to_build)
+        else:
+            to_build = list(to_build)
+            self.build(to_build,
+                       summary=__('targets for %d source files that are out of date') %
+                       len(to_build))
+
+    def build(
+        self,
+        docnames: Iterable[str] | None,
+        summary: str | None = None,
+        method: str = 'update',
+    ) -> None:
+        """Main build method.
+
+        First updates the environment, and then calls
+        :meth:`!write`.
+ """ + if summary: + logger.info(bold(__('building [%s]: ') % self.name) + summary) + + # while reading, collect all warnings from docutils + with logging.pending_warnings(): + updated_docnames = set(self.read()) + + doccount = len(updated_docnames) + logger.info(bold(__('looking for now-outdated files... ')), nonl=True) + for docname in self.env.check_dependents(self.app, updated_docnames): + updated_docnames.add(docname) + outdated = len(updated_docnames) - doccount + if outdated: + logger.info(__('%d found'), outdated) + else: + logger.info(__('none found')) + + if updated_docnames: + # save the environment + from sphinx.application import ENV_PICKLE_FILENAME + with progress_message(__('pickling environment')), \ + open(path.join(self.doctreedir, ENV_PICKLE_FILENAME), 'wb') as f: + pickle.dump(self.env, f, pickle.HIGHEST_PROTOCOL) + + # global actions + self.app.phase = BuildPhase.CONSISTENCY_CHECK + with progress_message(__('checking consistency')): + self.env.check_consistency() + else: + if method == 'update' and not docnames: + logger.info(bold(__('no targets are out of date.'))) + return + + self.app.phase = BuildPhase.RESOLVING + + # filter "docnames" (list of outdated files) by the updated + # found_docs of the environment; this will remove docs that + # have since been removed + if docnames and docnames != ['__all__']: + docnames = set(docnames) & self.env.found_docs + + # determine if we can write in parallel + if parallel_available and self.app.parallel > 1 and self.allow_parallel: + self.parallel_ok = self.app.is_parallel_allowed('write') + else: + self.parallel_ok = False + + # create a task executor to use for misc. "finish-up" tasks + # if self.parallel_ok: + # self.finish_tasks = ParallelTasks(self.app.parallel) + # else: + # for now, just execute them serially + self.finish_tasks = SerialTasks() + + # write all "normal" documents (or everything for some builders) + self.write(docnames, list(updated_docnames), method) + + # finish (write static files etc.) + self.finish() + + # wait for all tasks + self.finish_tasks.join() + + def read(self) -> list[str]: + """(Re-)read all files new or changed since last update. + + Store all environment docnames in the canonical format (ie using SEP as + a separator in place of os.path.sep). + """ + logger.info(bold(__('updating environment: ')), nonl=True) + + self.env.find_files(self.config, self) + updated = (self.env.config_status != CONFIG_OK) + added, changed, removed = self.env.get_outdated_files(updated) + + # allow user intervention as well + for docs in self.events.emit('env-get-outdated', self.env, added, changed, removed): + changed.update(set(docs) & self.env.found_docs) + + # if files were added or removed, all documents with globbed toctrees + # must be reread + if added or removed: + # ... 
but not those that already were removed + changed.update(self.env.glob_toctrees & self.env.found_docs) + + if updated: # explain the change iff build config status was not ok + reason = (CONFIG_CHANGED_REASON.get(self.env.config_status, '') + + (self.env.config_status_extra or '')) + logger.info('[%s] ', reason, nonl=True) + + logger.info(__('%s added, %s changed, %s removed'), + len(added), len(changed), len(removed)) + + # clear all files no longer present + for docname in removed: + self.events.emit('env-purge-doc', self.env, docname) + self.env.clear_doc(docname) + + # read all new and changed files + docnames = sorted(added | changed) + # allow changing and reordering the list of docs to read + self.events.emit('env-before-read-docs', self.env, docnames) + + # check if we should do parallel or serial read + if parallel_available and len(docnames) > 5 and self.app.parallel > 1: + par_ok = self.app.is_parallel_allowed('read') + else: + par_ok = False + + if par_ok: + self._read_parallel(docnames, nproc=self.app.parallel) + else: + self._read_serial(docnames) + + if self.config.root_doc not in self.env.all_docs: + raise SphinxError('root file %s not found' % + self.env.doc2path(self.config.root_doc)) + + for retval in self.events.emit('env-updated', self.env): + if retval is not None: + docnames.extend(retval) + + # workaround: marked as okay to call builder.read() twice in same process + self.env.config_status = CONFIG_OK + + return sorted(docnames) + + def _read_serial(self, docnames: list[str]) -> None: + for docname in status_iterator(docnames, __('reading sources... '), "purple", + len(docnames), self.app.verbosity): + # remove all inventory entries for that file + self.events.emit('env-purge-doc', self.env, docname) + self.env.clear_doc(docname) + self.read_doc(docname) + + def _read_parallel(self, docnames: list[str], nproc: int) -> None: + chunks = make_chunks(docnames, nproc) + + # create a status_iterator to step progressbar after reading a document + # (see: ``merge()`` function) + progress = status_iterator(chunks, __('reading sources... 
'), "purple", + len(chunks), self.app.verbosity) + + # clear all outdated docs at once + for docname in docnames: + self.events.emit('env-purge-doc', self.env, docname) + self.env.clear_doc(docname) + + def read_process(docs: list[str]) -> bytes: + self.env.app = self.app + for docname in docs: + self.read_doc(docname, _cache=False) + # allow pickling self to send it back + return pickle.dumps(self.env, pickle.HIGHEST_PROTOCOL) + + def merge(docs: list[str], otherenv: bytes) -> None: + env = pickle.loads(otherenv) + self.env.merge_info_from(docs, env, self.app) + + next(progress) + + tasks = ParallelTasks(nproc) + for chunk in chunks: + tasks.add_task(read_process, chunk, merge) + + # make sure all threads have finished + tasks.join() + logger.info('') + + def read_doc(self, docname: str, *, _cache: bool = True) -> None: + """Parse a file and add/update inventory entries for the doctree.""" + self.env.prepare_settings(docname) + + # Add confdir/docutils.conf to dependencies list if exists + docutilsconf = path.join(self.confdir, 'docutils.conf') + if path.isfile(docutilsconf): + self.env.note_dependency(docutilsconf) + + filename = self.env.doc2path(docname) + filetype = get_filetype(self.app.config.source_suffix, filename) + publisher = self.app.registry.get_publisher(self.app, filetype) + # record_dependencies is mutable even though it is in settings, + # explicitly re-initialise for each document + publisher.settings.record_dependencies = DependencyList() + with sphinx_domains(self.env), rst.default_role(docname, self.config.default_role): + # set up error_handler for the target document + codecs.register_error('sphinx', + UnicodeDecodeErrorHandler(docname)) # type: ignore[arg-type] + + publisher.set_source(source_path=filename) + publisher.publish() + doctree = publisher.document + + # store time of reading, for outdated files detection + self.env.all_docs[docname] = time.time_ns() // 1_000 + + # cleanup + self.env.temp_data.clear() + self.env.ref_context.clear() + + self.write_doctree(docname, doctree, _cache=_cache) + + def write_doctree( + self, docname: str, doctree: nodes.document, *, _cache: bool = True, + ) -> None: + """Write the doctree to a file.""" + # make it picklable + doctree.reporter = None # type: ignore[assignment] + doctree.transformer = None # type: ignore[assignment] + + # Create a copy of settings object before modification because it is + # shared with other documents. 
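+        # (For instance, ``record_dependencies`` is nulled out below; without
+        # the copy, that would also clear the publisher settings still in use
+        # for reading the remaining documents in this process.)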
+ doctree.settings = doctree.settings.copy() + doctree.settings.warning_stream = None + doctree.settings.env = None + doctree.settings.record_dependencies = None # type: ignore[assignment] + + doctree_filename = path.join(self.doctreedir, docname + '.doctree') + ensuredir(path.dirname(doctree_filename)) + with open(doctree_filename, 'wb') as f: + pickle.dump(doctree, f, pickle.HIGHEST_PROTOCOL) + + # When Sphinx is running in parallel mode, ``write_doctree()`` is invoked + # in the context of a process worker, and thus it does not make sense to + # pickle the doctree and send it to the main process + if _cache: + self.env._write_doc_doctree_cache[docname] = doctree + + def write( + self, + build_docnames: Iterable[str] | None, + updated_docnames: Sequence[str], + method: str = 'update', + ) -> None: + if build_docnames is None or build_docnames == ['__all__']: + # build_all + build_docnames = self.env.found_docs + if method == 'update': + # build updated ones as well + docnames = set(build_docnames) | set(updated_docnames) + else: + docnames = set(build_docnames) + logger.debug(__('docnames to write: %s'), ', '.join(sorted(docnames))) + + # add all toctree-containing files that may have changed + for docname in list(docnames): + for tocdocname in self.env.files_to_rebuild.get(docname, set()): + if tocdocname in self.env.found_docs: + docnames.add(tocdocname) + docnames.add(self.config.root_doc) + + with progress_message(__('preparing documents')): + self.prepare_writing(docnames) + + with progress_message(__('copying assets')): + self.copy_assets() + + if self.parallel_ok: + # number of subprocesses is parallel-1 because the main process + # is busy loading doctrees and doing write_doc_serialized() + self._write_parallel(sorted(docnames), + nproc=self.app.parallel - 1) + else: + self._write_serial(sorted(docnames)) + + def _write_serial(self, docnames: Sequence[str]) -> None: + with logging.pending_warnings(): + for docname in status_iterator(docnames, __('writing output... '), "darkgreen", + len(docnames), self.app.verbosity): + self.app.phase = BuildPhase.RESOLVING + doctree = self.env.get_and_resolve_doctree(docname, self) + self.app.phase = BuildPhase.WRITING + self.write_doc_serialized(docname, doctree) + self.write_doc(docname, doctree) + + def _write_parallel(self, docnames: Sequence[str], nproc: int) -> None: + def write_process(docs: list[tuple[str, nodes.document]]) -> None: + self.app.phase = BuildPhase.WRITING + for docname, doctree in docs: + self.write_doc(docname, doctree) + + # warm up caches/compile templates using the first document + firstname, docnames = docnames[0], docnames[1:] + self.app.phase = BuildPhase.RESOLVING + doctree = self.env.get_and_resolve_doctree(firstname, self) + self.app.phase = BuildPhase.WRITING + self.write_doc_serialized(firstname, doctree) + self.write_doc(firstname, doctree) + + tasks = ParallelTasks(nproc) + chunks = make_chunks(docnames, nproc) + + # create a status_iterator to step progressbar after writing a document + # (see: ``on_chunk_done()`` function) + progress = status_iterator(chunks, __('writing output... 
'), "darkgreen", + len(chunks), self.app.verbosity) + + def on_chunk_done(args: list[tuple[str, NoneType]], result: NoneType) -> None: + next(progress) + + self.app.phase = BuildPhase.RESOLVING + for chunk in chunks: + arg = [] + for docname in chunk: + doctree = self.env.get_and_resolve_doctree(docname, self) + self.write_doc_serialized(docname, doctree) + arg.append((docname, doctree)) + tasks.add_task(write_process, arg, on_chunk_done) + + # make sure all threads have finished + tasks.join() + logger.info('') + + def prepare_writing(self, docnames: set[str]) -> None: + """A place where you can add logic before :meth:`write_doc` is run""" + raise NotImplementedError + + def copy_assets(self) -> None: + """Where assets (images, static files, etc) are copied before writing""" + pass + + def write_doc(self, docname: str, doctree: nodes.document) -> None: + """Where you actually write something to the filesystem.""" + raise NotImplementedError + + def write_doc_serialized(self, docname: str, doctree: nodes.document) -> None: + """Handle parts of write_doc that must be called in the main process + if parallel build is active. + """ + pass + + def finish(self) -> None: + """Finish the building process. + + The default implementation does nothing. + """ + pass + + def cleanup(self) -> None: + """Cleanup any resources. + + The default implementation does nothing. + """ + pass + + def get_builder_config(self, option: str, default: str) -> Any: + """Return a builder specific option. + + This method allows customization of common builder settings by + inserting the name of the current builder in the option key. + If the key does not exist, use default as builder name. + """ + # At the moment, only XXX_use_index is looked up this way. + # Every new builder variant must be registered in Config.config_values. + try: + optname = f'{self.name}_{option}' + return getattr(self.config, optname) + except AttributeError: + optname = f'{default}_{option}' + return getattr(self.config, optname) diff --git a/sphinx/builders/_epub_base.py b/sphinx/builders/_epub_base.py new file mode 100644 index 0000000..f0db49b --- /dev/null +++ b/sphinx/builders/_epub_base.py @@ -0,0 +1,710 @@ +"""Base class of epub2/epub3 builders.""" + +from __future__ import annotations + +import html +import os +import re +import time +from os import path +from typing import TYPE_CHECKING, Any, NamedTuple +from urllib.parse import quote +from zipfile import ZIP_DEFLATED, ZIP_STORED, ZipFile + +from docutils import nodes +from docutils.utils import smartquotes + +from sphinx import addnodes +from sphinx.builders.html import BuildInfo, StandaloneHTMLBuilder +from sphinx.locale import __ +from sphinx.util import logging +from sphinx.util.display import status_iterator +from sphinx.util.fileutil import copy_asset_file +from sphinx.util.osutil import copyfile, ensuredir, relpath + +if TYPE_CHECKING: + from docutils.nodes import Element, Node + +try: + from PIL import Image +except ImportError: + Image = None + + +logger = logging.getLogger(__name__) + + +# (Fragment) templates from which the metainfo files content.opf and +# toc.ncx are created. +# This template section also defines strings that are embedded in the html +# output but that may be customized by (re-)setting module attributes, +# e.g. from conf.py. 
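+#
+# A hypothetical conf.py override, for illustration (any of the module
+# attributes below can be re-set this way):
+#
+#     from sphinx.builders import _epub_base
+#     _epub_base.FOOTNOTES_RUBRIC_NAME = 'Notes'
+#     _epub_base.LINK_TARGET_TEMPLATE = ' <%(uri)s>'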
+COVERPAGE_NAME = 'epub-cover.xhtml'
+
+TOCTREE_TEMPLATE = 'toctree-l%d'
+
+LINK_TARGET_TEMPLATE = ' [%(uri)s]'
+
+FOOTNOTE_LABEL_TEMPLATE = '#%d'
+
+FOOTNOTES_RUBRIC_NAME = 'Footnotes'
+
+CSS_LINK_TARGET_CLASS = 'link-target'
+
+# XXX These strings should be localized according to epub_language
+GUIDE_TITLES = {
+    'toc': 'Table of Contents',
+    'cover': 'Cover',
+}
+
+MEDIA_TYPES = {
+    '.xhtml': 'application/xhtml+xml',
+    '.css': 'text/css',
+    '.png': 'image/png',
+    '.webp': 'image/webp',
+    '.gif': 'image/gif',
+    '.svg': 'image/svg+xml',
+    '.jpg': 'image/jpeg',
+    '.jpeg': 'image/jpeg',
+    '.otf': 'font/otf',
+    '.ttf': 'font/ttf',
+    '.woff': 'font/woff',
+}
+
+VECTOR_GRAPHICS_EXTENSIONS = ('.svg',)
+
+# Regular expression to match colons only in local fragment identifiers.
+# If the URI contains a colon before the #,
+# it is an external link that should not change.
+REFURI_RE = re.compile("([^#:]*#)(.*)")
+
+
+class ManifestItem(NamedTuple):
+    href: str
+    id: str
+    media_type: str
+
+
+class Spine(NamedTuple):
+    idref: str
+    linear: bool
+
+
+class Guide(NamedTuple):
+    type: str
+    title: str
+    uri: str
+
+
+class NavPoint(NamedTuple):
+    navpoint: str
+    playorder: int
+    text: str
+    refuri: str
+    children: list[NavPoint]
+
+
+def sphinx_smarty_pants(t: str, language: str = 'en') -> str:
+    t = t.replace('&quot;', '"')
+    t = smartquotes.educateDashesOldSchool(t)
+    t = smartquotes.educateQuotes(t, language)
+    t = t.replace('"', '&quot;')
+    return t
+
+
+ssp = sphinx_smarty_pants
+
+
+# The epub publisher
+
+class EpubBuilder(StandaloneHTMLBuilder):
+    """
+    Builder that outputs epub files.
+
+    It creates the metainfo files container.opf, toc.ncx, mimetype, and
+    META-INF/container.xml. Afterwards, all necessary files are zipped to an
+    epub file.
+    """
+
+    # don't copy the reST source
+    copysource = False
+    supported_image_types = ['image/svg+xml', 'image/png', 'image/gif',
+                             'image/jpeg']
+    supported_remote_images = False
+
+    # don't add links
+    add_permalinks = False
+    # don't use # as current path. ePub check rejects it.
+    allow_sharp_as_current_path = False
+    # don't add sidebar etc.
+ embedded = True + # disable download role + download_support = False + # don't create links to original images from images + html_scaled_image_link = False + # don't generate search index or include search page + search = False + + coverpage_name = COVERPAGE_NAME + toctree_template = TOCTREE_TEMPLATE + link_target_template = LINK_TARGET_TEMPLATE + css_link_target_class = CSS_LINK_TARGET_CLASS + guide_titles = GUIDE_TITLES + media_types = MEDIA_TYPES + refuri_re = REFURI_RE + template_dir = "" + doctype = "" + + def init(self) -> None: + super().init() + # the output files for epub must be .html only + self.out_suffix = '.xhtml' + self.link_suffix = '.xhtml' + self.playorder = 0 + self.tocid = 0 + self.id_cache: dict[str, str] = {} + self.use_index = self.get_builder_config('use_index', 'epub') + self.refnodes: list[dict[str, Any]] = [] + + def create_build_info(self) -> BuildInfo: + return BuildInfo(self.config, self.tags, ['html', 'epub']) + + def get_theme_config(self) -> tuple[str, dict]: + return self.config.epub_theme, self.config.epub_theme_options + + # generic support functions + def make_id(self, name: str) -> str: + # id_cache is intentionally mutable + """Return a unique id for name.""" + id = self.id_cache.get(name) + if not id: + id = 'epub-%d' % self.env.new_serialno('epub') + self.id_cache[name] = id + return id + + def get_refnodes( + self, doctree: Node, result: list[dict[str, Any]], + ) -> list[dict[str, Any]]: + """Collect section titles, their depth in the toc and the refuri.""" + # XXX: is there a better way than checking the attribute + # toctree-l[1-8] on the parent node? + if isinstance(doctree, nodes.reference) and doctree.get('refuri'): + refuri = doctree['refuri'] + if refuri.startswith(('http://', 'https://', 'irc:', 'mailto:')): + return result + classes = doctree.parent.attributes['classes'] + for level in range(8, 0, -1): # or range(1, 8)? + if (self.toctree_template % level) in classes: + result.append({ + 'level': level, + 'refuri': html.escape(refuri), + 'text': ssp(html.escape(doctree.astext())), + }) + break + elif isinstance(doctree, nodes.Element): + for elem in doctree: + result = self.get_refnodes(elem, result) + return result + + def check_refnodes(self, nodes: list[dict[str, Any]]) -> None: + appeared: set[str] = set() + for node in nodes: + if node['refuri'] in appeared: + logger.warning( + __('duplicated ToC entry found: %s'), + node['refuri'], + type="epub", + subtype="duplicated_toc_entry", + ) + else: + appeared.add(node['refuri']) + + def get_toc(self) -> None: + """Get the total table of contents, containing the root_doc + and pre and post files not managed by sphinx. + """ + doctree = self.env.get_and_resolve_doctree(self.config.root_doc, + self, prune_toctrees=False, + includehidden=True) + self.refnodes = self.get_refnodes(doctree, []) + master_dir = path.dirname(self.config.root_doc) + if master_dir: + master_dir += '/' # XXX or os.sep? + for item in self.refnodes: + item['refuri'] = master_dir + item['refuri'] + self.toc_add_files(self.refnodes) + + def toc_add_files(self, refnodes: list[dict[str, Any]]) -> None: + """Add the root_doc, pre and post files to a list of refnodes. 
+ """ + refnodes.insert(0, { + 'level': 1, + 'refuri': html.escape(self.config.root_doc + self.out_suffix), + 'text': ssp(html.escape( + self.env.titles[self.config.root_doc].astext())), + }) + for file, text in reversed(self.config.epub_pre_files): + refnodes.insert(0, { + 'level': 1, + 'refuri': html.escape(file), + 'text': ssp(html.escape(text)), + }) + for file, text in self.config.epub_post_files: + refnodes.append({ + 'level': 1, + 'refuri': html.escape(file), + 'text': ssp(html.escape(text)), + }) + + def fix_fragment(self, prefix: str, fragment: str) -> str: + """Return a href/id attribute with colons replaced by hyphens.""" + return prefix + fragment.replace(':', '-') + + def fix_ids(self, tree: nodes.document) -> None: + """Replace colons with hyphens in href and id attributes. + + Some readers crash because they interpret the part as a + transport protocol specification. + """ + def update_node_id(node: Element) -> None: + """Update IDs of given *node*.""" + new_ids: list[str] = [] + for node_id in node['ids']: + new_id = self.fix_fragment('', node_id) + if new_id not in new_ids: + new_ids.append(new_id) + node['ids'] = new_ids + + for reference in tree.findall(nodes.reference): + if 'refuri' in reference: + m = self.refuri_re.match(reference['refuri']) + if m: + reference['refuri'] = self.fix_fragment(m.group(1), m.group(2)) + if 'refid' in reference: + reference['refid'] = self.fix_fragment('', reference['refid']) + + for target in tree.findall(nodes.target): + update_node_id(target) + + next_node: Node = target.next_node(ascend=True) + if isinstance(next_node, nodes.Element): + update_node_id(next_node) + + for desc_signature in tree.findall(addnodes.desc_signature): + update_node_id(desc_signature) + + def add_visible_links(self, tree: nodes.document, show_urls: str = 'inline') -> None: + """Add visible link targets for external links""" + + def make_footnote_ref(doc: nodes.document, label: str) -> nodes.footnote_reference: + """Create a footnote_reference node with children""" + footnote_ref = nodes.footnote_reference('[#]_') + footnote_ref.append(nodes.Text(label)) + doc.note_autofootnote_ref(footnote_ref) + return footnote_ref + + def make_footnote(doc: nodes.document, label: str, uri: str) -> nodes.footnote: + """Create a footnote node with children""" + footnote = nodes.footnote(uri) + para = nodes.paragraph() + para.append(nodes.Text(uri)) + footnote.append(para) + footnote.insert(0, nodes.label('', label)) + doc.note_autofootnote(footnote) + return footnote + + def footnote_spot(tree: nodes.document) -> tuple[Element, int]: + """Find or create a spot to place footnotes. 
+ + The function returns the tuple (parent, index).""" + # The code uses the following heuristic: + # a) place them after the last existing footnote + # b) place them after an (empty) Footnotes rubric + # c) create an empty Footnotes rubric at the end of the document + fns = list(tree.findall(nodes.footnote)) + if fns: + fn = fns[-1] + return fn.parent, fn.parent.index(fn) + 1 + for node in tree.findall(nodes.rubric): + if len(node) == 1 and node.astext() == FOOTNOTES_RUBRIC_NAME: + return node.parent, node.parent.index(node) + 1 + doc = next(tree.findall(nodes.document)) + rub = nodes.rubric() + rub.append(nodes.Text(FOOTNOTES_RUBRIC_NAME)) + doc.append(rub) + return doc, doc.index(rub) + 1 + + if show_urls == 'no': + return + if show_urls == 'footnote': + doc = next(tree.findall(nodes.document)) + fn_spot, fn_idx = footnote_spot(tree) + nr = 1 + for node in list(tree.findall(nodes.reference)): + uri = node.get('refuri', '') + if uri.startswith(('http:', 'https:', 'ftp:')) and uri not in node.astext(): + idx = node.parent.index(node) + 1 + if show_urls == 'inline': + uri = self.link_target_template % {'uri': uri} + link = nodes.inline(uri, uri) + link['classes'].append(self.css_link_target_class) + node.parent.insert(idx, link) + elif show_urls == 'footnote': + label = FOOTNOTE_LABEL_TEMPLATE % nr + nr += 1 + footnote_ref = make_footnote_ref(doc, label) + node.parent.insert(idx, footnote_ref) + footnote = make_footnote(doc, label, uri) + fn_spot.insert(fn_idx, footnote) + footnote_ref['refid'] = footnote['ids'][0] + footnote.add_backref(footnote_ref['ids'][0]) + fn_idx += 1 + + def write_doc(self, docname: str, doctree: nodes.document) -> None: + """Write one document file. + + This method is overwritten in order to fix fragment identifiers + and to add visible external links. + """ + self.fix_ids(doctree) + self.add_visible_links(doctree, self.config.epub_show_urls) + super().write_doc(docname, doctree) + + def fix_genindex(self, tree: list[tuple[str, list[tuple[str, Any]]]]) -> None: + """Fix href attributes for genindex pages.""" + # XXX: modifies tree inline + # Logic modeled from themes/basic/genindex.html + for _key, columns in tree: + for _entryname, (links, subitems, _key) in columns: + for (i, (ismain, link)) in enumerate(links): + m = self.refuri_re.match(link) + if m: + links[i] = (ismain, + self.fix_fragment(m.group(1), m.group(2))) + for _subentryname, subentrylinks in subitems: + for (i, (ismain, link)) in enumerate(subentrylinks): + m = self.refuri_re.match(link) + if m: + subentrylinks[i] = (ismain, + self.fix_fragment(m.group(1), m.group(2))) + + def is_vector_graphics(self, filename: str) -> bool: + """Does the filename extension indicate a vector graphic format?""" + ext = path.splitext(filename)[-1] + return ext in VECTOR_GRAPHICS_EXTENSIONS + + def copy_image_files_pil(self) -> None: + """Copy images using Pillow, the Python Imaging Library. + The method tries to read and write the files with Pillow, converting + the format and resizing the image if necessary/possible. + """ + ensuredir(path.join(self.outdir, self.imagedir)) + for src in status_iterator(self.images, __('copying images... 
'), "brown", + len(self.images), self.app.verbosity): + dest = self.images[src] + try: + img = Image.open(path.join(self.srcdir, src)) + except OSError: + if not self.is_vector_graphics(src): + logger.warning(__('cannot read image file %r: copying it instead'), + path.join(self.srcdir, src)) + try: + copyfile(path.join(self.srcdir, src), + path.join(self.outdir, self.imagedir, dest)) + except OSError as err: + logger.warning(__('cannot copy image file %r: %s'), + path.join(self.srcdir, src), err) + continue + if self.config.epub_fix_images: + if img.mode in ('P',): + # See the Pillow documentation for Image.convert() + # https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.convert + img = img.convert() + if self.config.epub_max_image_width > 0: + (width, height) = img.size + nw = self.config.epub_max_image_width + if width > nw: + nh = round((height * nw) / width) + img = img.resize((nw, nh), Image.BICUBIC) + try: + img.save(path.join(self.outdir, self.imagedir, dest)) + except OSError as err: + logger.warning(__('cannot write image file %r: %s'), + path.join(self.srcdir, src), err) + + def copy_image_files(self) -> None: + """Copy image files to destination directory. + This overwritten method can use Pillow to convert image files. + """ + if self.images: + if self.config.epub_fix_images or self.config.epub_max_image_width: + if not Image: + logger.warning(__('Pillow not found - copying image files')) + super().copy_image_files() + else: + self.copy_image_files_pil() + else: + super().copy_image_files() + + def copy_download_files(self) -> None: + pass + + def handle_page(self, pagename: str, addctx: dict, templatename: str = 'page.html', + outfilename: str | None = None, event_arg: Any = None) -> None: + """Create a rendered page. + + This method is overwritten for genindex pages in order to fix href link + attributes. + """ + if pagename.startswith('genindex') and 'genindexentries' in addctx: + if not self.use_index: + return + self.fix_genindex(addctx['genindexentries']) + addctx['doctype'] = self.doctype + super().handle_page(pagename, addctx, templatename, outfilename, event_arg) + + def build_mimetype(self) -> None: + """Write the metainfo file mimetype.""" + logger.info(__('writing mimetype file...')) + copy_asset_file(path.join(self.template_dir, 'mimetype'), self.outdir) + + def build_container(self, outname: str = 'META-INF/container.xml') -> None: + """Write the metainfo file META-INF/container.xml.""" + logger.info(__('writing META-INF/container.xml file...')) + outdir = path.join(self.outdir, 'META-INF') + ensuredir(outdir) + copy_asset_file(path.join(self.template_dir, 'container.xml'), outdir) + + def content_metadata(self) -> dict[str, Any]: + """Create a dictionary with all metadata for the content.opf + file properly escaped. 
+ """ + + if (source_date_epoch := os.getenv('SOURCE_DATE_EPOCH')) is not None: + time_tuple = time.gmtime(int(source_date_epoch)) + else: + time_tuple = time.gmtime() + + metadata: dict[str, Any] = {} + metadata['title'] = html.escape(self.config.epub_title) + metadata['author'] = html.escape(self.config.epub_author) + metadata['uid'] = html.escape(self.config.epub_uid) + metadata['lang'] = html.escape(self.config.epub_language) + metadata['publisher'] = html.escape(self.config.epub_publisher) + metadata['copyright'] = html.escape(self.config.epub_copyright) + metadata['scheme'] = html.escape(self.config.epub_scheme) + metadata['id'] = html.escape(self.config.epub_identifier) + metadata['date'] = html.escape(time.strftime('%Y-%m-%d', time_tuple)) + metadata['manifest_items'] = [] + metadata['spines'] = [] + metadata['guides'] = [] + return metadata + + def build_content(self) -> None: + """Write the metainfo file content.opf It contains bibliographic data, + a file list and the spine (the reading order). + """ + logger.info(__('writing content.opf file...')) + metadata = self.content_metadata() + + # files + self.files: list[str] = [] + self.ignored_files = ['.buildinfo', 'mimetype', 'content.opf', + 'toc.ncx', 'META-INF/container.xml', + 'Thumbs.db', 'ehthumbs.db', '.DS_Store', + 'nav.xhtml', self.config.epub_basename + '.epub'] + \ + self.config.epub_exclude_files + if not self.use_index: + self.ignored_files.append('genindex' + self.out_suffix) + for root, dirs, files in os.walk(self.outdir): + dirs.sort() + for fn in sorted(files): + filename = relpath(path.join(root, fn), self.outdir) + if filename in self.ignored_files: + continue + ext = path.splitext(filename)[-1] + if ext not in self.media_types: + # we always have JS and potentially OpenSearch files, don't + # always warn about them + if ext not in ('.js', '.xml'): + logger.warning(__('unknown mimetype for %s, ignoring'), filename, + type='epub', subtype='unknown_project_files') + continue + filename = filename.replace(os.sep, '/') + item = ManifestItem(html.escape(quote(filename)), + html.escape(self.make_id(filename)), + html.escape(self.media_types[ext])) + metadata['manifest_items'].append(item) + self.files.append(filename) + + # spine + spinefiles = set() + for refnode in self.refnodes: + if '#' in refnode['refuri']: + continue + if refnode['refuri'] in self.ignored_files: + continue + spine = Spine(html.escape(self.make_id(refnode['refuri'])), True) + metadata['spines'].append(spine) + spinefiles.add(refnode['refuri']) + for info in self.domain_indices: + spine = Spine(html.escape(self.make_id(info[0] + self.out_suffix)), True) + metadata['spines'].append(spine) + spinefiles.add(info[0] + self.out_suffix) + if self.use_index: + spine = Spine(html.escape(self.make_id('genindex' + self.out_suffix)), True) + metadata['spines'].append(spine) + spinefiles.add('genindex' + self.out_suffix) + # add auto generated files + for name in self.files: + if name not in spinefiles and name.endswith(self.out_suffix): + spine = Spine(html.escape(self.make_id(name)), False) + metadata['spines'].append(spine) + + # add the optional cover + html_tmpl = None + if self.config.epub_cover: + image, html_tmpl = self.config.epub_cover + image = image.replace(os.sep, '/') + metadata['cover'] = html.escape(self.make_id(image)) + if html_tmpl: + spine = Spine(html.escape(self.make_id(self.coverpage_name)), True) + metadata['spines'].insert(0, spine) + if self.coverpage_name not in self.files: + ext = path.splitext(self.coverpage_name)[-1] + 
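+                    # register the rendered cover page as an output file so it
+                    # lands in the manifest and is packed into the epub archive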
self.files.append(self.coverpage_name) + item = ManifestItem(html.escape(self.coverpage_name), + html.escape(self.make_id(self.coverpage_name)), + html.escape(self.media_types[ext])) + metadata['manifest_items'].append(item) + ctx = {'image': html.escape(image), 'title': self.config.project} + self.handle_page( + path.splitext(self.coverpage_name)[0], ctx, html_tmpl) + spinefiles.add(self.coverpage_name) + + auto_add_cover = True + auto_add_toc = True + if self.config.epub_guide: + for type, uri, title in self.config.epub_guide: + file = uri.split('#')[0] + if file not in self.files: + self.files.append(file) + if type == 'cover': + auto_add_cover = False + if type == 'toc': + auto_add_toc = False + metadata['guides'].append(Guide(html.escape(type), + html.escape(title), + html.escape(uri))) + if auto_add_cover and html_tmpl: + metadata['guides'].append(Guide('cover', + self.guide_titles['cover'], + html.escape(self.coverpage_name))) + if auto_add_toc and self.refnodes: + metadata['guides'].append(Guide('toc', + self.guide_titles['toc'], + html.escape(self.refnodes[0]['refuri']))) + + # write the project file + copy_asset_file(path.join(self.template_dir, 'content.opf_t'), self.outdir, metadata) + + def new_navpoint(self, node: dict[str, Any], level: int, incr: bool = True) -> NavPoint: + """Create a new entry in the toc from the node at given level.""" + # XXX Modifies the node + if incr: + self.playorder += 1 + self.tocid += 1 + return NavPoint('navPoint%d' % self.tocid, self.playorder, + node['text'], node['refuri'], []) + + def build_navpoints(self, nodes: list[dict[str, Any]]) -> list[NavPoint]: + """Create the toc navigation structure. + + Subelements of a node are nested inside the navpoint. For nested nodes + the parent node is reinserted in the subnav. + """ + navstack: list[NavPoint] = [] + navstack.append(NavPoint('dummy', 0, '', '', [])) + level = 0 + lastnode = None + for node in nodes: + if not node['text']: + continue + file = node['refuri'].split('#')[0] + if file in self.ignored_files: + continue + if node['level'] > self.config.epub_tocdepth: + continue + if node['level'] == level: + navpoint = self.new_navpoint(node, level) + navstack.pop() + navstack[-1].children.append(navpoint) + navstack.append(navpoint) + elif node['level'] == level + 1: + level += 1 + if lastnode and self.config.epub_tocdup: + # Insert starting point in subtoc with same playOrder + navstack[-1].children.append(self.new_navpoint(lastnode, level, False)) + navpoint = self.new_navpoint(node, level) + navstack[-1].children.append(navpoint) + navstack.append(navpoint) + elif node['level'] < level: + while node['level'] < len(navstack): + navstack.pop() + level = node['level'] + navpoint = self.new_navpoint(node, level) + navstack[-1].children.append(navpoint) + navstack.append(navpoint) + else: + raise + lastnode = node + + return navstack[0].children + + def toc_metadata(self, level: int, navpoints: list[NavPoint]) -> dict[str, Any]: + """Create a dictionary with all metadata for the toc.ncx file + properly escaped. 
+ """ + metadata: dict[str, Any] = {} + metadata['uid'] = self.config.epub_uid + metadata['title'] = html.escape(self.config.epub_title) + metadata['level'] = level + metadata['navpoints'] = navpoints + return metadata + + def build_toc(self) -> None: + """Write the metainfo file toc.ncx.""" + logger.info(__('writing toc.ncx file...')) + + if self.config.epub_tocscope == 'default': + doctree = self.env.get_and_resolve_doctree(self.config.root_doc, + self, prune_toctrees=False, + includehidden=False) + refnodes = self.get_refnodes(doctree, []) + self.toc_add_files(refnodes) + else: + # 'includehidden' + refnodes = self.refnodes + self.check_refnodes(refnodes) + navpoints = self.build_navpoints(refnodes) + level = max(item['level'] for item in self.refnodes) + level = min(level, self.config.epub_tocdepth) + copy_asset_file(path.join(self.template_dir, 'toc.ncx_t'), self.outdir, + self.toc_metadata(level, navpoints)) + + def build_epub(self) -> None: + """Write the epub file. + + It is a zip file with the mimetype file stored uncompressed as the first + entry. + """ + outname = self.config.epub_basename + '.epub' + logger.info(__('writing %s file...'), outname) + epub_filename = path.join(self.outdir, outname) + with ZipFile(epub_filename, 'w', ZIP_DEFLATED) as epub: + epub.write(path.join(self.outdir, 'mimetype'), 'mimetype', ZIP_STORED) + for filename in ('META-INF/container.xml', 'content.opf', 'toc.ncx'): + epub.write(path.join(self.outdir, filename), filename, ZIP_DEFLATED) + for filename in self.files: + epub.write(path.join(self.outdir, filename), filename, ZIP_DEFLATED) diff --git a/sphinx/builders/changes.py b/sphinx/builders/changes.py new file mode 100644 index 0000000..3e24e7d --- /dev/null +++ b/sphinx/builders/changes.py @@ -0,0 +1,161 @@ +"""Changelog builder.""" + +from __future__ import annotations + +import html +from os import path +from typing import TYPE_CHECKING, Any, cast + +from sphinx import package_dir +from sphinx.builders import Builder +from sphinx.domains.changeset import ChangeSetDomain +from sphinx.locale import _, __ +from sphinx.theming import HTMLThemeFactory +from sphinx.util import logging +from sphinx.util.console import bold # type: ignore[attr-defined] +from sphinx.util.fileutil import copy_asset_file +from sphinx.util.osutil import ensuredir, os_path + +if TYPE_CHECKING: + from sphinx.application import Sphinx + +logger = logging.getLogger(__name__) + + +class ChangesBuilder(Builder): + """ + Write a summary with all versionadded/changed directives. 
+    """
+    name = 'changes'
+    epilog = __('The overview file is in %(outdir)s.')
+
+    def init(self) -> None:
+        self.create_template_bridge()
+        theme_factory = HTMLThemeFactory(self.app)
+        self.theme = theme_factory.create('default')
+        self.templates.init(self, self.theme)
+
+    def get_outdated_docs(self) -> str:
+        return str(self.outdir)
+
+    typemap = {
+        'versionadded': 'added',
+        'versionchanged': 'changed',
+        'deprecated': 'deprecated',
+    }
+
+    def write(self, *ignored: Any) -> None:
+        version = self.config.version
+        domain = cast(ChangeSetDomain, self.env.get_domain('changeset'))
+        libchanges: dict[str, list[tuple[str, str, int]]] = {}
+        apichanges: list[tuple[str, str, int]] = []
+        otherchanges: dict[tuple[str, str], list[tuple[str, str, int]]] = {}
+
+        changesets = domain.get_changesets_for(version)
+        if not changesets:
+            logger.info(bold(__('no changes in version %s.') % version))
+            return
+        logger.info(bold(__('writing summary file...')))
+        for changeset in changesets:
+            if isinstance(changeset.descname, tuple):
+                descname = changeset.descname[0]
+            else:
+                descname = changeset.descname
+            ttext = self.typemap[changeset.type]
+            context = changeset.content.replace('\n', ' ')
+            if descname and changeset.docname.startswith('c-api'):
+                if context:
+                    entry = f'<b>{descname}</b>: <i>{ttext}:</i> {context}'
+                else:
+                    entry = f'<b>{descname}</b>: <i>{ttext}</i>.'
+                apichanges.append((entry, changeset.docname, changeset.lineno))
+            elif descname or changeset.module:
+                module = changeset.module or _('Builtins')
+                if not descname:
+                    descname = _('Module level')
+                if context:
+                    entry = f'<b>{descname}</b>: <i>{ttext}:</i> {context}'
+                else:
+                    entry = f'<b>{descname}</b>: <i>{ttext}</i>.'
+                libchanges.setdefault(module, []).append((entry, changeset.docname,
+                                                          changeset.lineno))
+            else:
+                if not context:
+                    continue
+                entry = f'<i>{ttext.capitalize()}:</i> {context}'
+                title = self.env.titles[changeset.docname].astext()
+                otherchanges.setdefault((changeset.docname, title), []).append(
+                    (entry, changeset.docname, changeset.lineno))
+
+        ctx = {
+            'project': self.config.project,
+            'version': version,
+            'docstitle': self.config.html_title,
+            'shorttitle': self.config.html_short_title,
+            'libchanges': sorted(libchanges.items()),
+            'apichanges': sorted(apichanges),
+            'otherchanges': sorted(otherchanges.items()),
+            'show_copyright': self.config.html_show_copyright,
+            'show_sphinx': self.config.html_show_sphinx,
+        }
+        with open(path.join(self.outdir, 'index.html'), 'w', encoding='utf8') as f:
+            f.write(self.templates.render('changes/frameset.html', ctx))
+        with open(path.join(self.outdir, 'changes.html'), 'w', encoding='utf8') as f:
+            f.write(self.templates.render('changes/versionchanges.html', ctx))
+
+        hltext = ['.. versionadded:: %s' % version,
+                  '.. versionchanged:: %s' % version,
+                  '.. deprecated:: %s' % version]
+
+        def hl(no: int, line: str) -> str:
+            line = '<a name="L%s"> </a>' % no + html.escape(line)
+            for x in hltext:
+                if x in line:
+                    line = '<b>%s</b>' % line
+                    break
+            return line
+
+        logger.info(bold(__('copying source files...')))
+        for docname in self.env.all_docs:
+            with open(self.env.doc2path(docname),
+                      encoding=self.env.config.source_encoding) as f:
+                try:
+                    lines = f.readlines()
+                except UnicodeDecodeError:
+                    logger.warning(__('could not read %r for changelog creation'), docname)
+                    continue
+            targetfn = path.join(self.outdir, 'rst', os_path(docname)) + '.html'
+            ensuredir(path.dirname(targetfn))
+            with open(targetfn, 'w', encoding='utf-8') as f:
+                text = ''.join(hl(i + 1, line) for (i, line) in enumerate(lines))
+                ctx = {
+                    'filename': self.env.doc2path(docname, False),
+                    'text': text,
+                }
+                f.write(self.templates.render('changes/rstsource.html', ctx))
+        themectx = {'theme_' + key: val for (key, val) in
+                    self.theme.get_options({}).items()}
+        copy_asset_file(path.join(package_dir, 'themes', 'default', 'static', 'default.css_t'),
+                        self.outdir, context=themectx, renderer=self.templates)
+        copy_asset_file(path.join(package_dir, 'themes', 'basic', 'static', 'basic.css'),
+                        self.outdir)
+
+    def hl(self, text: str, version: str) -> str:
+        text = html.escape(text)
+        for directive in ('versionchanged', 'versionadded', 'deprecated'):
+            text = text.replace(f'.. {directive}:: {version}',
+                                f'<b>.. {directive}:: {version}</b>')
+        return text
+
+    def finish(self) -> None:
+        pass
+
+
+def setup(app: Sphinx) -> dict[str, Any]:
+    app.add_builder(ChangesBuilder)
+
+    return {
+        'version': 'builtin',
+        'parallel_read_safe': True,
+        'parallel_write_safe': True,
+    }
diff --git a/sphinx/builders/dirhtml.py b/sphinx/builders/dirhtml.py
new file mode 100644
index 0000000..9683ee6
--- /dev/null
+++ b/sphinx/builders/dirhtml.py
@@ -0,0 +1,53 @@
+"""Directory HTML builders."""
+
+from __future__ import annotations
+
+from os import path
+from typing import TYPE_CHECKING, Any
+
+from sphinx.builders.html import StandaloneHTMLBuilder
+from sphinx.util import logging
+from sphinx.util.osutil import SEP, os_path
+
+if TYPE_CHECKING:
+    from sphinx.application import Sphinx
+
+logger = logging.getLogger(__name__)
+
+
+class DirectoryHTMLBuilder(StandaloneHTMLBuilder):
+    """
+    A StandaloneHTMLBuilder that creates all HTML pages as "index.html" in
+    a directory given by their pagename, so that generated URLs don't have
+    ``.html`` in them.
+    """
+    name = 'dirhtml'
+
+    def get_target_uri(self, docname: str, typ: str | None = None) -> str:
+        if docname == 'index':
+            return ''
+        if docname.endswith(SEP + 'index'):
+            return docname[:-5]  # up to sep
+        return docname + SEP
+
+    def get_outfilename(self, pagename: str) -> str:
+        if pagename == 'index' or pagename.endswith(SEP + 'index'):
+            outfilename = path.join(self.outdir, os_path(pagename) +
+                                    self.out_suffix)
+        else:
+            outfilename = path.join(self.outdir, os_path(pagename),
+                                    'index' + self.out_suffix)
+
+        return outfilename
+
+
+def setup(app: Sphinx) -> dict[str, Any]:
+    app.setup_extension('sphinx.builders.html')
+
+    app.add_builder(DirectoryHTMLBuilder)
+
+    return {
+        'version': 'builtin',
+        'parallel_read_safe': True,
+        'parallel_write_safe': True,
+    }
diff --git a/sphinx/builders/dummy.py b/sphinx/builders/dummy.py
new file mode 100644
index 0000000..f025311
--- /dev/null
+++ b/sphinx/builders/dummy.py
@@ -0,0 +1,48 @@
+"""Do syntax checks, but no writing."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from sphinx.builders import Builder
+from sphinx.locale import __
+
+if TYPE_CHECKING:
+    from docutils.nodes import Node
+
+    from sphinx.application import Sphinx
+
+
+class DummyBuilder(Builder):
+    name = 'dummy'
+    epilog = __('The dummy builder generates no files.')
+
+    allow_parallel = True
+
+    def init(self) -> None:
+        pass
+
+    def get_outdated_docs(self) -> set[str]:
+        return self.env.found_docs
+
+    def get_target_uri(self, docname: str, typ: str | None = None) -> str:
+        return ''
+
+    def prepare_writing(self, docnames: set[str]) -> None:
+        pass
+
+    def write_doc(self, docname: str, doctree: Node) -> None:
+        pass
+
+    def finish(self) -> None:
+        pass
+
+
+def setup(app: Sphinx) -> dict[str, Any]:
+    app.add_builder(DummyBuilder)
+
+    return {
+        'version': 'builtin',
+        'parallel_read_safe': True,
+        'parallel_write_safe': True,
+    }
diff --git a/sphinx/builders/epub3.py b/sphinx/builders/epub3.py
new file mode 100644
index 0000000..40d3ce7
--- /dev/null
+++ b/sphinx/builders/epub3.py
@@ -0,0 +1,301 @@
+"""Build epub3 files.
+
+Originally derived from epub.py.
+"""
+
+from __future__ import annotations
+
+import html
+import os
+import re
+import time
+from os import path
+from typing import TYPE_CHECKING, Any, NamedTuple
+
+from sphinx import package_dir
+from sphinx.builders import _epub_base
+from sphinx.config import ENUM, Config
+from sphinx.locale import __
+from sphinx.util import logging
+from sphinx.util.fileutil import copy_asset_file
+from sphinx.util.osutil import make_filename
+
+if TYPE_CHECKING:
+    from sphinx.application import Sphinx
+
+logger = logging.getLogger(__name__)
+
+
+class NavPoint(NamedTuple):
+    text: str
+    refuri: str
+    children: list[NavPoint]
+
+
+# writing modes
+PAGE_PROGRESSION_DIRECTIONS = {
+    'horizontal': 'ltr',
+    'vertical': 'rtl',
+}
+IBOOK_SCROLL_AXIS = {
+    'horizontal': 'vertical',
+    'vertical': 'horizontal',
+}
+THEME_WRITING_MODES = {
+    'vertical': 'vertical-rl',
+    'horizontal': 'horizontal-tb',
+}
+
+DOCTYPE = '''<!DOCTYPE html>'''
+
+HTML_TAG = (
+    '<html xmlns="http://www.w3.org/1999/xhtml" '
+    'xmlns:epub="http://www.idpf.org/2007/ops">'
+)
+
+# https://www.w3.org/TR/REC-xml/#NT-Name
+_xml_name_start_char = (
+    ':|[A-Z]|_|[a-z]|[\u00C0-\u00D6]'
+    '|[\u00D8-\u00F6]|[\u00F8-\u02FF]|[\u0370-\u037D]'
+    '|[\u037F-\u1FFF]|[\u200C-\u200D]|[\u2070-\u218F]'
+    '|[\u2C00-\u2FEF]|[\u3001-\uD7FF]|[\uF900-\uFDCF]'
+    '|[\uFDF0-\uFFFD]|[\U00010000-\U000EFFFF]'
+)
+_xml_name_char = (
+    _xml_name_start_char + r'\-|\.'
'|[0-9]|\u00B7|[\u0300-\u036F]|[\u203F-\u2040]' +) +_XML_NAME_PATTERN = re.compile(f'({_xml_name_start_char})({_xml_name_char})*') + + +class Epub3Builder(_epub_base.EpubBuilder): + """ + Builder that outputs epub3 files. + + It creates the metainfo files content.opf, nav.xhtml, toc.ncx, mimetype, + and META-INF/container.xml. Afterwards, all necessary files are zipped to + an epub file. + """ + name = 'epub' + epilog = __('The ePub file is in %(outdir)s.') + + supported_remote_images = False + template_dir = path.join(package_dir, 'templates', 'epub3') + doctype = DOCTYPE + html_tag = HTML_TAG + use_meta_charset = True + + # Finish by building the epub file + def handle_finish(self) -> None: + """Create the metainfo files and finally the epub.""" + self.get_toc() + self.build_mimetype() + self.build_container() + self.build_content() + self.build_navigation_doc() + self.build_toc() + self.build_epub() + + def content_metadata(self) -> dict[str, Any]: + """Create a dictionary with all metadata for the content.opf + file properly escaped. + """ + writing_mode = self.config.epub_writing_mode + + if (source_date_epoch := os.getenv('SOURCE_DATE_EPOCH')) is not None: + time_tuple = time.gmtime(int(source_date_epoch)) + else: + time_tuple = time.gmtime() + + metadata = super().content_metadata() + metadata['description'] = html.escape(self.config.epub_description) + metadata['contributor'] = html.escape(self.config.epub_contributor) + metadata['page_progression_direction'] = PAGE_PROGRESSION_DIRECTIONS.get(writing_mode) + metadata['ibook_scroll_axis'] = IBOOK_SCROLL_AXIS.get(writing_mode) + metadata['date'] = html.escape(time.strftime("%Y-%m-%dT%H:%M:%SZ", time_tuple)) + metadata['version'] = html.escape(self.config.version) + metadata['epub_version'] = self.config.epub_version + return metadata + + def prepare_writing(self, docnames: set[str]) -> None: + super().prepare_writing(docnames) + + writing_mode = self.config.epub_writing_mode + self.globalcontext['theme_writing_mode'] = THEME_WRITING_MODES.get(writing_mode) + self.globalcontext['html_tag'] = self.html_tag + self.globalcontext['use_meta_charset'] = self.use_meta_charset + self.globalcontext['skip_ua_compatible'] = True + + def build_navlist(self, navnodes: list[dict[str, Any]]) -> list[NavPoint]: + """Create the toc navigation structure. + + This method is almost same as build_navpoints method in epub.py. + This is because the logical navigation structure of epub3 is not + different from one of epub2. + + The difference from build_navpoints method is templates which are used + when generating navigation documents. + """ + navstack: list[NavPoint] = [] + navstack.append(NavPoint('', '', [])) + level = 0 + for node in navnodes: + if not node['text']: + continue + file = node['refuri'].split('#')[0] + if file in self.ignored_files: + continue + if node['level'] > self.config.epub_tocdepth: + continue + + navpoint = NavPoint(node['text'], node['refuri'], []) + if node['level'] == level: + navstack.pop() + navstack[-1].children.append(navpoint) + navstack.append(navpoint) + elif node['level'] == level + 1: + level += 1 + navstack[-1].children.append(navpoint) + navstack.append(navpoint) + elif node['level'] < level: + while node['level'] < len(navstack): + navstack.pop() + level = node['level'] + navstack[-1].children.append(navpoint) + navstack.append(navpoint) + else: + unreachable = 'Should never reach here. It might be a bug.' 
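+                # (Editor's illustration, not upstream code: a jump of more
+                # than one level at once is treated as a bug.  For refnodes
+                # such as
+                #     [{'text': 'Intro', 'refuri': 'intro.xhtml', 'level': 1},
+                #      {'text': 'Usage', 'refuri': 'usage.xhtml', 'level': 2},
+                #      {'text': 'API',   'refuri': 'api.xhtml',   'level': 2}]
+                # the stack logic above produces
+                #     [NavPoint('Intro', 'intro.xhtml', [
+                #         NavPoint('Usage', 'usage.xhtml', []),
+                #         NavPoint('API', 'api.xhtml', [])])]
+                # as the returned navstack[0].children.)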
+ raise RuntimeError(unreachable) + + return navstack[0].children + + def navigation_doc_metadata(self, navlist: list[NavPoint]) -> dict[str, Any]: + """Create a dictionary with all metadata for the nav.xhtml file + properly escaped. + """ + return { + 'lang': html.escape(self.config.epub_language), + 'toc_locale': html.escape(self.guide_titles['toc']), + 'navlist': navlist, + } + + def build_navigation_doc(self) -> None: + """Write the metainfo file nav.xhtml.""" + logger.info(__('writing nav.xhtml file...')) + + if self.config.epub_tocscope == 'default': + doctree = self.env.get_and_resolve_doctree( + self.config.root_doc, self, + prune_toctrees=False, includehidden=False) + refnodes = self.get_refnodes(doctree, []) + self.toc_add_files(refnodes) + else: + # 'includehidden' + refnodes = self.refnodes + navlist = self.build_navlist(refnodes) + copy_asset_file(path.join(self.template_dir, 'nav.xhtml_t'), self.outdir, + self.navigation_doc_metadata(navlist)) + + # Add nav.xhtml to epub file + if 'nav.xhtml' not in self.files: + self.files.append('nav.xhtml') + + +def validate_config_values(app: Sphinx) -> None: + if app.builder.name != 'epub': + return + + # lang attribute, dc:language + if not app.config.epub_language: + logger.warning(__('conf value "epub_language" (or "language") ' + 'should not be empty for EPUB3')) + # unique-identifier attribute + if not _XML_NAME_PATTERN.match(app.config.epub_uid): + logger.warning(__('conf value "epub_uid" should be XML NAME for EPUB3')) + # dc:title + if not app.config.epub_title: + logger.warning(__('conf value "epub_title" (or "html_title") ' + 'should not be empty for EPUB3')) + # dc:creator + if not app.config.epub_author: + logger.warning(__('conf value "epub_author" should not be empty for EPUB3')) + # dc:contributor + if not app.config.epub_contributor: + logger.warning(__('conf value "epub_contributor" should not be empty for EPUB3')) + # dc:description + if not app.config.epub_description: + logger.warning(__('conf value "epub_description" should not be empty for EPUB3')) + # dc:publisher + if not app.config.epub_publisher: + logger.warning(__('conf value "epub_publisher" should not be empty for EPUB3')) + # dc:rights + if not app.config.epub_copyright: + logger.warning(__('conf value "epub_copyright" (or "copyright")' + 'should not be empty for EPUB3')) + # dc:identifier + if not app.config.epub_identifier: + logger.warning(__('conf value "epub_identifier" should not be empty for EPUB3')) + # meta ibooks:version + if not app.config.version: + logger.warning(__('conf value "version" should not be empty for EPUB3')) + + +def convert_epub_css_files(app: Sphinx, config: Config) -> None: + """This converts string styled epub_css_files to tuple styled one.""" + epub_css_files: list[tuple[str, dict[str, Any]]] = [] + for entry in config.epub_css_files: + if isinstance(entry, str): + epub_css_files.append((entry, {})) + else: + try: + filename, attrs = entry + epub_css_files.append((filename, attrs)) + except Exception: + logger.warning(__('invalid css_file: %r, ignored'), entry) + continue + + config.epub_css_files = epub_css_files # type: ignore[attr-defined] + + +def setup(app: Sphinx) -> dict[str, Any]: + app.add_builder(Epub3Builder) + + # config values + app.add_config_value('epub_basename', lambda self: make_filename(self.project), False) + app.add_config_value('epub_version', 3.0, 'epub') # experimental + app.add_config_value('epub_theme', 'epub', 'epub') + app.add_config_value('epub_theme_options', {}, 'epub') + 
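+    # (Editor's illustration, not upstream code: each value registered here
+    # becomes a conf.py option with the given default and rebuild category,
+    # e.g.
+    #     epub_title = 'My Project'
+    #     epub_writing_mode = 'vertical'   # flips page progression to 'rtl'
+    # Callable defaults, like the lambdas below, derive from other settings.)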
app.add_config_value('epub_title', lambda self: self.project, 'epub') + app.add_config_value('epub_author', lambda self: self.author, 'epub') + app.add_config_value('epub_language', lambda self: self.language or 'en', 'epub') + app.add_config_value('epub_publisher', lambda self: self.author, 'epub') + app.add_config_value('epub_copyright', lambda self: self.copyright, 'epub') + app.add_config_value('epub_identifier', 'unknown', 'epub') + app.add_config_value('epub_scheme', 'unknown', 'epub') + app.add_config_value('epub_uid', 'unknown', 'env') + app.add_config_value('epub_cover', (), 'env') + app.add_config_value('epub_guide', (), 'env') + app.add_config_value('epub_pre_files', [], 'env') + app.add_config_value('epub_post_files', [], 'env') + app.add_config_value('epub_css_files', lambda config: config.html_css_files, 'epub') + app.add_config_value('epub_exclude_files', [], 'env') + app.add_config_value('epub_tocdepth', 3, 'env') + app.add_config_value('epub_tocdup', True, 'env') + app.add_config_value('epub_tocscope', 'default', 'env') + app.add_config_value('epub_fix_images', False, 'env') + app.add_config_value('epub_max_image_width', 0, 'env') + app.add_config_value('epub_show_urls', 'inline', 'epub') + app.add_config_value('epub_use_index', lambda self: self.html_use_index, 'epub') + app.add_config_value('epub_description', 'unknown', 'epub') + app.add_config_value('epub_contributor', 'unknown', 'epub') + app.add_config_value('epub_writing_mode', 'horizontal', 'epub', + ENUM('horizontal', 'vertical')) + + # event handlers + app.connect('config-inited', convert_epub_css_files, priority=800) + app.connect('builder-inited', validate_config_values) + + return { + 'version': 'builtin', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/sphinx/builders/gettext.py b/sphinx/builders/gettext.py new file mode 100644 index 0000000..0b2bede --- /dev/null +++ b/sphinx/builders/gettext.py @@ -0,0 +1,306 @@ +"""The MessageCatalogBuilder class.""" + +from __future__ import annotations + +import time +from codecs import open +from collections import defaultdict +from os import getenv, path, walk +from typing import TYPE_CHECKING, Any +from uuid import uuid4 + +from docutils import nodes + +from sphinx import addnodes, package_dir +from sphinx.builders import Builder +from sphinx.errors import ThemeError +from sphinx.locale import __ +from sphinx.util import logging +from sphinx.util.console import bold # type: ignore[attr-defined] +from sphinx.util.display import status_iterator +from sphinx.util.i18n import CatalogInfo, docname_to_domain +from sphinx.util.index_entries import split_index_msg +from sphinx.util.nodes import extract_messages, traverse_translatable_index +from sphinx.util.osutil import canon_path, ensuredir, relpath +from sphinx.util.tags import Tags +from sphinx.util.template import SphinxRenderer + +if TYPE_CHECKING: + import os + from collections.abc import Generator, Iterable + + from docutils.nodes import Element + + from sphinx.application import Sphinx + +logger = logging.getLogger(__name__) + + +class Message: + """An entry of translatable message.""" + def __init__(self, text: str, locations: list[tuple[str, int]], uuids: list[str]): + self.text = text + self.locations = locations + self.uuids = uuids + + +class Catalog: + """Catalog of translatable messages.""" + + def __init__(self) -> None: + self.messages: list[str] = [] # retain insertion order + + # msgid -> file, line, uid + self.metadata: dict[str, list[tuple[str, int, str]]] = {} + + def 
add(self, msg: str, origin: Element | MsgOrigin) -> None: + if not hasattr(origin, 'uid'): + # Nodes that are replicated like todo don't have a uid, + # however i18n is also unnecessary. + return + if msg not in self.metadata: # faster lookup in hash + self.messages.append(msg) + self.metadata[msg] = [] + line = origin.line + if line is None: + line = -1 + self.metadata[msg].append((origin.source, line, origin.uid)) + + def __iter__(self) -> Generator[Message, None, None]: + for message in self.messages: + positions = sorted({(source, line) for source, line, uuid + in self.metadata[message]}) + uuids = [uuid for source, line, uuid in self.metadata[message]] + yield Message(message, positions, uuids) + + +class MsgOrigin: + """ + Origin holder for Catalog message origin. + """ + + def __init__(self, source: str, line: int) -> None: + self.source = source + self.line = line + self.uid = uuid4().hex + + +class GettextRenderer(SphinxRenderer): + def __init__( + self, template_path: list[str | os.PathLike[str]] | None = None, + outdir: str | os.PathLike[str] | None = None, + ) -> None: + self.outdir = outdir + if template_path is None: + template_path = [path.join(package_dir, 'templates', 'gettext')] + super().__init__(template_path) + + def escape(s: str) -> str: + s = s.replace('\\', r'\\') + s = s.replace('"', r'\"') + return s.replace('\n', '\\n"\n"') + + # use texescape as escape filter + self.env.filters['e'] = escape + self.env.filters['escape'] = escape + + def render(self, filename: str, context: dict[str, Any]) -> str: + def _relpath(s: str) -> str: + return canon_path(relpath(s, self.outdir)) + + context['relpath'] = _relpath + return super().render(filename, context) + + +class I18nTags(Tags): + """Dummy tags module for I18nBuilder. + + To translate all text inside of only nodes, this class + always returns True value even if no tags are defined. + """ + def eval_condition(self, condition: Any) -> bool: + return True + + +class I18nBuilder(Builder): + """ + General i18n builder. + """ + name = 'i18n' + versioning_method = 'text' + use_message_catalog = False + + def init(self) -> None: + super().init() + self.env.set_versioning_method(self.versioning_method, + self.env.config.gettext_uuid) + self.tags = I18nTags() + self.catalogs: defaultdict[str, Catalog] = defaultdict(Catalog) + + def get_target_uri(self, docname: str, typ: str | None = None) -> str: + return '' + + def get_outdated_docs(self) -> set[str]: + return self.env.found_docs + + def prepare_writing(self, docnames: set[str]) -> None: + return + + def compile_catalogs(self, catalogs: set[CatalogInfo], message: str) -> None: + return + + def write_doc(self, docname: str, doctree: nodes.document) -> None: + catalog = self.catalogs[docname_to_domain(docname, self.config.gettext_compact)] + + for toctree in self.env.tocs[docname].findall(addnodes.toctree): + for node, msg in extract_messages(toctree): + node.uid = '' # type: ignore[attr-defined] # Hack UUID model + catalog.add(msg, node) + + for node, msg in extract_messages(doctree): + # Do not extract messages from within substitution definitions. + if not _is_node_in_substitution_definition(node): + catalog.add(msg, node) + + if 'index' in self.env.config.gettext_additional_targets: + # Extract translatable messages from index entries. 
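+            # (Editor's illustration, not upstream code: split_index_msg()
+            # breaks one index entry into its translatable parts, e.g. a
+            # 'pair' entry 'function; builtin' yields ['function', 'builtin'],
+            # and each part is added to the catalog separately.)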
+ for node, entries in traverse_translatable_index(doctree): + for entry_type, value, _target_id, _main, _category_key in entries: + for m in split_index_msg(entry_type, value): + catalog.add(m, node) + + +# If set, use the timestamp from SOURCE_DATE_EPOCH +# https://reproducible-builds.org/specs/source-date-epoch/ +if (source_date_epoch := getenv('SOURCE_DATE_EPOCH')) is not None: + timestamp = time.gmtime(float(source_date_epoch)) +else: + # determine timestamp once to remain unaffected by DST changes during build + timestamp = time.localtime() +ctime = time.strftime('%Y-%m-%d %H:%M%z', timestamp) + + +def should_write(filepath: str, new_content: str) -> bool: + if not path.exists(filepath): + return True + try: + with open(filepath, encoding='utf-8') as oldpot: + old_content = oldpot.read() + old_header_index = old_content.index('"POT-Creation-Date:') + new_header_index = new_content.index('"POT-Creation-Date:') + old_body_index = old_content.index('"PO-Revision-Date:') + new_body_index = new_content.index('"PO-Revision-Date:') + return ((old_content[:old_header_index] != new_content[:new_header_index]) or + (new_content[new_body_index:] != old_content[old_body_index:])) + except ValueError: + pass + + return True + + +def _is_node_in_substitution_definition(node: nodes.Node) -> bool: + """Check "node" to test if it is in a substitution definition.""" + while node.parent: + if isinstance(node, nodes.substitution_definition): + return True + node = node.parent + return False + + +class MessageCatalogBuilder(I18nBuilder): + """ + Builds gettext-style message catalogs (.pot files). + """ + name = 'gettext' + epilog = __('The message catalogs are in %(outdir)s.') + + def init(self) -> None: + super().init() + self.create_template_bridge() + self.templates.init(self) + + def _collect_templates(self) -> set[str]: + template_files = set() + for template_path in self.config.templates_path: + tmpl_abs_path = path.join(self.app.srcdir, template_path) + for dirpath, _dirs, files in walk(tmpl_abs_path): + for fn in files: + if fn.endswith('.html'): + filename = canon_path(path.join(dirpath, fn)) + template_files.add(filename) + return template_files + + def _extract_from_template(self) -> None: + files = list(self._collect_templates()) + files.sort() + logger.info(bold(__('building [%s]: ') % self.name), nonl=True) + logger.info(__('targets for %d template files'), len(files)) + + extract_translations = self.templates.environment.extract_translations + + for template in status_iterator(files, __('reading templates... 
'), "purple", + len(files), self.app.verbosity): + try: + with open(template, encoding='utf-8') as f: + context = f.read() + for line, _meth, msg in extract_translations(context): + origin = MsgOrigin(template, line) + self.catalogs['sphinx'].add(msg, origin) + except Exception as exc: + msg = f'{template}: {exc!r}' + raise ThemeError(msg) from exc + + def build( + self, + docnames: Iterable[str] | None, + summary: str | None = None, + method: str = 'update', + ) -> None: + self._extract_from_template() + super().build(docnames, summary, method) + + def finish(self) -> None: + super().finish() + context = { + 'version': self.config.version, + 'copyright': self.config.copyright, + 'project': self.config.project, + 'last_translator': self.config.gettext_last_translator, + 'language_team': self.config.gettext_language_team, + 'ctime': ctime, + 'display_location': self.config.gettext_location, + 'display_uuid': self.config.gettext_uuid, + } + for textdomain, catalog in status_iterator(self.catalogs.items(), + __("writing message catalogs... "), + "darkgreen", len(self.catalogs), + self.app.verbosity, + lambda textdomain__: textdomain__[0]): + # noop if config.gettext_compact is set + ensuredir(path.join(self.outdir, path.dirname(textdomain))) + + context['messages'] = list(catalog) + content = GettextRenderer(outdir=self.outdir).render('message.pot_t', context) + + pofn = path.join(self.outdir, textdomain + '.pot') + if should_write(pofn, content): + with open(pofn, 'w', encoding='utf-8') as pofile: + pofile.write(content) + + +def setup(app: Sphinx) -> dict[str, Any]: + app.add_builder(MessageCatalogBuilder) + + app.add_config_value('gettext_compact', True, 'gettext', {bool, str}) + app.add_config_value('gettext_location', True, 'gettext') + app.add_config_value('gettext_uuid', False, 'gettext') + app.add_config_value('gettext_auto_build', True, 'env') + app.add_config_value('gettext_additional_targets', [], 'env') + app.add_config_value('gettext_last_translator', 'FULL NAME ', 'gettext') + app.add_config_value('gettext_language_team', 'LANGUAGE ', 'gettext') + + return { + 'version': 'builtin', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/sphinx/builders/html/__init__.py b/sphinx/builders/html/__init__.py new file mode 100644 index 0000000..85067be --- /dev/null +++ b/sphinx/builders/html/__init__.py @@ -0,0 +1,1399 @@ +"""Several HTML builders.""" + +from __future__ import annotations + +import contextlib +import hashlib +import html +import os +import posixpath +import re +import sys +import time +import warnings +from os import path +from typing import IO, TYPE_CHECKING, Any +from urllib.parse import quote + +import docutils.readers.doctree +from docutils import nodes +from docutils.core import Publisher +from docutils.frontend import OptionParser +from docutils.io import DocTreeInput, StringOutput +from docutils.utils import relative_path + +from sphinx import __display_version__, package_dir +from sphinx import version_info as sphinx_version +from sphinx.builders import Builder +from sphinx.builders.html._assets import _CascadingStyleSheet, _file_checksum, _JavaScript +from sphinx.config import ENUM, Config +from sphinx.deprecation import _deprecation_warning +from sphinx.domains import Domain, Index, IndexEntry +from sphinx.environment.adapters.asset import ImageAdapter +from sphinx.environment.adapters.indexentries import IndexEntries +from sphinx.environment.adapters.toctree import document_toc, global_toctree_for_doc +from sphinx.errors import 
ConfigError, ThemeError +from sphinx.highlighting import PygmentsBridge +from sphinx.locale import _, __ +from sphinx.search import js_index +from sphinx.theming import HTMLThemeFactory +from sphinx.util import isurl, logging +from sphinx.util.display import progress_message, status_iterator +from sphinx.util.docutils import new_document +from sphinx.util.fileutil import copy_asset +from sphinx.util.i18n import format_date +from sphinx.util.inventory import InventoryFile +from sphinx.util.matching import DOTFILES, Matcher, patmatch +from sphinx.util.osutil import SEP, copyfile, ensuredir, os_path, relative_uri +from sphinx.writers.html import HTMLWriter +from sphinx.writers.html5 import HTML5Translator + +if TYPE_CHECKING: + from collections.abc import Iterable, Iterator, Sequence + + from docutils.nodes import Node + + from sphinx.application import Sphinx + from sphinx.environment import BuildEnvironment + from sphinx.util.tags import Tags + +#: the filename for the inventory of objects +INVENTORY_FILENAME = 'objects.inv' + +logger = logging.getLogger(__name__) +return_codes_re = re.compile('[\r\n]+') + +DOMAIN_INDEX_TYPE = tuple[ + # Index name (e.g. py-modindex) + str, + # Index class + type[Index], + # list of (heading string, list of index entries) pairs. + list[tuple[str, list[IndexEntry]]], + # whether sub-entries should start collapsed + bool, +] + + +def get_stable_hash(obj: Any) -> str: + """ + Return a stable hash for a Python data structure. We can't just use + the md5 of str(obj) since for example dictionary items are enumerated + in unpredictable order due to hash randomization in newer Pythons. + """ + if isinstance(obj, dict): + return get_stable_hash(list(obj.items())) + elif isinstance(obj, (list, tuple)): + obj = sorted(get_stable_hash(o) for o in obj) + return hashlib.md5(str(obj).encode(), usedforsecurity=False).hexdigest() + + +def convert_locale_to_language_tag(locale: str | None) -> str | None: + """Convert a locale string to a language tag (ex. en_US -> en-US). + + refs: BCP 47 (:rfc:`5646`) + """ + if locale: + return locale.replace('_', '-') + else: + return None + + +class BuildInfo: + """buildinfo file manipulator. + + HTMLBuilder and its family are storing their own envdata to ``.buildinfo``. + This class is a manipulator for the file. + """ + + @classmethod + def load(cls, f: IO) -> BuildInfo: + try: + lines = f.readlines() + assert lines[0].rstrip() == '# Sphinx build info version 1' + assert lines[2].startswith('config: ') + assert lines[3].startswith('tags: ') + + build_info = BuildInfo() + build_info.config_hash = lines[2].split()[1].strip() + build_info.tags_hash = lines[3].split()[1].strip() + return build_info + except Exception as exc: + raise ValueError(__('build info file is broken: %r') % exc) from exc + + def __init__( + self, + config: Config | None = None, + tags: Tags | None = None, + config_categories: Sequence[str] = (), + ) -> None: + self.config_hash = '' + self.tags_hash = '' + + if config: + values = {c.name: c.value for c in config.filter(config_categories)} + self.config_hash = get_stable_hash(values) + + if tags: + self.tags_hash = get_stable_hash(sorted(tags)) + + def __eq__(self, other: BuildInfo) -> bool: # type: ignore[override] + return (self.config_hash == other.config_hash and + self.tags_hash == other.tags_hash) + + def dump(self, f: IO) -> None: + f.write('# Sphinx build info version 1\n' + '# This file hashes the configuration used when building these files.' 
+ ' When it is not found, a full rebuild will be done.\n' + 'config: %s\n' + 'tags: %s\n' % + (self.config_hash, self.tags_hash)) + + +class StandaloneHTMLBuilder(Builder): + """ + Builds standalone HTML docs. + """ + name = 'html' + format = 'html' + epilog = __('The HTML pages are in %(outdir)s.') + + default_translator_class = HTML5Translator + copysource = True + allow_parallel = True + out_suffix = '.html' + link_suffix = '.html' # defaults to matching out_suffix + indexer_format: Any = js_index + indexer_dumps_unicode = True + # create links to original images from images [True/False] + html_scaled_image_link = True + supported_image_types = ['image/svg+xml', 'image/png', + 'image/gif', 'image/jpeg'] + supported_remote_images = True + supported_data_uri_images = True + searchindex_filename = 'searchindex.js' + add_permalinks = True + allow_sharp_as_current_path = True + embedded = False # for things like HTML help or Qt help: suppresses sidebar + search = True # for things like HTML help and Apple help: suppress search + use_index = False + download_support = True # enable download role + + imgpath: str = '' + domain_indices: list[DOMAIN_INDEX_TYPE] = [] + + def __init__(self, app: Sphinx, env: BuildEnvironment) -> None: + super().__init__(app, env) + + # CSS files + self._css_files: list[_CascadingStyleSheet] = [] + + # JS files + self._js_files: list[_JavaScript] = [] + + # Cached Publisher for writing doctrees to HTML + reader = docutils.readers.doctree.Reader(parser_name='restructuredtext') + pub = Publisher( + reader=reader, + parser=reader.parser, + writer=HTMLWriter(self), + source_class=DocTreeInput, + destination=StringOutput(encoding='unicode'), + ) + if docutils.__version_info__[:2] >= (0, 19): + pub.get_settings(output_encoding='unicode', traceback=True) + else: + op = pub.setup_option_parser(output_encoding='unicode', traceback=True) + pub.settings = op.get_default_values() + self._publisher = pub + + def init(self) -> None: + self.build_info = self.create_build_info() + # basename of images directory + self.imagedir = '_images' + # section numbers for headings in the currently visited document + self.secnumbers: dict[str, tuple[int, ...]] = {} + # currently written docname + self.current_docname: str = '' + + self.init_templates() + self.init_highlighter() + self.init_css_files() + self.init_js_files() + + html_file_suffix = self.get_builder_config('file_suffix', 'html') + if html_file_suffix is not None: + self.out_suffix = html_file_suffix + + html_link_suffix = self.get_builder_config('link_suffix', 'html') + if html_link_suffix is not None: + self.link_suffix = html_link_suffix + else: + self.link_suffix = self.out_suffix + + self.use_index = self.get_builder_config('use_index', 'html') + + def create_build_info(self) -> BuildInfo: + return BuildInfo(self.config, self.tags, ['html']) + + def _get_translations_js(self) -> str: + candidates = [path.join(dir, self.config.language, + 'LC_MESSAGES', 'sphinx.js') + for dir in self.config.locale_dirs] + \ + [path.join(package_dir, 'locale', self.config.language, + 'LC_MESSAGES', 'sphinx.js'), + path.join(sys.prefix, 'share/sphinx/locale', + self.config.language, 'sphinx.js')] + + for jsfile in candidates: + if path.isfile(jsfile): + return jsfile + return '' + + def _get_style_filenames(self) -> Iterator[str]: + if isinstance(self.config.html_style, str): + yield self.config.html_style + elif self.config.html_style is not None: + yield from self.config.html_style + elif self.theme: + stylesheet = 
self.theme.get_config('theme', 'stylesheet') + yield from map(str.strip, stylesheet.split(',')) + else: + yield 'default.css' + + def get_theme_config(self) -> tuple[str, dict]: + return self.config.html_theme, self.config.html_theme_options + + def init_templates(self) -> None: + theme_factory = HTMLThemeFactory(self.app) + themename, themeoptions = self.get_theme_config() + self.theme = theme_factory.create(themename) + self.theme_options = themeoptions.copy() + self.create_template_bridge() + self.templates.init(self, self.theme) + + def init_highlighter(self) -> None: + # determine Pygments style and create the highlighter + if self.config.pygments_style is not None: + style = self.config.pygments_style + elif self.theme: + style = self.theme.get_config('theme', 'pygments_style', 'none') + else: + style = 'sphinx' + self.highlighter = PygmentsBridge('html', style) + + if self.theme: + dark_style = self.theme.get_config('theme', 'pygments_dark_style', None) + else: + dark_style = None + + self.dark_highlighter: PygmentsBridge | None + if dark_style is not None: + self.dark_highlighter = PygmentsBridge('html', dark_style) + self.app.add_css_file('pygments_dark.css', + media='(prefers-color-scheme: dark)', + id='pygments_dark_css') + else: + self.dark_highlighter = None + + @property + def css_files(self) -> list[_CascadingStyleSheet]: + _deprecation_warning(__name__, f'{self.__class__.__name__}.css_files', '', + remove=(9, 0)) + return self._css_files + + def init_css_files(self) -> None: + self._css_files = [] + self.add_css_file('pygments.css', priority=200) + + for filename in self._get_style_filenames(): + self.add_css_file(filename, priority=200) + + for filename, attrs in self.app.registry.css_files: + self.add_css_file(filename, **attrs) + + for filename, attrs in self.get_builder_config('css_files', 'html'): + attrs.setdefault('priority', 800) # User's CSSs are loaded after extensions' + self.add_css_file(filename, **attrs) + + def add_css_file(self, filename: str, **kwargs: Any) -> None: + if '://' not in filename: + filename = posixpath.join('_static', filename) + + if (asset := _CascadingStyleSheet(filename, **kwargs)) not in self._css_files: + self._css_files.append(asset) + + @property + def script_files(self) -> list[_JavaScript]: + _deprecation_warning(__name__, f'{self.__class__.__name__}.script_files', '', + remove=(9, 0)) + return self._js_files + + def init_js_files(self) -> None: + self._js_files = [] + self.add_js_file('documentation_options.js', priority=200) + self.add_js_file('doctools.js', priority=200) + self.add_js_file('sphinx_highlight.js', priority=200) + + for filename, attrs in self.app.registry.js_files: + self.add_js_file(filename or '', **attrs) + + for filename, attrs in self.get_builder_config('js_files', 'html'): + attrs.setdefault('priority', 800) # User's JSs are loaded after extensions' + self.add_js_file(filename or '', **attrs) + + if self._get_translations_js(): + self.add_js_file('translations.js') + + def add_js_file(self, filename: str, **kwargs: Any) -> None: + if filename and '://' not in filename: + filename = posixpath.join('_static', filename) + + if (asset := _JavaScript(filename, **kwargs)) not in self._js_files: + self._js_files.append(asset) + + @property + def math_renderer_name(self) -> str | None: + name = self.get_builder_config('math_renderer', 'html') + if name is not None: + # use given name + return name + else: + # not given: choose a math_renderer from registered ones as possible + renderers = 
list(self.app.registry.html_inline_math_renderers) + if len(renderers) == 1: + # only default math_renderer (mathjax) is registered + return renderers[0] + elif len(renderers) == 2: + # default and another math_renderer are registered; prior the another + renderers.remove('mathjax') + return renderers[0] + else: + # many math_renderers are registered. can't choose automatically! + return None + + def get_outdated_docs(self) -> Iterator[str]: + try: + with open(path.join(self.outdir, '.buildinfo'), encoding="utf-8") as fp: + buildinfo = BuildInfo.load(fp) + + if self.build_info != buildinfo: + logger.debug('[build target] did not match: build_info ') + yield from self.env.found_docs + return + except ValueError as exc: + logger.warning(__('Failed to read build info file: %r'), exc) + except OSError: + # ignore errors on reading + pass + + if self.templates: + template_mtime = self.templates.newest_template_mtime() + else: + template_mtime = 0 + for docname in self.env.found_docs: + if docname not in self.env.all_docs: + logger.debug('[build target] did not in env: %r', docname) + yield docname + continue + targetname = self.get_outfilename(docname) + try: + targetmtime = path.getmtime(targetname) + except Exception: + targetmtime = 0 + try: + srcmtime = max(path.getmtime(self.env.doc2path(docname)), template_mtime) + if srcmtime > targetmtime: + logger.debug( + '[build target] targetname %r(%s), template(%s), docname %r(%s)', + targetname, + _format_modified_time(targetmtime), + _format_modified_time(template_mtime), + docname, + _format_modified_time(path.getmtime(self.env.doc2path(docname))), + ) + yield docname + except OSError: + # source doesn't exist anymore + pass + + def get_asset_paths(self) -> list[str]: + return self.config.html_extra_path + self.config.html_static_path + + def render_partial(self, node: Node | None) -> dict[str, str]: + """Utility: Render a lone doctree node.""" + if node is None: + return {'fragment': ''} + + doc = new_document('') + doc.append(node) + self._publisher.set_source(doc) + self._publisher.publish() + return self._publisher.writer.parts # type: ignore[union-attr] + + def prepare_writing(self, docnames: set[str]) -> None: + # create the search indexer + self.indexer = None + if self.search: + from sphinx.search import IndexBuilder + lang = self.config.html_search_language or self.config.language + self.indexer = IndexBuilder(self.env, lang, + self.config.html_search_options, + self.config.html_search_scorer) + self.load_indexer(docnames) + + self.docwriter = HTMLWriter(self) + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', category=DeprecationWarning) + # DeprecationWarning: The frontend.OptionParser class will be replaced + # by a subclass of argparse.ArgumentParser in Docutils 0.21 or later. 
+ self.docsettings: Any = OptionParser( + defaults=self.env.settings, + components=(self.docwriter,), + read_config_files=True).get_default_values() + self.docsettings.compact_lists = bool(self.config.html_compact_lists) + + # determine the additional indices to include + self.domain_indices = [] + # html_domain_indices can be False/True or a list of index names + indices_config = self.config.html_domain_indices + if indices_config: + for domain_name in sorted(self.env.domains): + domain: Domain = self.env.domains[domain_name] + for indexcls in domain.indices: + indexname = f'{domain.name}-{indexcls.name}' + if isinstance(indices_config, list): + if indexname not in indices_config: + continue + content, collapse = indexcls(domain).generate() + if content: + self.domain_indices.append( + (indexname, indexcls, content, collapse)) + + # format the "last updated on" string, only once is enough since it + # typically doesn't include the time of day + self.last_updated: str | None + lufmt = self.config.html_last_updated_fmt + if lufmt is not None: + self.last_updated = format_date(lufmt or _('%b %d, %Y'), + language=self.config.language) + else: + self.last_updated = None + + # If the logo or favicon are urls, keep them as-is, otherwise + # strip the relative path as the files will be copied into _static. + logo = self.config.html_logo or '' + favicon = self.config.html_favicon or '' + + if not isurl(logo): + logo = path.basename(logo) + if not isurl(favicon): + favicon = path.basename(favicon) + + self.relations = self.env.collect_relations() + + rellinks: list[tuple[str, str, str, str]] = [] + if self.use_index: + rellinks.append(('genindex', _('General Index'), 'I', _('index'))) + for indexname, indexcls, _content, _collapse in self.domain_indices: + # if it has a short name + if indexcls.shortname: + rellinks.append((indexname, indexcls.localname, + '', indexcls.shortname)) + + # add assets registered after ``Builder.init()``. + for css_filename, attrs in self.app.registry.css_files: + self.add_css_file(css_filename, **attrs) + for js_filename, attrs in self.app.registry.js_files: + self.add_js_file(js_filename or '', **attrs) + + # back up _css_files and _js_files to allow adding CSS/JS files to a specific page. 
+ self._orig_css_files = list(dict.fromkeys(self._css_files)) + self._orig_js_files = list(dict.fromkeys(self._js_files)) + styles = list(self._get_style_filenames()) + + self.globalcontext = { + 'embedded': self.embedded, + 'project': self.config.project, + 'release': return_codes_re.sub('', self.config.release), + 'version': self.config.version, + 'last_updated': self.last_updated, + 'copyright': self.config.copyright, + 'master_doc': self.config.root_doc, + 'root_doc': self.config.root_doc, + 'use_opensearch': self.config.html_use_opensearch, + 'docstitle': self.config.html_title, + 'shorttitle': self.config.html_short_title, + 'show_copyright': self.config.html_show_copyright, + 'show_search_summary': self.config.html_show_search_summary, + 'show_sphinx': self.config.html_show_sphinx, + 'has_source': self.config.html_copy_source, + 'show_source': self.config.html_show_sourcelink, + 'sourcelink_suffix': self.config.html_sourcelink_suffix, + 'file_suffix': self.out_suffix, + 'link_suffix': self.link_suffix, + 'script_files': self._js_files, + 'language': convert_locale_to_language_tag(self.config.language), + 'css_files': self._css_files, + 'sphinx_version': __display_version__, + 'sphinx_version_tuple': sphinx_version, + 'docutils_version_info': docutils.__version_info__[:5], + 'styles': styles, + 'rellinks': rellinks, + 'builder': self.name, + 'parents': [], + 'logo_url': logo, + 'favicon_url': favicon, + 'html5_doctype': True, + } + if self.theme: + self.globalcontext.update( + ('theme_' + key, val) for (key, val) in + self.theme.get_options(self.theme_options).items()) + self.globalcontext.update(self.config.html_context) + + def get_doc_context(self, docname: str, body: str, metatags: str) -> dict[str, Any]: + """Collect items for the template context of a page.""" + # find out relations + prev = next = None + parents = [] + rellinks = self.globalcontext['rellinks'][:] + related = self.relations.get(docname) + titles = self.env.titles + if related and related[2]: + try: + next = { + 'link': self.get_relative_uri(docname, related[2]), + 'title': self.render_partial(titles[related[2]])['title'], + } + rellinks.append((related[2], next['title'], 'N', _('next'))) + except KeyError: + next = None + if related and related[1]: + try: + prev = { + 'link': self.get_relative_uri(docname, related[1]), + 'title': self.render_partial(titles[related[1]])['title'], + } + rellinks.append((related[1], prev['title'], 'P', _('previous'))) + except KeyError: + # the relation is (somehow) not in the TOC tree, handle + # that gracefully + prev = None + while related and related[0]: + with contextlib.suppress(KeyError): + parents.append( + {'link': self.get_relative_uri(docname, related[0]), + 'title': self.render_partial(titles[related[0]])['title']}) + + related = self.relations.get(related[0]) + if parents: + # remove link to the master file; we have a generic + # "back to index" link already + parents.pop() + parents.reverse() + + # title rendered as HTML + title_node = self.env.longtitles.get(docname) + title = self.render_partial(title_node)['title'] if title_node else '' + + # Suffix for the document + source_suffix = self.env.doc2path(docname, False)[len(docname):] + + # the name for the copied source + if self.config.html_copy_source: + sourcename = docname + source_suffix + if source_suffix != self.config.html_sourcelink_suffix: + sourcename += self.config.html_sourcelink_suffix + else: + sourcename = '' + + # metadata for the document + meta = self.env.metadata.get(docname) + + # local TOC and 
global TOC tree + self_toc = document_toc(self.env, docname, self.tags) + toc = self.render_partial(self_toc)['fragment'] + + return { + 'parents': parents, + 'prev': prev, + 'next': next, + 'title': title, + 'meta': meta, + 'body': body, + 'metatags': metatags, + 'rellinks': rellinks, + 'sourcename': sourcename, + 'toc': toc, + # only display a TOC if there's more than one item to show + 'display_toc': (self.env.toc_num_entries[docname] > 1), + 'page_source_suffix': source_suffix, + } + + def copy_assets(self) -> None: + self.finish_tasks.add_task(self.copy_download_files) + self.finish_tasks.add_task(self.copy_static_files) + self.finish_tasks.add_task(self.copy_extra_files) + self.finish_tasks.join() + + def write_doc(self, docname: str, doctree: nodes.document) -> None: + destination = StringOutput(encoding='utf-8') + doctree.settings = self.docsettings + + self.secnumbers = self.env.toc_secnumbers.get(docname, {}) + self.fignumbers = self.env.toc_fignumbers.get(docname, {}) + self.imgpath = relative_uri(self.get_target_uri(docname), '_images') + self.dlpath = relative_uri(self.get_target_uri(docname), '_downloads') + self.current_docname = docname + self.docwriter.write(doctree, destination) + self.docwriter.assemble_parts() + body = self.docwriter.parts['fragment'] + metatags = self.docwriter.clean_meta + + ctx = self.get_doc_context(docname, body, metatags) + self.handle_page(docname, ctx, event_arg=doctree) + + def write_doc_serialized(self, docname: str, doctree: nodes.document) -> None: + self.imgpath = relative_uri(self.get_target_uri(docname), self.imagedir) + self.post_process_images(doctree) + title_node = self.env.longtitles.get(docname) + title = self.render_partial(title_node)['title'] if title_node else '' + self.index_page(docname, doctree, title) + + def finish(self) -> None: + self.finish_tasks.add_task(self.gen_indices) + self.finish_tasks.add_task(self.gen_pages_from_extensions) + self.finish_tasks.add_task(self.gen_additional_pages) + self.finish_tasks.add_task(self.copy_image_files) + self.finish_tasks.add_task(self.write_buildinfo) + + # dump the search index + self.handle_finish() + + @progress_message(__('generating indices')) + def gen_indices(self) -> None: + # the global general index + if self.use_index: + self.write_genindex() + + # the global domain-specific indices + self.write_domain_indices() + + def gen_pages_from_extensions(self) -> None: + # pages from extensions + for pagelist in self.events.emit('html-collect-pages'): + for pagename, context, template in pagelist: + self.handle_page(pagename, context, template) + + @progress_message(__('writing additional pages')) + def gen_additional_pages(self) -> None: + # additional pages from conf.py + for pagename, template in self.config.html_additional_pages.items(): + logger.info(pagename + ' ', nonl=True) + self.handle_page(pagename, {}, template) + + # the search page + if self.search: + logger.info('search ', nonl=True) + self.handle_page('search', {}, 'search.html') + + # the opensearch xml file + if self.config.html_use_opensearch and self.search: + logger.info('opensearch ', nonl=True) + fn = path.join(self.outdir, '_static', 'opensearch.xml') + self.handle_page('opensearch', {}, 'opensearch.xml', outfilename=fn) + + def write_genindex(self) -> None: + # the total count of lines for each index letter, used to distribute + # the entries into two columns + genindex = IndexEntries(self.env).create_index(self) + indexcounts = [] + for _k, entries in genindex: + indexcounts.append(sum(1 + len(subitems) + 
for _, (_, subitems, _) in entries)) + + genindexcontext = { + 'genindexentries': genindex, + 'genindexcounts': indexcounts, + 'split_index': self.config.html_split_index, + } + logger.info('genindex ', nonl=True) + + if self.config.html_split_index: + self.handle_page('genindex', genindexcontext, + 'genindex-split.html') + self.handle_page('genindex-all', genindexcontext, + 'genindex.html') + for (key, entries), count in zip(genindex, indexcounts): + ctx = {'key': key, 'entries': entries, 'count': count, + 'genindexentries': genindex} + self.handle_page('genindex-' + key, ctx, + 'genindex-single.html') + else: + self.handle_page('genindex', genindexcontext, 'genindex.html') + + def write_domain_indices(self) -> None: + for indexname, indexcls, content, collapse in self.domain_indices: + indexcontext = { + 'indextitle': indexcls.localname, + 'content': content, + 'collapse_index': collapse, + } + logger.info(indexname + ' ', nonl=True) + self.handle_page(indexname, indexcontext, 'domainindex.html') + + def copy_image_files(self) -> None: + if self.images: + stringify_func = ImageAdapter(self.app.env).get_original_image_uri + ensuredir(path.join(self.outdir, self.imagedir)) + for src in status_iterator(self.images, __('copying images... '), "brown", + len(self.images), self.app.verbosity, + stringify_func=stringify_func): + dest = self.images[src] + try: + copyfile(path.join(self.srcdir, src), + path.join(self.outdir, self.imagedir, dest)) + except Exception as err: + logger.warning(__('cannot copy image file %r: %s'), + path.join(self.srcdir, src), err) + + def copy_download_files(self) -> None: + def to_relpath(f: str) -> str: + return relative_path(self.srcdir, f) # type: ignore[arg-type] + + # copy downloadable files + if self.env.dlfiles: + ensuredir(path.join(self.outdir, '_downloads')) + for src in status_iterator(self.env.dlfiles, __('copying downloadable files... 
'), + "brown", len(self.env.dlfiles), self.app.verbosity, + stringify_func=to_relpath): + try: + dest = path.join(self.outdir, '_downloads', self.env.dlfiles[src][1]) + ensuredir(path.dirname(dest)) + copyfile(path.join(self.srcdir, src), dest) + except OSError as err: + logger.warning(__('cannot copy downloadable file %r: %s'), + path.join(self.srcdir, src), err) + + def create_pygments_style_file(self) -> None: + """create a style file for pygments.""" + with open(path.join(self.outdir, '_static', 'pygments.css'), 'w', + encoding="utf-8") as f: + f.write(self.highlighter.get_stylesheet()) + + if self.dark_highlighter: + with open(path.join(self.outdir, '_static', 'pygments_dark.css'), 'w', + encoding="utf-8") as f: + f.write(self.dark_highlighter.get_stylesheet()) + + def copy_translation_js(self) -> None: + """Copy a JavaScript file for translations.""" + jsfile = self._get_translations_js() + if jsfile: + copyfile(jsfile, path.join(self.outdir, '_static', 'translations.js')) + + def copy_stemmer_js(self) -> None: + """Copy a JavaScript file for stemmer.""" + if self.indexer is not None: + if hasattr(self.indexer, 'get_js_stemmer_rawcodes'): + for jsfile in self.indexer.get_js_stemmer_rawcodes(): + copyfile(jsfile, path.join(self.outdir, '_static', path.basename(jsfile))) + else: + if js_stemmer_rawcode := self.indexer.get_js_stemmer_rawcode(): + copyfile(js_stemmer_rawcode, + path.join(self.outdir, '_static', '_stemmer.js')) + + def copy_theme_static_files(self, context: dict[str, Any]) -> None: + def onerror(filename: str, error: Exception) -> None: + logger.warning(__('Failed to copy a file in html_static_file: %s: %r'), + filename, error) + + if self.theme: + for entry in self.theme.get_theme_dirs()[::-1]: + copy_asset(path.join(entry, 'static'), + path.join(self.outdir, '_static'), + excluded=DOTFILES, context=context, + renderer=self.templates, onerror=onerror) + + def copy_html_static_files(self, context: dict) -> None: + def onerror(filename: str, error: Exception) -> None: + logger.warning(__('Failed to copy a file in html_static_file: %s: %r'), + filename, error) + + excluded = Matcher(self.config.exclude_patterns + ["**/.*"]) + for entry in self.config.html_static_path: + copy_asset(path.join(self.confdir, entry), + path.join(self.outdir, '_static'), + excluded, context=context, renderer=self.templates, onerror=onerror) + + def copy_html_logo(self) -> None: + if self.config.html_logo and not isurl(self.config.html_logo): + copy_asset(path.join(self.confdir, self.config.html_logo), + path.join(self.outdir, '_static')) + + def copy_html_favicon(self) -> None: + if self.config.html_favicon and not isurl(self.config.html_favicon): + copy_asset(path.join(self.confdir, self.config.html_favicon), + path.join(self.outdir, '_static')) + + def copy_static_files(self) -> None: + try: + with progress_message(__('copying static files')): + ensuredir(path.join(self.outdir, '_static')) + + # prepare context for templates + context = self.globalcontext.copy() + if self.indexer is not None: + context.update(self.indexer.context_for_searchtool()) + + self.create_pygments_style_file() + self.copy_translation_js() + self.copy_stemmer_js() + self.copy_theme_static_files(context) + self.copy_html_static_files(context) + self.copy_html_logo() + self.copy_html_favicon() + except OSError as err: + logger.warning(__('cannot copy static file %r'), err) + + def copy_extra_files(self) -> None: + """copy html_extra_path files.""" + try: + with progress_message(__('copying extra files')): + excluded = 
Matcher(self.config.exclude_patterns) + for extra_path in self.config.html_extra_path: + entry = path.join(self.confdir, extra_path) + copy_asset(entry, self.outdir, excluded) + except OSError as err: + logger.warning(__('cannot copy extra file %r'), err) + + def write_buildinfo(self) -> None: + try: + with open(path.join(self.outdir, '.buildinfo'), 'w', encoding="utf-8") as fp: + self.build_info.dump(fp) + except OSError as exc: + logger.warning(__('Failed to write build info file: %r'), exc) + + def cleanup(self) -> None: + # clean up theme stuff + if self.theme: + self.theme.cleanup() + + def post_process_images(self, doctree: Node) -> None: + """Pick the best candidate for an image and link down-scaled images to + their high res version. + """ + super().post_process_images(doctree) + + if self.config.html_scaled_image_link and self.html_scaled_image_link: + for node in doctree.findall(nodes.image): + if not any((key in node) for key in ['scale', 'width', 'height']): + # resizing options are not given. scaled image link is available + # only for resized images. + continue + if isinstance(node.parent, nodes.reference): + # A image having hyperlink target + continue + if 'no-scaled-link' in node['classes']: + # scaled image link is disabled for this node + continue + + uri = node['uri'] + reference = nodes.reference('', '', internal=True) + if uri in self.images: + reference['refuri'] = posixpath.join(self.imgpath, + self.images[uri]) + else: + reference['refuri'] = uri + node.replace_self(reference) + reference.append(node) + + def load_indexer(self, docnames: Iterable[str]) -> None: + assert self.indexer is not None + keep = set(self.env.all_docs) - set(docnames) + try: + searchindexfn = path.join(self.outdir, self.searchindex_filename) + if self.indexer_dumps_unicode: + with open(searchindexfn, encoding='utf-8') as ft: + self.indexer.load(ft, self.indexer_format) + else: + with open(searchindexfn, 'rb') as fb: + self.indexer.load(fb, self.indexer_format) + except (OSError, ValueError): + if keep: + logger.warning(__("search index couldn't be loaded, but not all " + 'documents will be built: the index will be ' + 'incomplete.')) + # delete all entries for files that will be rebuilt + self.indexer.prune(keep) + + def index_page(self, pagename: str, doctree: nodes.document, title: str) -> None: + # only index pages with title + if self.indexer is not None and title: + filename = self.env.doc2path(pagename, base=False) + metadata = self.env.metadata.get(pagename, {}) + if 'nosearch' in metadata: + self.indexer.feed(pagename, filename, '', new_document('')) + else: + self.indexer.feed(pagename, filename, title, doctree) + + def _get_local_toctree(self, docname: str, collapse: bool = True, **kwargs: Any) -> str: + if 'includehidden' not in kwargs: + kwargs['includehidden'] = False + if kwargs.get('maxdepth') == '': + kwargs.pop('maxdepth') + toctree = global_toctree_for_doc(self.env, docname, self, collapse=collapse, **kwargs) + return self.render_partial(toctree)['fragment'] + + def get_outfilename(self, pagename: str) -> str: + return path.join(self.outdir, os_path(pagename) + self.out_suffix) + + def add_sidebars(self, pagename: str, ctx: dict) -> None: + def has_wildcard(pattern: str) -> bool: + return any(char in pattern for char in '*?[') + + sidebars = None + matched = None + customsidebar = None + + # default sidebars settings for selected theme + if self.theme.name == 'alabaster': + # provide default settings for alabaster (for compatibility) + # Note: this will be removed before 
Sphinx-2.0 + try: + # get default sidebars settings from alabaster (if defined) + theme_default_sidebars = self.theme.config.get('theme', 'sidebars') + if theme_default_sidebars: + sidebars = [name.strip() for name in theme_default_sidebars.split(',')] + except Exception: + # fallback to better default settings + sidebars = ['about.html', 'navigation.html', 'relations.html', + 'searchbox.html', 'donate.html'] + else: + theme_default_sidebars = self.theme.get_config('theme', 'sidebars', None) + if theme_default_sidebars: + sidebars = [name.strip() for name in theme_default_sidebars.split(',')] + + # user sidebar settings + html_sidebars = self.get_builder_config('sidebars', 'html') + for pattern, patsidebars in html_sidebars.items(): + if patmatch(pagename, pattern): + if matched: + if has_wildcard(pattern): + # warn if both patterns contain wildcards + if has_wildcard(matched): + logger.warning(__('page %s matches two patterns in ' + 'html_sidebars: %r and %r'), + pagename, matched, pattern) + # else the already matched pattern is more specific + # than the present one, because it contains no wildcard + continue + matched = pattern + sidebars = patsidebars + + if sidebars is None: + # keep defaults + pass + + ctx['sidebars'] = sidebars + ctx['customsidebar'] = customsidebar + + # --------- these are overwritten by the serialization builder + + def get_target_uri(self, docname: str, typ: str | None = None) -> str: + return quote(docname) + self.link_suffix + + def handle_page(self, pagename: str, addctx: dict, templatename: str = 'page.html', + outfilename: str | None = None, event_arg: Any = None) -> None: + ctx = self.globalcontext.copy() + # current_page_name is backwards compatibility + ctx['pagename'] = ctx['current_page_name'] = pagename + ctx['encoding'] = self.config.html_output_encoding + default_baseuri = self.get_target_uri(pagename) + # in the singlehtml builder, default_baseuri still contains an #anchor + # part, which relative_uri doesn't really like... + default_baseuri = default_baseuri.rsplit('#', 1)[0] + + if self.config.html_baseurl: + ctx['pageurl'] = posixpath.join(self.config.html_baseurl, + pagename + self.out_suffix) + else: + ctx['pageurl'] = None + + def pathto( + otheruri: str, resource: bool = False, baseuri: str = default_baseuri, + ) -> str: + if resource and '://' in otheruri: + # allow non-local resources given by scheme + return otheruri + elif not resource: + otheruri = self.get_target_uri(otheruri) + uri = relative_uri(baseuri, otheruri) or '#' + if uri == '#' and not self.allow_sharp_as_current_path: + uri = baseuri + return uri + ctx['pathto'] = pathto + + def hasdoc(name: str) -> bool: + if name in self.env.all_docs: + return True + if name == 'search' and self.search: + return True + if name == 'genindex' and self.get_builder_config('use_index', 'html'): + return True + return False + ctx['hasdoc'] = hasdoc + + ctx['toctree'] = lambda **kwargs: self._get_local_toctree(pagename, **kwargs) + self.add_sidebars(pagename, ctx) + ctx.update(addctx) + + # 'blah.html' should have content_root = './' not ''. 
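+        # (Editor's illustration, not upstream code: for pagename 'a/b/page',
+        # default_baseuri is 'a/b/page.html' and contains two SEPs, so
+        # content_root becomes '../../'; for 'page' it contains none,
+        # giving './'.)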
+        ctx['content_root'] = (f'..{SEP}' * default_baseuri.count(SEP)) or f'.{SEP}'
+
+        outdir = self.app.outdir
+
+        def css_tag(css: _CascadingStyleSheet) -> str:
+            attrs = []
+            for key, value in css.attributes.items():
+                if value is not None:
+                    attrs.append(f'{key}="{html.escape(value, quote=True)}"')
+            uri = pathto(os.fspath(css.filename), resource=True)
+            if checksum := _file_checksum(outdir, css.filename):
+                uri += f'?v={checksum}'
+            return f'<link {" ".join(attrs)} href="{uri}" />'
+
+        ctx['css_tag'] = css_tag
+
+        def js_tag(js: _JavaScript | str) -> str:
+            if not isinstance(js, _JavaScript):
+                # str value (old styled)
+                return f'<script src="{pathto(js, resource=True)}"></script>'
+
+            attrs = []
+            body = js.attributes.get('body', '')
+            for key, value in js.attributes.items():
+                if key == 'body':
+                    continue
+                if value is not None:
+                    attrs.append(f'{key}="{html.escape(value, quote=True)}"')
+
+            if not js.filename:
+                if attrs:
+                    return f'<script {" ".join(attrs)}>{body}</script>'
+                return f'<script>{body}</script>'
+
+            uri = pathto(os.fspath(js.filename), resource=True)
+            if 'MathJax.js?' in os.fspath(js.filename):
+                # MathJax v2 reads a ``?config=...`` query parameter,
+                # special case this and just skip adding the checksum.
+                # https://docs.mathjax.org/en/v2.7-latest/configuration.html#considerations-for-using-combined-configuration-files
+                # https://github.com/sphinx-doc/sphinx/issues/11658
+                pass
+            elif checksum := _file_checksum(outdir, js.filename):
+                uri += f'?v={checksum}'
+            if attrs:
+                return f'<script {" ".join(attrs)} src="{uri}"></script>'
+            return f'<script src="{uri}"></script>'
+
+        ctx['js_tag'] = js_tag
+
+        # revert _css_files and _js_files
+        self._css_files[:] = self._orig_css_files
+        self._js_files[:] = self._orig_js_files
+
+        self.update_page_context(pagename, templatename, ctx, event_arg)
+        newtmpl = self.app.emit_firstresult('html-page-context', pagename,
+                                            templatename, ctx, event_arg)
+        if newtmpl:
+            templatename = newtmpl
+
+        # sort JS/CSS before rendering HTML
+        try:  # NoQA: SIM105
+            # Convert script_files to list to support non-list script_files (refs: #8889)
+            ctx['script_files'] = sorted(ctx['script_files'], key=lambda js: js.priority)
+        except AttributeError:
+            # Skip sorting if users modifies script_files directly (maybe via `html_context`).
+            # refs: #8885
+            #
+            # Note: priority sorting feature will not work in this case.
+            pass
+
+        with contextlib.suppress(AttributeError):
+            ctx['css_files'] = sorted(ctx['css_files'], key=lambda css: css.priority)
+
+        try:
+            output = self.templates.render(templatename, ctx)
+        except UnicodeError:
+            logger.warning(__("a Unicode error occurred when rendering the page %s. 
" + "Please make sure all config values that contain " + "non-ASCII content are Unicode strings."), pagename) + return + except Exception as exc: + raise ThemeError(__("An error happened in rendering the page %s.\nReason: %r") % + (pagename, exc)) from exc + + if not outfilename: + outfilename = self.get_outfilename(pagename) + # outfilename's path is in general different from self.outdir + ensuredir(path.dirname(outfilename)) + try: + with open(outfilename, 'w', encoding=ctx['encoding'], + errors='xmlcharrefreplace') as f: + f.write(output) + except OSError as err: + logger.warning(__("error writing file %s: %s"), outfilename, err) + if self.copysource and ctx.get('sourcename'): + # copy the source file for the "show source" link + source_name = path.join(self.outdir, '_sources', + os_path(ctx['sourcename'])) + ensuredir(path.dirname(source_name)) + copyfile(self.env.doc2path(pagename), source_name) + + def update_page_context(self, pagename: str, templatename: str, + ctx: dict, event_arg: Any) -> None: + pass + + def handle_finish(self) -> None: + self.finish_tasks.add_task(self.dump_search_index) + self.finish_tasks.add_task(self.dump_inventory) + + @progress_message(__('dumping object inventory')) + def dump_inventory(self) -> None: + InventoryFile.dump(path.join(self.outdir, INVENTORY_FILENAME), self.env, self) + + def dump_search_index(self) -> None: + if self.indexer is None: + return + + with progress_message(__('dumping search index in %s') % self.indexer.label()): + self.indexer.prune(self.env.all_docs) + searchindexfn = path.join(self.outdir, self.searchindex_filename) + # first write to a temporary file, so that if dumping fails, + # the existing index won't be overwritten + if self.indexer_dumps_unicode: + with open(searchindexfn + '.tmp', 'w', encoding='utf-8') as ft: + self.indexer.dump(ft, self.indexer_format) + else: + with open(searchindexfn + '.tmp', 'wb') as fb: + self.indexer.dump(fb, self.indexer_format) + os.replace(searchindexfn + '.tmp', searchindexfn) + + +def convert_html_css_files(app: Sphinx, config: Config) -> None: + """This converts string styled html_css_files to tuple styled one.""" + html_css_files: list[tuple[str, dict]] = [] + for entry in config.html_css_files: + if isinstance(entry, str): + html_css_files.append((entry, {})) + else: + try: + filename, attrs = entry + html_css_files.append((filename, attrs)) + except Exception: + logger.warning(__('invalid css_file: %r, ignored'), entry) + continue + + config.html_css_files = html_css_files # type: ignore[attr-defined] + + +def _format_modified_time(timestamp: float) -> str: + """Return an RFC 3339 formatted string representing the given timestamp.""" + seconds, fraction = divmod(timestamp, 1) + return time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(seconds)) + f'.{fraction:.3f}' + + +def convert_html_js_files(app: Sphinx, config: Config) -> None: + """This converts string styled html_js_files to tuple styled one.""" + html_js_files: list[tuple[str, dict]] = [] + for entry in config.html_js_files: + if isinstance(entry, str): + html_js_files.append((entry, {})) + else: + try: + filename, attrs = entry + html_js_files.append((filename, attrs)) + except Exception: + logger.warning(__('invalid js_file: %r, ignored'), entry) + continue + + config.html_js_files = html_js_files # type: ignore[attr-defined] + + +def setup_resource_paths(app: Sphinx, pagename: str, templatename: str, + context: dict, doctree: Node) -> None: + """Set up relative resource paths.""" + pathto = context['pathto'] + + # favicon_url + 
favicon_url = context.get('favicon_url') + if favicon_url and not isurl(favicon_url): + context['favicon_url'] = pathto('_static/' + favicon_url, resource=True) + + # logo_url + logo_url = context.get('logo_url') + if logo_url and not isurl(logo_url): + context['logo_url'] = pathto('_static/' + logo_url, resource=True) + + +def validate_math_renderer(app: Sphinx) -> None: + if app.builder.format != 'html': + return + + name = app.builder.math_renderer_name # type: ignore[attr-defined] + if name is None: + raise ConfigError(__('Many math_renderers are registered. ' + 'But no math_renderer is selected.')) + if name not in app.registry.html_inline_math_renderers: + raise ConfigError(__('Unknown math_renderer %r is given.') % name) + + +def validate_html_extra_path(app: Sphinx, config: Config) -> None: + """Check html_extra_paths setting.""" + for entry in config.html_extra_path[:]: + extra_path = path.normpath(path.join(app.confdir, entry)) + if not path.exists(extra_path): + logger.warning(__('html_extra_path entry %r does not exist'), entry) + config.html_extra_path.remove(entry) + elif (path.splitdrive(app.outdir)[0] == path.splitdrive(extra_path)[0] and + path.commonpath((app.outdir, extra_path)) == path.normpath(app.outdir)): + logger.warning(__('html_extra_path entry %r is placed inside outdir'), entry) + config.html_extra_path.remove(entry) + + +def validate_html_static_path(app: Sphinx, config: Config) -> None: + """Check html_static_paths setting.""" + for entry in config.html_static_path[:]: + static_path = path.normpath(path.join(app.confdir, entry)) + if not path.exists(static_path): + logger.warning(__('html_static_path entry %r does not exist'), entry) + config.html_static_path.remove(entry) + elif (path.splitdrive(app.outdir)[0] == path.splitdrive(static_path)[0] and + path.commonpath((app.outdir, static_path)) == path.normpath(app.outdir)): + logger.warning(__('html_static_path entry %r is placed inside outdir'), entry) + config.html_static_path.remove(entry) + + +def validate_html_logo(app: Sphinx, config: Config) -> None: + """Check html_logo setting.""" + if (config.html_logo and + not path.isfile(path.join(app.confdir, config.html_logo)) and + not isurl(config.html_logo)): + logger.warning(__('logo file %r does not exist'), config.html_logo) + config.html_logo = None # type: ignore[attr-defined] + + +def validate_html_favicon(app: Sphinx, config: Config) -> None: + """Check html_favicon setting.""" + if (config.html_favicon and + not path.isfile(path.join(app.confdir, config.html_favicon)) and + not isurl(config.html_favicon)): + logger.warning(__('favicon file %r does not exist'), config.html_favicon) + config.html_favicon = None # type: ignore[attr-defined] + + +def error_on_html_4(_app: Sphinx, config: Config) -> None: + """Error on HTML 4.""" + if config.html4_writer: + raise ConfigError(_( + 'HTML 4 is no longer supported by Sphinx. 
' + '("html4_writer=True" detected in configuration options)', + )) + + +def setup(app: Sphinx) -> dict[str, Any]: + # builders + app.add_builder(StandaloneHTMLBuilder) + + # config values + app.add_config_value('html_theme', 'alabaster', 'html') + app.add_config_value('html_theme_path', [], 'html') + app.add_config_value('html_theme_options', {}, 'html') + app.add_config_value('html_title', + lambda self: _('%s %s documentation') % (self.project, self.release), + 'html', [str]) + app.add_config_value('html_short_title', lambda self: self.html_title, 'html') + app.add_config_value('html_style', None, 'html', [list, str]) + app.add_config_value('html_logo', None, 'html', [str]) + app.add_config_value('html_favicon', None, 'html', [str]) + app.add_config_value('html_css_files', [], 'html') + app.add_config_value('html_js_files', [], 'html') + app.add_config_value('html_static_path', [], 'html') + app.add_config_value('html_extra_path', [], 'html') + app.add_config_value('html_last_updated_fmt', None, 'html', [str]) + app.add_config_value('html_sidebars', {}, 'html') + app.add_config_value('html_additional_pages', {}, 'html') + app.add_config_value('html_domain_indices', True, 'html', [list]) + app.add_config_value('html_permalinks', True, 'html') + app.add_config_value('html_permalinks_icon', '¶', 'html') + app.add_config_value('html_use_index', True, 'html') + app.add_config_value('html_split_index', False, 'html') + app.add_config_value('html_copy_source', True, 'html') + app.add_config_value('html_show_sourcelink', True, 'html') + app.add_config_value('html_sourcelink_suffix', '.txt', 'html') + app.add_config_value('html_use_opensearch', '', 'html') + app.add_config_value('html_file_suffix', None, 'html', [str]) + app.add_config_value('html_link_suffix', None, 'html', [str]) + app.add_config_value('html_show_copyright', True, 'html') + app.add_config_value('html_show_search_summary', True, 'html') + app.add_config_value('html_show_sphinx', True, 'html') + app.add_config_value('html_context', {}, 'html') + app.add_config_value('html_output_encoding', 'utf-8', 'html') + app.add_config_value('html_compact_lists', True, 'html') + app.add_config_value('html_secnumber_suffix', '. 
', 'html') + app.add_config_value('html_search_language', None, 'html', [str]) + app.add_config_value('html_search_options', {}, 'html') + app.add_config_value('html_search_scorer', '', '') + app.add_config_value('html_scaled_image_link', True, 'html') + app.add_config_value('html_baseurl', '', 'html') + app.add_config_value('html_codeblock_linenos_style', 'inline', 'html', # RemovedInSphinx70Warning # noqa: E501 + ENUM('table', 'inline')) + app.add_config_value('html_math_renderer', None, 'env') + app.add_config_value('html4_writer', False, 'html') + + # events + app.add_event('html-collect-pages') + app.add_event('html-page-context') + + # event handlers + app.connect('config-inited', convert_html_css_files, priority=800) + app.connect('config-inited', convert_html_js_files, priority=800) + app.connect('config-inited', validate_html_extra_path, priority=800) + app.connect('config-inited', validate_html_static_path, priority=800) + app.connect('config-inited', validate_html_logo, priority=800) + app.connect('config-inited', validate_html_favicon, priority=800) + app.connect('config-inited', error_on_html_4, priority=800) + app.connect('builder-inited', validate_math_renderer) + app.connect('html-page-context', setup_resource_paths) + + # load default math renderer + app.setup_extension('sphinx.ext.mathjax') + + # load transforms for HTML builder + app.setup_extension('sphinx.builders.html.transforms') + + return { + 'version': 'builtin', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } + + +# deprecated name -> (object to return, canonical path or empty string) +_DEPRECATED_OBJECTS = { + 'Stylesheet': (_CascadingStyleSheet, 'sphinx.builders.html._assets._CascadingStyleSheet', (9, 0)), # NoQA: E501 + 'JavaScript': (_JavaScript, 'sphinx.builders.html._assets._JavaScript', (9, 0)), +} + + +def __getattr__(name): + if name not in _DEPRECATED_OBJECTS: + msg = f'module {__name__!r} has no attribute {name!r}' + raise AttributeError(msg) + + from sphinx.deprecation import _deprecation_warning + + deprecated_object, canonical_name, remove = _DEPRECATED_OBJECTS[name] + _deprecation_warning(__name__, name, canonical_name, remove=remove) + return deprecated_object diff --git a/sphinx/builders/html/_assets.py b/sphinx/builders/html/_assets.py new file mode 100644 index 0000000..a72c500 --- /dev/null +++ b/sphinx/builders/html/_assets.py @@ -0,0 +1,146 @@ +from __future__ import annotations + +import os +import warnings +import zlib +from typing import TYPE_CHECKING + +from sphinx.deprecation import RemovedInSphinx90Warning +from sphinx.errors import ThemeError + +if TYPE_CHECKING: + from pathlib import Path + + +class _CascadingStyleSheet: + filename: str | os.PathLike[str] + priority: int + attributes: dict[str, str] + + def __init__( + self, + filename: str | os.PathLike[str], /, *, + priority: int = 500, + rel: str = 'stylesheet', + type: str = 'text/css', + **attributes: str, + ) -> None: + object.__setattr__(self, 'filename', filename) + object.__setattr__(self, 'priority', priority) + object.__setattr__(self, 'attributes', {'rel': rel, 'type': type, **attributes}) + + def __str__(self): + attr = ', '.join(f'{k}={v!r}' for k, v in self.attributes.items()) + return (f'{self.__class__.__name__}({self.filename!r}, ' + f'priority={self.priority}, ' + f'{attr})') + + def __eq__(self, other): + if isinstance(other, str): + warnings.warn('The str interface for _CascadingStyleSheet objects is deprecated. 
' + 'Use css.filename instead.', RemovedInSphinx90Warning, stacklevel=2) + return self.filename == other + if not isinstance(other, _CascadingStyleSheet): + return NotImplemented + return (self.filename == other.filename + and self.priority == other.priority + and self.attributes == other.attributes) + + def __hash__(self): + return hash((self.filename, self.priority, *sorted(self.attributes.items()))) + + def __setattr__(self, key, value): + msg = f'{self.__class__.__name__} is immutable' + raise AttributeError(msg) + + def __delattr__(self, key): + msg = f'{self.__class__.__name__} is immutable' + raise AttributeError(msg) + + def __getattr__(self, key): + warnings.warn('The str interface for _CascadingStyleSheet objects is deprecated. ' + 'Use css.filename instead.', RemovedInSphinx90Warning, stacklevel=2) + return getattr(os.fspath(self.filename), key) + + def __getitem__(self, key): + warnings.warn('The str interface for _CascadingStyleSheet objects is deprecated. ' + 'Use css.filename instead.', RemovedInSphinx90Warning, stacklevel=2) + return os.fspath(self.filename)[key] + + +class _JavaScript: + filename: str | os.PathLike[str] + priority: int + attributes: dict[str, str] + + def __init__( + self, + filename: str | os.PathLike[str], /, *, + priority: int = 500, + **attributes: str, + ) -> None: + object.__setattr__(self, 'filename', filename) + object.__setattr__(self, 'priority', priority) + object.__setattr__(self, 'attributes', attributes) + + def __str__(self): + attr = '' + if self.attributes: + attr = ', ' + ', '.join(f'{k}={v!r}' for k, v in self.attributes.items()) + return (f'{self.__class__.__name__}({self.filename!r}, ' + f'priority={self.priority}' + f'{attr})') + + def __eq__(self, other): + if isinstance(other, str): + warnings.warn('The str interface for _JavaScript objects is deprecated. ' + 'Use js.filename instead.', RemovedInSphinx90Warning, stacklevel=2) + return self.filename == other + if not isinstance(other, _JavaScript): + return NotImplemented + return (self.filename == other.filename + and self.priority == other.priority + and self.attributes == other.attributes) + + def __hash__(self): + return hash((self.filename, self.priority, *sorted(self.attributes.items()))) + + def __setattr__(self, key, value): + msg = f'{self.__class__.__name__} is immutable' + raise AttributeError(msg) + + def __delattr__(self, key): + msg = f'{self.__class__.__name__} is immutable' + raise AttributeError(msg) + + def __getattr__(self, key): + warnings.warn('The str interface for _JavaScript objects is deprecated. ' + 'Use js.filename instead.', RemovedInSphinx90Warning, stacklevel=2) + return getattr(os.fspath(self.filename), key) + + def __getitem__(self, key): + warnings.warn('The str interface for _JavaScript objects is deprecated. ' + 'Use js.filename instead.', RemovedInSphinx90Warning, stacklevel=2) + return os.fspath(self.filename)[key] + + +def _file_checksum(outdir: Path, filename: str | os.PathLike[str]) -> str: + filename = os.fspath(filename) + # Don't generate checksums for HTTP URIs + if '://' in filename: + return '' + # Some themes and extensions have used query strings + # for a similar asset checksum feature. + # As we cannot safely strip the query string, + # raise an error to the user. + if '?' 
in filename: + msg = f'Local asset file paths must not contain query strings: {filename!r}' + raise ThemeError(msg) + try: + # Remove all carriage returns to avoid checksum differences + content = outdir.joinpath(filename).read_bytes().translate(None, b'\r') + except FileNotFoundError: + return '' + if not content: + return '' + return f'{zlib.crc32(content):08x}' diff --git a/sphinx/builders/html/transforms.py b/sphinx/builders/html/transforms.py new file mode 100644 index 0000000..18a8d38 --- /dev/null +++ b/sphinx/builders/html/transforms.py @@ -0,0 +1,86 @@ +"""Transforms for HTML builder.""" + +from __future__ import annotations + +import re +from typing import TYPE_CHECKING, Any + +from docutils import nodes + +from sphinx.transforms.post_transforms import SphinxPostTransform +from sphinx.util.nodes import NodeMatcher + +if TYPE_CHECKING: + from sphinx.application import Sphinx + + +class KeyboardTransform(SphinxPostTransform): + """Transform :kbd: role to more detailed form. + + Before:: + + + Control-x + + After:: + + + + Control + - + + x + """ + default_priority = 400 + formats = ('html',) + pattern = re.compile(r'(?<=.)(-|\+|\^|\s+)(?=.)') + multiwords_keys = (('caps', 'lock'), + ('page', 'down'), + ('page', 'up'), + ('scroll', 'lock'), + ('num', 'lock'), + ('sys', 'rq'), + ('back', 'space')) + + def run(self, **kwargs: Any) -> None: + matcher = NodeMatcher(nodes.literal, classes=["kbd"]) + # this list must be pre-created as during iteration new nodes + # are added which match the condition in the NodeMatcher. + for node in list(self.document.findall(matcher)): # type: nodes.literal + parts = self.pattern.split(node[-1].astext()) + if len(parts) == 1 or self.is_multiwords_key(parts): + continue + + node['classes'].append('compound') + node.pop() + while parts: + if self.is_multiwords_key(parts): + key = ''.join(parts[:3]) + parts[:3] = [] + else: + key = parts.pop(0) + node += nodes.literal('', key, classes=["kbd"]) + + try: + # key separator (ex. 
-, +, ^) + sep = parts.pop(0) + node += nodes.Text(sep) + except IndexError: + pass + + def is_multiwords_key(self, parts: list[str]) -> bool: + if len(parts) >= 3 and parts[1].strip() == '': + name = parts[0].lower(), parts[2].lower() + return name in self.multiwords_keys + else: + return False + + +def setup(app: Sphinx) -> dict[str, Any]: + app.add_post_transform(KeyboardTransform) + + return { + 'version': 'builtin', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/sphinx/builders/latex/__init__.py b/sphinx/builders/latex/__init__.py new file mode 100644 index 0000000..3ece571 --- /dev/null +++ b/sphinx/builders/latex/__init__.py @@ -0,0 +1,551 @@ +"""LaTeX builder.""" + +from __future__ import annotations + +import os +import warnings +from os import path +from typing import TYPE_CHECKING, Any + +from docutils.frontend import OptionParser + +import sphinx.builders.latex.nodes # noqa: F401,E501 # Workaround: import this before writer to avoid ImportError +from sphinx import addnodes, highlighting, package_dir +from sphinx.builders import Builder +from sphinx.builders.latex.constants import ADDITIONAL_SETTINGS, DEFAULT_SETTINGS, SHORTHANDOFF +from sphinx.builders.latex.theming import Theme, ThemeFactory +from sphinx.builders.latex.util import ExtBabel +from sphinx.config import ENUM, Config +from sphinx.environment.adapters.asset import ImageAdapter +from sphinx.errors import NoUri, SphinxError +from sphinx.locale import _, __ +from sphinx.util import logging, texescape +from sphinx.util.console import bold, darkgreen # type: ignore[attr-defined] +from sphinx.util.display import progress_message, status_iterator +from sphinx.util.docutils import SphinxFileOutput, new_document +from sphinx.util.fileutil import copy_asset_file +from sphinx.util.i18n import format_date +from sphinx.util.nodes import inline_all_toctrees +from sphinx.util.osutil import SEP, make_filename_from_project +from sphinx.util.template import LaTeXRenderer +from sphinx.writers.latex import LaTeXTranslator, LaTeXWriter + +# load docutils.nodes after loading sphinx.builders.latex.nodes +from docutils import nodes # isort:skip + +if TYPE_CHECKING: + from collections.abc import Iterable + + from docutils.nodes import Node + + from sphinx.application import Sphinx + +XINDY_LANG_OPTIONS = { + # language codes from docutils.writers.latex2e.Babel + # ! xindy language names may differ from those in use by LaTeX/babel + # ! xindy does not support all Latin scripts as recognized by LaTeX/babel + # ! not all xindy-supported languages appear in Babel.language_codes + # cd /usr/local/texlive/2018/texmf-dist/xindy/modules/lang + # find . -name '*utf8.xdy' + # LATIN + 'sq': '-L albanian -C utf8 ', + 'hr': '-L croatian -C utf8 ', + 'cs': '-L czech -C utf8 ', + 'da': '-L danish -C utf8 ', + 'nl': '-L dutch-ij-as-ij -C utf8 ', + 'en': '-L english -C utf8 ', + 'eo': '-L esperanto -C utf8 ', + 'et': '-L estonian -C utf8 ', + 'fi': '-L finnish -C utf8 ', + 'fr': '-L french -C utf8 ', + 'de': '-L german-din5007 -C utf8 ', + 'is': '-L icelandic -C utf8 ', + 'it': '-L italian -C utf8 ', + 'la': '-L latin -C utf8 ', + 'lv': '-L latvian -C utf8 ', + 'lt': '-L lithuanian -C utf8 ', + 'dsb': '-L lower-sorbian -C utf8 ', + 'ds': '-L lower-sorbian -C utf8 ', # trick, no conflict + 'nb': '-L norwegian -C utf8 ', + 'no': '-L norwegian -C utf8 ', # and what about nynorsk? 
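+    # Editorial note (not part of upstream Sphinx): each value here is a
+    # command-line fragment spliced verbatim into the xindy invocation in
+    # the generated Makefile/latexmkrc, e.g. for language 'de' roughly:
+    #
+    #     xindy -L german-din5007 -C utf8 -M sphinx.xdy <project>.idx
+    #
+    # (the '-M sphinx.xdy' module name is illustrative only). This is also
+    # why every entry keeps its trailing space: fragments concatenate cleanly.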
+ 'pl': '-L polish -C utf8 ', + 'pt': '-L portuguese -C utf8 ', + 'ro': '-L romanian -C utf8 ', + 'sk': '-L slovak-small -C utf8 ', # there is also slovak-large + 'sl': '-L slovenian -C utf8 ', + 'es': '-L spanish-modern -C utf8 ', # there is also spanish-traditional + 'sv': '-L swedish -C utf8 ', + 'tr': '-L turkish -C utf8 ', + 'hsb': '-L upper-sorbian -C utf8 ', + 'hs': '-L upper-sorbian -C utf8 ', # trick, no conflict + 'vi': '-L vietnamese -C utf8 ', + # CYRILLIC + # for usage with pdflatex, needs also cyrLICRutf8.xdy module + 'be': '-L belarusian -C utf8 ', + 'bg': '-L bulgarian -C utf8 ', + 'mk': '-L macedonian -C utf8 ', + 'mn': '-L mongolian-cyrillic -C utf8 ', + 'ru': '-L russian -C utf8 ', + 'sr': '-L serbian -C utf8 ', + 'sh-cyrl': '-L serbian -C utf8 ', + 'sh': '-L serbian -C utf8 ', # trick, no conflict + 'uk': '-L ukrainian -C utf8 ', + # GREEK + # can work only with xelatex/lualatex, not supported by texindy+pdflatex + 'el': '-L greek -C utf8 ', + # FIXME, not compatible with [:2] slice but does Sphinx support Greek ? + 'el-polyton': '-L greek-polytonic -C utf8 ', +} + +XINDY_CYRILLIC_SCRIPTS = [ + 'be', 'bg', 'mk', 'mn', 'ru', 'sr', 'sh', 'uk', +] + +logger = logging.getLogger(__name__) + + +class LaTeXBuilder(Builder): + """ + Builds LaTeX output to create PDF. + """ + name = 'latex' + format = 'latex' + epilog = __('The LaTeX files are in %(outdir)s.') + if os.name == 'posix': + epilog += __("\nRun 'make' in that directory to run these through " + "(pdf)latex\n" + "(use `make latexpdf' here to do that automatically).") + + supported_image_types = ['application/pdf', 'image/png', 'image/jpeg'] + supported_remote_images = False + default_translator_class = LaTeXTranslator + + def init(self) -> None: + self.babel: ExtBabel + self.context: dict[str, Any] = {} + self.docnames: Iterable[str] = {} + self.document_data: list[tuple[str, str, str, str, str, bool]] = [] + self.themes = ThemeFactory(self.app) + texescape.init() + + self.init_context() + self.init_babel() + self.init_multilingual() + + def get_outdated_docs(self) -> str | list[str]: + return 'all documents' # for now + + def get_target_uri(self, docname: str, typ: str | None = None) -> str: + if docname not in self.docnames: + raise NoUri(docname, typ) + return '%' + docname + + def get_relative_uri(self, from_: str, to: str, typ: str | None = None) -> str: + # ignore source path + return self.get_target_uri(to, typ) + + def init_document_data(self) -> None: + preliminary_document_data = [list(x) for x in self.config.latex_documents] + if not preliminary_document_data: + logger.warning(__('no "latex_documents" config value found; no documents ' + 'will be written')) + return + # assign subdirs to titles + self.titles: list[tuple[str, str]] = [] + for entry in preliminary_document_data: + docname = entry[0] + if docname not in self.env.all_docs: + logger.warning(__('"latex_documents" config value references unknown ' + 'document %s'), docname) + continue + self.document_data.append(entry) # type: ignore[arg-type] + if docname.endswith(SEP + 'index'): + docname = docname[:-5] + self.titles.append((docname, entry[2])) + + def init_context(self) -> None: + self.context = DEFAULT_SETTINGS.copy() + + # Add special settings for latex_engine + self.context.update(ADDITIONAL_SETTINGS.get(self.config.latex_engine, {})) + + # Add special settings for (latex_engine, language_code) + key = (self.config.latex_engine, self.config.language[:2]) + self.context.update(ADDITIONAL_SETTINGS.get(key, {})) + + # Apply user settings to 
context + self.context.update(self.config.latex_elements) + self.context['release'] = self.config.release + self.context['use_xindy'] = self.config.latex_use_xindy + self.context['booktabs'] = 'booktabs' in self.config.latex_table_style + self.context['borderless'] = 'borderless' in self.config.latex_table_style + self.context['colorrows'] = 'colorrows' in self.config.latex_table_style + + if self.config.today: + self.context['date'] = self.config.today + else: + self.context['date'] = format_date(self.config.today_fmt or _('%b %d, %Y'), + language=self.config.language) + + if self.config.latex_logo: + self.context['logofilename'] = path.basename(self.config.latex_logo) + + # for compatibilities + self.context['indexname'] = _('Index') + if self.config.release: + # Show the release label only if release value exists + self.context.setdefault('releasename', _('Release')) + + def update_context(self) -> None: + """Update template variables for .tex file just before writing.""" + # Apply extension settings to context + registry = self.app.registry + self.context['packages'] = registry.latex_packages + self.context['packages_after_hyperref'] = registry.latex_packages_after_hyperref + + def init_babel(self) -> None: + self.babel = ExtBabel(self.config.language, not self.context['babel']) + if not self.babel.is_supported_language(): + # emit warning if specified language is invalid + # (only emitting, nothing changed to processing) + logger.warning(__('no Babel option known for language %r'), + self.config.language) + + def init_multilingual(self) -> None: + if self.context['latex_engine'] == 'pdflatex': + if not self.babel.uses_cyrillic(): + if 'X2' in self.context['fontenc']: + self.context['substitutefont'] = '\\usepackage{substitutefont}' + self.context['textcyrillic'] = ('\\usepackage[Xtwo]' + '{sphinxpackagecyrillic}') + elif 'T2A' in self.context['fontenc']: + self.context['substitutefont'] = '\\usepackage{substitutefont}' + self.context['textcyrillic'] = ('\\usepackage[TtwoA]' + '{sphinxpackagecyrillic}') + if 'LGR' in self.context['fontenc']: + self.context['substitutefont'] = '\\usepackage{substitutefont}' + else: + self.context['textgreek'] = '' + if self.context['substitutefont'] == '': + self.context['fontsubstitution'] = '' + + # 'babel' key is public and user setting must be obeyed + if self.context['babel']: + self.context['classoptions'] += ',' + self.babel.get_language() + # this branch is not taken for xelatex/lualatex if default settings + self.context['multilingual'] = self.context['babel'] + self.context['shorthandoff'] = SHORTHANDOFF + + # Times fonts don't work with Cyrillic languages + if self.babel.uses_cyrillic() and 'fontpkg' not in self.config.latex_elements: + self.context['fontpkg'] = '' + elif self.context['polyglossia']: + self.context['classoptions'] += ',' + self.babel.get_language() + options = self.babel.get_mainlanguage_options() + if options: + language = fr'\setmainlanguage[{options}]{{{self.babel.get_language()}}}' + else: + language = r'\setmainlanguage{%s}' % self.babel.get_language() + + self.context['multilingual'] = f'{self.context["polyglossia"]}\n{language}' + + def write_stylesheet(self) -> None: + highlighter = highlighting.PygmentsBridge('latex', self.config.pygments_style) + stylesheet = path.join(self.outdir, 'sphinxhighlight.sty') + with open(stylesheet, 'w', encoding="utf-8") as f: + f.write('\\NeedsTeXFormat{LaTeX2e}[1995/12/01]\n') + f.write('\\ProvidesPackage{sphinxhighlight}' + '[2022/06/30 stylesheet for highlighting with pygments]\n') 
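+            # Editorial sketch (not part of upstream Sphinx): the generated
+            # sphinxhighlight.sty therefore begins with
+            #
+            #     \NeedsTeXFormat{LaTeX2e}[1995/12/01]
+            #     \ProvidesPackage{sphinxhighlight}[2022/06/30 ...]
+            #
+            # followed by the comment written just below and the Pygments
+            # LaTeX style macros (\PYG and friends) returned by
+            # PygmentsBridge.get_stylesheet().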
+ f.write('% Its contents depend on pygments_style configuration variable.\n\n') + f.write(highlighter.get_stylesheet()) + + def copy_assets(self) -> None: + self.copy_support_files() + + if self.config.latex_additional_files: + self.copy_latex_additional_files() + + def write(self, *ignored: Any) -> None: + docwriter = LaTeXWriter(self) + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', category=DeprecationWarning) + # DeprecationWarning: The frontend.OptionParser class will be replaced + # by a subclass of argparse.ArgumentParser in Docutils 0.21 or later. + docsettings: Any = OptionParser( + defaults=self.env.settings, + components=(docwriter,), + read_config_files=True).get_default_values() + + self.init_document_data() + self.write_stylesheet() + self.copy_assets() + + for entry in self.document_data: + docname, targetname, title, author, themename = entry[:5] + theme = self.themes.get(themename) + toctree_only = False + if len(entry) > 5: + toctree_only = entry[5] + destination = SphinxFileOutput(destination_path=path.join(self.outdir, targetname), + encoding='utf-8', overwrite_if_changed=True) + with progress_message(__("processing %s") % targetname): + doctree = self.env.get_doctree(docname) + toctree = next(doctree.findall(addnodes.toctree), None) + if toctree and toctree.get('maxdepth') > 0: + tocdepth = toctree.get('maxdepth') + else: + tocdepth = None + + doctree = self.assemble_doctree( + docname, toctree_only, + appendices=(self.config.latex_appendices if theme.name != 'howto' else [])) + doctree['docclass'] = theme.docclass + doctree['contentsname'] = self.get_contentsname(docname) + doctree['tocdepth'] = tocdepth + self.post_process_images(doctree) + self.update_doc_context(title, author, theme) + self.update_context() + + with progress_message(__("writing")): + docsettings._author = author + docsettings._title = title + docsettings._contentsname = doctree['contentsname'] + docsettings._docname = docname + docsettings._docclass = theme.name + + doctree.settings = docsettings + docwriter.theme = theme + docwriter.write(doctree, destination) + + def get_contentsname(self, indexfile: str) -> str: + tree = self.env.get_doctree(indexfile) + contentsname = '' + for toctree in tree.findall(addnodes.toctree): + if 'caption' in toctree: + contentsname = toctree['caption'] + break + + return contentsname + + def update_doc_context(self, title: str, author: str, theme: Theme) -> None: + self.context['title'] = title + self.context['author'] = author + self.context['docclass'] = theme.docclass + self.context['papersize'] = theme.papersize + self.context['pointsize'] = theme.pointsize + self.context['wrapperclass'] = theme.wrapperclass + + def assemble_doctree( + self, indexfile: str, toctree_only: bool, appendices: list[str], + ) -> nodes.document: + self.docnames = set([indexfile] + appendices) + logger.info(darkgreen(indexfile) + " ", nonl=True) + tree = self.env.get_doctree(indexfile) + tree['docname'] = indexfile + if toctree_only: + # extract toctree nodes from the tree and put them in a + # fresh document + new_tree = new_document('') + new_sect = nodes.section() + new_sect += nodes.title('', + '') + new_tree += new_sect + for node in tree.findall(addnodes.toctree): + new_sect += node + tree = new_tree + largetree = inline_all_toctrees(self, self.docnames, indexfile, tree, + darkgreen, [indexfile]) + largetree['docname'] = indexfile + for docname in appendices: + appendix = self.env.get_doctree(docname) + appendix['docname'] = docname + 
largetree.append(appendix) + logger.info('') + logger.info(__("resolving references...")) + self.env.resolve_references(largetree, indexfile, self) + # resolve :ref:s to distant tex files -- we can't add a cross-reference, + # but append the document name + for pendingnode in largetree.findall(addnodes.pending_xref): + docname = pendingnode['refdocname'] + sectname = pendingnode['refsectname'] + newnodes: list[Node] = [nodes.emphasis(sectname, sectname)] + for subdir, title in self.titles: + if docname.startswith(subdir): + newnodes.append(nodes.Text(_(' (in '))) + newnodes.append(nodes.emphasis(title, title)) + newnodes.append(nodes.Text(')')) + break + else: + pass + pendingnode.replace_self(newnodes) + return largetree + + def finish(self) -> None: + self.copy_image_files() + self.write_message_catalog() + + @progress_message(__('copying TeX support files')) + def copy_support_files(self) -> None: + """copy TeX support files from texinputs.""" + # configure usage of xindy (impacts Makefile and latexmkrc) + # FIXME: convert this rather to a confval with suitable default + # according to language ? but would require extra documentation + xindy_lang_option = XINDY_LANG_OPTIONS.get(self.config.language[:2], + '-L general -C utf8 ') + xindy_cyrillic = self.config.language[:2] in XINDY_CYRILLIC_SCRIPTS + + context = { + 'latex_engine': self.config.latex_engine, + 'xindy_use': self.config.latex_use_xindy, + 'xindy_lang_option': xindy_lang_option, + 'xindy_cyrillic': xindy_cyrillic, + } + logger.info(bold(__('copying TeX support files...'))) + staticdirname = path.join(package_dir, 'texinputs') + for filename in os.listdir(staticdirname): + if not filename.startswith('.'): + copy_asset_file(path.join(staticdirname, filename), + self.outdir, context=context) + + # use pre-1.6.x Makefile for make latexpdf on Windows + if os.name == 'nt': + staticdirname = path.join(package_dir, 'texinputs_win') + copy_asset_file(path.join(staticdirname, 'Makefile_t'), + self.outdir, context=context) + + @progress_message(__('copying additional files')) + def copy_latex_additional_files(self) -> None: + for filename in self.config.latex_additional_files: + logger.info(' ' + filename, nonl=True) + copy_asset_file(path.join(self.confdir, filename), self.outdir) + + def copy_image_files(self) -> None: + if self.images: + stringify_func = ImageAdapter(self.app.env).get_original_image_uri + for src in status_iterator(self.images, __('copying images... 
'), "brown", + len(self.images), self.app.verbosity, + stringify_func=stringify_func): + dest = self.images[src] + try: + copy_asset_file(path.join(self.srcdir, src), + path.join(self.outdir, dest)) + except Exception as err: + logger.warning(__('cannot copy image file %r: %s'), + path.join(self.srcdir, src), err) + if self.config.latex_logo: + if not path.isfile(path.join(self.confdir, self.config.latex_logo)): + raise SphinxError(__('logo file %r does not exist') % self.config.latex_logo) + copy_asset_file(path.join(self.confdir, self.config.latex_logo), self.outdir) + + def write_message_catalog(self) -> None: + formats = self.config.numfig_format + context = { + 'addtocaptions': r'\@iden', + 'figurename': formats.get('figure', '').split('%s', 1), + 'tablename': formats.get('table', '').split('%s', 1), + 'literalblockname': formats.get('code-block', '').split('%s', 1), + } + + if self.context['babel'] or self.context['polyglossia']: + context['addtocaptions'] = r'\addto\captions%s' % self.babel.get_language() + + filename = path.join(package_dir, 'templates', 'latex', 'sphinxmessages.sty_t') + copy_asset_file(filename, self.outdir, context=context, renderer=LaTeXRenderer()) + + +def validate_config_values(app: Sphinx, config: Config) -> None: + for key in list(config.latex_elements): + if key not in DEFAULT_SETTINGS: + msg = __("Unknown configure key: latex_elements[%r], ignored.") + logger.warning(msg % (key,)) + config.latex_elements.pop(key) + + +def validate_latex_theme_options(app: Sphinx, config: Config) -> None: + for key in list(config.latex_theme_options): + if key not in Theme.UPDATABLE_KEYS: + msg = __("Unknown theme option: latex_theme_options[%r], ignored.") + logger.warning(msg % (key,)) + config.latex_theme_options.pop(key) + + +def install_packages_for_ja(app: Sphinx) -> None: + """Install packages for Japanese.""" + if app.config.language == 'ja' and app.config.latex_engine in ('platex', 'uplatex'): + app.add_latex_package('pxjahyper', after_hyperref=True) + + +def default_latex_engine(config: Config) -> str: + """ Better default latex_engine settings for specific languages. """ + if config.language == 'ja': + return 'uplatex' + if config.language.startswith('zh'): + return 'xelatex' + if config.language == 'el': + return 'xelatex' + return 'pdflatex' + + +def default_latex_docclass(config: Config) -> dict[str, str]: + """ Better default latex_docclass settings for specific languages. """ + if config.language == 'ja': + if config.latex_engine == 'uplatex': + return {'manual': 'ujbook', + 'howto': 'ujreport'} + else: + return {'manual': 'jsbook', + 'howto': 'jreport'} + else: + return {} + + +def default_latex_use_xindy(config: Config) -> bool: + """ Better default latex_use_xindy settings for specific engines. """ + return config.latex_engine in {'xelatex', 'lualatex'} + + +def default_latex_documents(config: Config) -> list[tuple[str, str, str, str, str]]: + """ Better default latex_documents settings. 
""" + project = texescape.escape(config.project, config.latex_engine) + author = texescape.escape(config.author, config.latex_engine) + return [(config.root_doc, + make_filename_from_project(config.project) + '.tex', + texescape.escape_abbr(project), + texescape.escape_abbr(author), + config.latex_theme)] + + +def setup(app: Sphinx) -> dict[str, Any]: + app.setup_extension('sphinx.builders.latex.transforms') + + app.add_builder(LaTeXBuilder) + app.connect('config-inited', validate_config_values, priority=800) + app.connect('config-inited', validate_latex_theme_options, priority=800) + app.connect('builder-inited', install_packages_for_ja) + + app.add_config_value('latex_engine', default_latex_engine, False, + ENUM('pdflatex', 'xelatex', 'lualatex', 'platex', 'uplatex')) + app.add_config_value('latex_documents', default_latex_documents, False) + app.add_config_value('latex_logo', None, False, [str]) + app.add_config_value('latex_appendices', [], False) + app.add_config_value('latex_use_latex_multicolumn', False, False) + app.add_config_value('latex_use_xindy', default_latex_use_xindy, False, [bool]) + app.add_config_value('latex_toplevel_sectioning', None, False, + ENUM(None, 'part', 'chapter', 'section')) + app.add_config_value('latex_domain_indices', True, False, [list]) + app.add_config_value('latex_show_urls', 'no', False) + app.add_config_value('latex_show_pagerefs', False, False) + app.add_config_value('latex_elements', {}, False) + app.add_config_value('latex_additional_files', [], False) + app.add_config_value('latex_table_style', ['booktabs', 'colorrows'], False, [list]) + app.add_config_value('latex_theme', 'manual', False, [str]) + app.add_config_value('latex_theme_options', {}, False) + app.add_config_value('latex_theme_path', [], False, [list]) + + app.add_config_value('latex_docclass', default_latex_docclass, False) + + return { + 'version': 'builtin', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/sphinx/builders/latex/constants.py b/sphinx/builders/latex/constants.py new file mode 100644 index 0000000..ce646d0 --- /dev/null +++ b/sphinx/builders/latex/constants.py @@ -0,0 +1,210 @@ +"""constants for LaTeX builder.""" + +from __future__ import annotations + +from typing import Any + +PDFLATEX_DEFAULT_FONTPKG = r''' +\usepackage{tgtermes} +\usepackage{tgheros} +\renewcommand{\ttdefault}{txtt} +''' + +PDFLATEX_DEFAULT_FONTSUBSTITUTION = r''' +\expandafter\ifx\csname T@LGR\endcsname\relax +\else +% LGR was declared as font encoding + \substitutefont{LGR}{\rmdefault}{cmr} + \substitutefont{LGR}{\sfdefault}{cmss} + \substitutefont{LGR}{\ttdefault}{cmtt} +\fi +\expandafter\ifx\csname T@X2\endcsname\relax + \expandafter\ifx\csname T@T2A\endcsname\relax + \else + % T2A was declared as font encoding + \substitutefont{T2A}{\rmdefault}{cmr} + \substitutefont{T2A}{\sfdefault}{cmss} + \substitutefont{T2A}{\ttdefault}{cmtt} + \fi +\else +% X2 was declared as font encoding + \substitutefont{X2}{\rmdefault}{cmr} + \substitutefont{X2}{\sfdefault}{cmss} + \substitutefont{X2}{\ttdefault}{cmtt} +\fi +''' + +XELATEX_DEFAULT_FONTPKG = r''' +\setmainfont{FreeSerif}[ + Extension = .otf, + UprightFont = *, + ItalicFont = *Italic, + BoldFont = *Bold, + BoldItalicFont = *BoldItalic +] +\setsansfont{FreeSans}[ + Extension = .otf, + UprightFont = *, + ItalicFont = *Oblique, + BoldFont = *Bold, + BoldItalicFont = *BoldOblique, +] +\setmonofont{FreeMono}[ + Extension = .otf, + UprightFont = *, + ItalicFont = *Oblique, + BoldFont = *Bold, + BoldItalicFont = *BoldOblique, +] 
+''' + +XELATEX_GREEK_DEFAULT_FONTPKG = (XELATEX_DEFAULT_FONTPKG + + '\n\\newfontfamily\\greekfont{FreeSerif}' + + '\n\\newfontfamily\\greekfontsf{FreeSans}' + + '\n\\newfontfamily\\greekfonttt{FreeMono}') + +LUALATEX_DEFAULT_FONTPKG = XELATEX_DEFAULT_FONTPKG + +DEFAULT_SETTINGS: dict[str, Any] = { + 'latex_engine': 'pdflatex', + 'papersize': '', + 'pointsize': '', + 'pxunit': '.75bp', + 'classoptions': '', + 'extraclassoptions': '', + 'maxlistdepth': '', + 'sphinxpkgoptions': '', + 'sphinxsetup': '', + 'fvset': '\\fvset{fontsize=auto}', + 'passoptionstopackages': '', + 'geometry': '\\usepackage{geometry}', + 'inputenc': '', + 'utf8extra': '', + 'cmappkg': '\\usepackage{cmap}', + 'fontenc': '\\usepackage[T1]{fontenc}', + 'amsmath': '\\usepackage{amsmath,amssymb,amstext}', + 'multilingual': '', + 'babel': '\\usepackage{babel}', + 'polyglossia': '', + 'fontpkg': PDFLATEX_DEFAULT_FONTPKG, + 'fontsubstitution': PDFLATEX_DEFAULT_FONTSUBSTITUTION, + 'substitutefont': '', + 'textcyrillic': '', + 'textgreek': '\\usepackage{textalpha}', + 'fncychap': '\\usepackage[Bjarne]{fncychap}', + 'hyperref': ('% Include hyperref last.\n' + '\\usepackage{hyperref}\n' + '% Fix anchor placement for figures with captions.\n' + '\\usepackage{hypcap}% it must be loaded after hyperref.\n' + '% Set up styles of URL: it should be placed after hyperref.\n' + '\\urlstyle{same}'), + 'contentsname': '', + 'extrapackages': '', + 'preamble': '', + 'title': '', + 'release': '', + 'author': '', + 'releasename': '', + 'makeindex': '\\makeindex', + 'shorthandoff': '', + 'maketitle': '\\sphinxmaketitle', + 'tableofcontents': '\\sphinxtableofcontents', + 'atendofbody': '', + 'printindex': '\\printindex', + 'transition': '\n\n\\bigskip\\hrule\\bigskip\n\n', + 'figure_align': 'htbp', + 'tocdepth': '', + 'secnumdepth': '', +} + +ADDITIONAL_SETTINGS: dict[Any, dict[str, Any]] = { + 'pdflatex': { + 'inputenc': '\\usepackage[utf8]{inputenc}', + 'utf8extra': ('\\ifdefined\\DeclareUnicodeCharacter\n' + '% support both utf8 and utf8x syntaxes\n' + ' \\ifdefined\\DeclareUnicodeCharacterAsOptional\n' + ' \\def\\sphinxDUC#1{\\DeclareUnicodeCharacter{"#1}}\n' + ' \\else\n' + ' \\let\\sphinxDUC\\DeclareUnicodeCharacter\n' + ' \\fi\n' + ' \\sphinxDUC{00A0}{\\nobreakspace}\n' + ' \\sphinxDUC{2500}{\\sphinxunichar{2500}}\n' + ' \\sphinxDUC{2502}{\\sphinxunichar{2502}}\n' + ' \\sphinxDUC{2514}{\\sphinxunichar{2514}}\n' + ' \\sphinxDUC{251C}{\\sphinxunichar{251C}}\n' + ' \\sphinxDUC{2572}{\\textbackslash}\n' + '\\fi'), + }, + 'xelatex': { + 'latex_engine': 'xelatex', + 'polyglossia': '\\usepackage{polyglossia}', + 'babel': '', + 'fontenc': ('\\usepackage{fontspec}\n' + '\\defaultfontfeatures[\\rmfamily,\\sffamily,\\ttfamily]{}'), + 'fontpkg': XELATEX_DEFAULT_FONTPKG, + 'fvset': '\\fvset{fontsize=\\small}', + 'fontsubstitution': '', + 'textgreek': '', + 'utf8extra': ('\\catcode`^^^^00a0\\active\\protected\\def^^^^00a0' + '{\\leavevmode\\nobreak\\ }'), + }, + 'lualatex': { + 'latex_engine': 'lualatex', + 'polyglossia': '\\usepackage{polyglossia}', + 'babel': '', + 'fontenc': ('\\usepackage{fontspec}\n' + '\\defaultfontfeatures[\\rmfamily,\\sffamily,\\ttfamily]{}'), + 'fontpkg': LUALATEX_DEFAULT_FONTPKG, + 'fvset': '\\fvset{fontsize=\\small}', + 'fontsubstitution': '', + 'textgreek': '', + 'utf8extra': ('\\catcode`^^^^00a0\\active\\protected\\def^^^^00a0' + '{\\leavevmode\\nobreak\\ }'), + }, + 'platex': { + 'latex_engine': 'platex', + 'babel': '', + 'classoptions': ',dvipdfmx', + 'fontpkg': PDFLATEX_DEFAULT_FONTPKG, + 'fontsubstitution': '', + 
'textgreek': '', + 'fncychap': '', + 'geometry': '\\usepackage[dvipdfm]{geometry}', + }, + 'uplatex': { + 'latex_engine': 'uplatex', + 'babel': '', + 'classoptions': ',dvipdfmx', + 'fontpkg': PDFLATEX_DEFAULT_FONTPKG, + 'fontsubstitution': '', + 'textgreek': '', + 'fncychap': '', + 'geometry': '\\usepackage[dvipdfm]{geometry}', + }, + + # special settings for latex_engine + language_code + ('xelatex', 'fr'): { + # use babel instead of polyglossia by default + 'polyglossia': '', + 'babel': '\\usepackage{babel}', + }, + ('xelatex', 'zh'): { + 'polyglossia': '', + 'babel': '\\usepackage{babel}', + 'fontenc': '\\usepackage{xeCJK}', + # set formatcom=\xeCJKVerbAddon to prevent xeCJK from adding extra spaces in + # fancyvrb Verbatim environment. + 'fvset': '\\fvset{fontsize=\\small,formatcom=\\xeCJKVerbAddon}', + }, + ('xelatex', 'el'): { + 'fontpkg': XELATEX_GREEK_DEFAULT_FONTPKG, + }, +} + + +SHORTHANDOFF = r''' +\ifdefined\shorthandoff + \ifnum\catcode`\=\string=\active\shorthandoff{=}\fi + \ifnum\catcode`\"=\active\shorthandoff{"}\fi +\fi +''' diff --git a/sphinx/builders/latex/nodes.py b/sphinx/builders/latex/nodes.py new file mode 100644 index 0000000..2c008b9 --- /dev/null +++ b/sphinx/builders/latex/nodes.py @@ -0,0 +1,37 @@ +"""Additional nodes for LaTeX writer.""" + +from docutils import nodes + + +class captioned_literal_block(nodes.container): + """A node for a container of literal_block having a caption.""" + pass + + +class footnotemark(nodes.Inline, nodes.Referential, nodes.TextElement): + """A node represents ``\footnotemark``.""" + pass + + +class footnotetext(nodes.General, nodes.BackLinkable, nodes.Element, + nodes.Labeled, nodes.Targetable): + """A node represents ``\footnotetext``.""" + + +class math_reference(nodes.Inline, nodes.Referential, nodes.TextElement): + """A node for a reference for equation.""" + pass + + +class thebibliography(nodes.container): + """A node for wrapping bibliographies.""" + pass + + +HYPERLINK_SUPPORT_NODES = ( + nodes.figure, + nodes.literal_block, + nodes.table, + nodes.section, + captioned_literal_block, +) diff --git a/sphinx/builders/latex/theming.py b/sphinx/builders/latex/theming.py new file mode 100644 index 0000000..21b49e8 --- /dev/null +++ b/sphinx/builders/latex/theming.py @@ -0,0 +1,135 @@ +"""Theming support for LaTeX builder.""" + +from __future__ import annotations + +import configparser +from os import path +from typing import TYPE_CHECKING + +from sphinx.errors import ThemeError +from sphinx.locale import __ +from sphinx.util import logging + +if TYPE_CHECKING: + from sphinx.application import Sphinx + from sphinx.config import Config + +logger = logging.getLogger(__name__) + + +class Theme: + """A set of LaTeX configurations.""" + + LATEX_ELEMENTS_KEYS = ['papersize', 'pointsize'] + UPDATABLE_KEYS = ['papersize', 'pointsize'] + + def __init__(self, name: str) -> None: + self.name = name + self.docclass = name + self.wrapperclass = name + self.papersize = 'letterpaper' + self.pointsize = '10pt' + self.toplevel_sectioning = 'chapter' + + def update(self, config: Config) -> None: + """Override theme settings by user's configuration.""" + for key in self.LATEX_ELEMENTS_KEYS: + if config.latex_elements.get(key): + value = config.latex_elements[key] + setattr(self, key, value) + + for key in self.UPDATABLE_KEYS: + if key in config.latex_theme_options: + value = config.latex_theme_options[key] + setattr(self, key, value) + + +class BuiltInTheme(Theme): + """A built-in LaTeX theme.""" + + def __init__(self, name: str, config: Config) 
-> None: + super().__init__(name) + + if name == 'howto': + self.docclass = config.latex_docclass.get('howto', 'article') + else: + self.docclass = config.latex_docclass.get('manual', 'report') + + if name in ('manual', 'howto'): + self.wrapperclass = 'sphinx' + name + else: + self.wrapperclass = name + + # we assume LaTeX class provides \chapter command except in case + # of non-Japanese 'howto' case + if name == 'howto' and not self.docclass.startswith('j'): + self.toplevel_sectioning = 'section' + else: + self.toplevel_sectioning = 'chapter' + + +class UserTheme(Theme): + """A user defined LaTeX theme.""" + + REQUIRED_CONFIG_KEYS = ['docclass', 'wrapperclass'] + OPTIONAL_CONFIG_KEYS = ['papersize', 'pointsize', 'toplevel_sectioning'] + + def __init__(self, name: str, filename: str) -> None: + super().__init__(name) + self.config = configparser.RawConfigParser() + self.config.read(path.join(filename), encoding='utf-8') + + for key in self.REQUIRED_CONFIG_KEYS: + try: + value = self.config.get('theme', key) + setattr(self, key, value) + except configparser.NoSectionError as exc: + raise ThemeError(__('%r doesn\'t have "theme" setting') % + filename) from exc + except configparser.NoOptionError as exc: + raise ThemeError(__('%r doesn\'t have "%s" setting') % + (filename, exc.args[0])) from exc + + for key in self.OPTIONAL_CONFIG_KEYS: + try: + value = self.config.get('theme', key) + setattr(self, key, value) + except configparser.NoOptionError: + pass + + +class ThemeFactory: + """A factory class for LaTeX Themes.""" + + def __init__(self, app: Sphinx) -> None: + self.themes: dict[str, Theme] = {} + self.theme_paths = [path.join(app.srcdir, p) for p in app.config.latex_theme_path] + self.config = app.config + self.load_builtin_themes(app.config) + + def load_builtin_themes(self, config: Config) -> None: + """Load built-in themes.""" + self.themes['manual'] = BuiltInTheme('manual', config) + self.themes['howto'] = BuiltInTheme('howto', config) + + def get(self, name: str) -> Theme: + """Get a theme for given *name*.""" + if name in self.themes: + theme = self.themes[name] + else: + theme = self.find_user_theme(name) or Theme(name) + + theme.update(self.config) + return theme + + def find_user_theme(self, name: str) -> Theme | None: + """Find a theme named as *name* from latex_theme_path.""" + for theme_path in self.theme_paths: + config_path = path.join(theme_path, name, 'theme.conf') + if path.isfile(config_path): + try: + return UserTheme(name, config_path) + except ThemeError as exc: + logger.warning(exc) + + return None diff --git a/sphinx/builders/latex/transforms.py b/sphinx/builders/latex/transforms.py new file mode 100644 index 0000000..ca1e4f3 --- /dev/null +++ b/sphinx/builders/latex/transforms.py @@ -0,0 +1,642 @@ +"""Transforms for LaTeX builder.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, cast + +from docutils import nodes +from docutils.transforms.references import Substitutions + +from sphinx import addnodes +from sphinx.builders.latex.nodes import ( + captioned_literal_block, + footnotemark, + footnotetext, + math_reference, + thebibliography, +) +from sphinx.domains.citation import CitationDomain +from sphinx.locale import __ +from sphinx.transforms import SphinxTransform +from sphinx.transforms.post_transforms import SphinxPostTransform +from sphinx.util.nodes import NodeMatcher + +if TYPE_CHECKING: + from docutils.nodes import Element, Node + + from sphinx.application import Sphinx + +URI_SCHEMES = ('mailto:', 'http:', 'https:', 
'ftp:') + + +class FootnoteDocnameUpdater(SphinxTransform): + """Add docname to footnote and footnote_reference nodes.""" + default_priority = 700 + TARGET_NODES = (nodes.footnote, nodes.footnote_reference) + + def apply(self, **kwargs: Any) -> None: + matcher = NodeMatcher(*self.TARGET_NODES) + for node in self.document.findall(matcher): # type: Element + node['docname'] = self.env.docname + + +class SubstitutionDefinitionsRemover(SphinxPostTransform): + """Remove ``substitution_definition`` nodes from doctrees.""" + + # should be invoked after Substitutions process + default_priority = Substitutions.default_priority + 1 + formats = ('latex',) + + def run(self, **kwargs: Any) -> None: + for node in list(self.document.findall(nodes.substitution_definition)): + node.parent.remove(node) + + +class ShowUrlsTransform(SphinxPostTransform): + """Expand references to inline text or footnotes. + + For more information, see :confval:`latex_show_urls`. + + .. note:: This transform is used for integrated doctree + """ + default_priority = 400 + formats = ('latex',) + + # references are expanded to footnotes (or not) + expanded = False + + def run(self, **kwargs: Any) -> None: + try: + # replace id_prefix temporarily + settings: Any = self.document.settings + id_prefix = settings.id_prefix + settings.id_prefix = 'show_urls' + + self.expand_show_urls() + if self.expanded: + self.renumber_footnotes() + finally: + # restore id_prefix + settings.id_prefix = id_prefix + + def expand_show_urls(self) -> None: + show_urls = self.config.latex_show_urls + if show_urls is False or show_urls == 'no': + return + + for node in list(self.document.findall(nodes.reference)): + uri = node.get('refuri', '') + if uri.startswith(URI_SCHEMES): + if uri.startswith('mailto:'): + uri = uri[7:] + if node.astext() != uri: + index = node.parent.index(node) + docname = self.get_docname_for_node(node) + if show_urls == 'footnote': + fn, fnref = self.create_footnote(uri, docname) + node.parent.insert(index + 1, fn) + node.parent.insert(index + 2, fnref) + + self.expanded = True + else: # all other true values (b/w compat) + textnode = nodes.Text(" (%s)" % uri) + node.parent.insert(index + 1, textnode) + + def get_docname_for_node(self, node: Node) -> str: + while node: + if isinstance(node, nodes.document): + return self.env.path2doc(node['source']) or '' + elif isinstance(node, addnodes.start_of_file): + return node['docname'] + else: + node = node.parent + + try: + source = node['source'] # type: ignore[index] + except TypeError: + raise ValueError(__('Failed to get a docname!')) from None + raise ValueError(__('Failed to get a docname ' + 'for source {source!r}!').format(source=source)) + + def create_footnote( + self, uri: str, docname: str, + ) -> tuple[nodes.footnote, nodes.footnote_reference]: + reference = nodes.reference('', nodes.Text(uri), refuri=uri, nolinkurl=True) + footnote = nodes.footnote(uri, auto=1, docname=docname) + footnote['names'].append('#') + footnote += nodes.label('', '#') + footnote += nodes.paragraph('', '', reference) + self.document.note_autofootnote(footnote) + + footnote_ref = nodes.footnote_reference('[#]_', auto=1, + refid=footnote['ids'][0], docname=docname) + footnote_ref += nodes.Text('#') + self.document.note_autofootnote_ref(footnote_ref) + footnote.add_backref(footnote_ref['ids'][0]) + + return footnote, footnote_ref + + def renumber_footnotes(self) -> None: + collector = FootnoteCollector(self.document) + self.document.walkabout(collector) + + num = 0 + for footnote in 
collector.auto_footnotes: + # search unused footnote number + while True: + num += 1 + if str(num) not in collector.used_footnote_numbers: + break + + # assign new footnote number + old_label = cast(nodes.label, footnote[0]) + old_label.replace_self(nodes.label('', str(num))) + if old_label in footnote['names']: + footnote['names'].remove(old_label.astext()) + footnote['names'].append(str(num)) + + # update footnote_references by new footnote number + docname = footnote['docname'] + for ref in collector.footnote_refs: + if docname == ref['docname'] and footnote['ids'][0] == ref['refid']: + ref.remove(ref[0]) + ref += nodes.Text(str(num)) + + +class FootnoteCollector(nodes.NodeVisitor): + """Collect footnotes and footnote references on the document""" + + def __init__(self, document: nodes.document) -> None: + self.auto_footnotes: list[nodes.footnote] = [] + self.used_footnote_numbers: set[str] = set() + self.footnote_refs: list[nodes.footnote_reference] = [] + super().__init__(document) + + def unknown_visit(self, node: Node) -> None: + pass + + def unknown_departure(self, node: Node) -> None: + pass + + def visit_footnote(self, node: nodes.footnote) -> None: + if node.get('auto'): + self.auto_footnotes.append(node) + else: + for name in node['names']: + self.used_footnote_numbers.add(name) + + def visit_footnote_reference(self, node: nodes.footnote_reference) -> None: + self.footnote_refs.append(node) + + +class LaTeXFootnoteTransform(SphinxPostTransform): + """Convert footnote definitions and references to appropriate form to LaTeX. + + * Replace footnotes on restricted zone (e.g. headings) by footnotemark node. + In addition, append a footnotetext node after the zone. + + Before:: + +
+ + headings having footnotes + <footnote_reference> + 1 + <footnote ids="id1"> + <label> + 1 + <paragraph> + footnote body + + After:: + + <section> + <title> + headings having footnotes + <footnotemark refid="id1"> + 1 + <footnotetext ids="id1"> + <label> + 1 + <paragraph> + footnote body + + * Integrate footnote definitions and footnote references to single footnote node + + Before:: + + blah blah blah + <footnote_reference refid="id1"> + 1 + blah blah blah ... + + <footnote ids="id1"> + <label> + 1 + <paragraph> + footnote body + + After:: + + blah blah blah + <footnote ids="id1"> + <label> + 1 + <paragraph> + footnote body + blah blah blah ... + + * Replace second and subsequent footnote references which refers same footnote definition + by footnotemark node. Additionally, the footnote definition node is marked as + "referred". + + Before:: + + blah blah blah + <footnote_reference refid="id1"> + 1 + blah blah blah + <footnote_reference refid="id1"> + 1 + blah blah blah ... + + <footnote ids="id1"> + <label> + 1 + <paragraph> + footnote body + + After:: + + blah blah blah + <footnote ids="id1" referred=True> + <label> + 1 + <paragraph> + footnote body + blah blah blah + <footnotemark refid="id1"> + 1 + blah blah blah ... + + * Remove unreferenced footnotes + + Before:: + + <footnote ids="id1"> + <label> + 1 + <paragraph> + Unreferenced footnote! + + After:: + + <!-- nothing! --> + + * Move footnotes in a title of table or thead to head of tbody + + Before:: + + <table> + <title> + title having footnote_reference + <footnote_reference refid="id1"> + 1 + <tgroup> + <thead> + <row> + <entry> + header having footnote_reference + <footnote_reference refid="id2"> + 2 + <tbody> + <row> + ... + + <footnote ids="id1"> + <label> + 1 + <paragraph> + footnote body + + <footnote ids="id2"> + <label> + 2 + <paragraph> + footnote body + + After:: + + <table> + <title> + title having footnote_reference + <footnotemark refid="id1"> + 1 + <tgroup> + <thead> + <row> + <entry> + header having footnote_reference + <footnotemark refid="id2"> + 2 + <tbody> + <footnotetext ids="id1"> + <label> + 1 + <paragraph> + footnote body + + <footnotetext ids="id2"> + <label> + 2 + <paragraph> + footnote body + <row> + ... 
+ """ + + default_priority = 600 + formats = ('latex',) + + def run(self, **kwargs: Any) -> None: + footnotes = list(self.document.findall(nodes.footnote)) + for node in footnotes: + node.parent.remove(node) + + visitor = LaTeXFootnoteVisitor(self.document, footnotes) + self.document.walkabout(visitor) + + +class LaTeXFootnoteVisitor(nodes.NodeVisitor): + def __init__(self, document: nodes.document, footnotes: list[nodes.footnote]) -> None: + self.appeared: dict[tuple[str, str], nodes.footnote] = {} + self.footnotes: list[nodes.footnote] = footnotes + self.pendings: list[nodes.footnote] = [] + self.table_footnotes: list[nodes.footnote] = [] + self.restricted: Element | None = None + super().__init__(document) + + def unknown_visit(self, node: Node) -> None: + pass + + def unknown_departure(self, node: Node) -> None: + pass + + def restrict(self, node: Element) -> None: + if self.restricted is None: + self.restricted = node + + def unrestrict(self, node: Element) -> None: + if self.restricted == node: + self.restricted = None + pos = node.parent.index(node) + for i, footnote, in enumerate(self.pendings): + fntext = footnotetext('', *footnote.children, ids=footnote['ids']) + node.parent.insert(pos + i + 1, fntext) + self.pendings = [] + + def visit_figure(self, node: nodes.figure) -> None: + self.restrict(node) + + def depart_figure(self, node: nodes.figure) -> None: + self.unrestrict(node) + + def visit_term(self, node: nodes.term) -> None: + self.restrict(node) + + def depart_term(self, node: nodes.term) -> None: + self.unrestrict(node) + + def visit_caption(self, node: nodes.caption) -> None: + self.restrict(node) + + def depart_caption(self, node: nodes.caption) -> None: + self.unrestrict(node) + + def visit_title(self, node: nodes.title) -> None: + if isinstance(node.parent, (nodes.section, nodes.table)): + self.restrict(node) + + def depart_title(self, node: nodes.title) -> None: + if isinstance(node.parent, nodes.section): + self.unrestrict(node) + elif isinstance(node.parent, nodes.table): + self.table_footnotes += self.pendings + self.pendings = [] + self.unrestrict(node) + + def visit_thead(self, node: nodes.thead) -> None: + self.restrict(node) + + def depart_thead(self, node: nodes.thead) -> None: + self.table_footnotes += self.pendings + self.pendings = [] + self.unrestrict(node) + + def depart_table(self, node: nodes.table) -> None: + tbody = next(node.findall(nodes.tbody)) + for footnote in reversed(self.table_footnotes): + fntext = footnotetext('', *footnote.children, ids=footnote['ids']) + tbody.insert(0, fntext) + + self.table_footnotes = [] + + def visit_footnote(self, node: nodes.footnote) -> None: + self.restrict(node) + + def depart_footnote(self, node: nodes.footnote) -> None: + self.unrestrict(node) + + def visit_footnote_reference(self, node: nodes.footnote_reference) -> None: + number = node.astext().strip() + docname = node['docname'] + if (docname, number) in self.appeared: + footnote = self.appeared[(docname, number)] + footnote["referred"] = True + + mark = footnotemark('', number, refid=node['refid']) + node.replace_self(mark) + else: + footnote = self.get_footnote_by_reference(node) + if self.restricted: + mark = footnotemark('', number, refid=node['refid']) + node.replace_self(mark) + self.pendings.append(footnote) + else: + self.footnotes.remove(footnote) + node.replace_self(footnote) + footnote.walkabout(self) + + self.appeared[(docname, number)] = footnote + raise nodes.SkipNode + + def get_footnote_by_reference(self, node: nodes.footnote_reference) -> 
nodes.footnote:
+        docname = node['docname']
+        for footnote in self.footnotes:
+            if docname == footnote['docname'] and footnote['ids'][0] == node['refid']:
+                return footnote
+
+        raise ValueError(__('No footnote was found for given reference node %r') % node)
+
+
+class BibliographyTransform(SphinxPostTransform):
+    """Gather bibliography entries to the tail of the document.
+
+    Before::
+
+        <document>
+            <paragraph>
+                blah blah blah
+            <citation>
+                ...
+            <paragraph>
+                blah blah blah
+            <citation>
+                ...
+            ...
+
+    After::
+
+        <document>
+            <paragraph>
+                blah blah blah
+            <paragraph>
+                blah blah blah
+            ...
+            <thebibliography>
+                <citation>
+                    ...
+                <citation>
+                    ...
+    """
+    default_priority = 750
+    formats = ('latex',)
+
+    def run(self, **kwargs: Any) -> None:
+        citations = thebibliography()
+        for node in list(self.document.findall(nodes.citation)):
+            node.parent.remove(node)
+            citations += node
+
+        if len(citations) > 0:
+            self.document += citations
+
+
+class CitationReferenceTransform(SphinxPostTransform):
+    """Replace pending_xref nodes for citations with citation_reference nodes.
+
+    To handle citation references easily in the LaTeX writer, this converts
+    pending_xref nodes to citation_reference nodes.
+    """
+    default_priority = 5  # before ReferencesResolver
+    formats = ('latex',)
+
+    def run(self, **kwargs: Any) -> None:
+        domain = cast(CitationDomain, self.env.get_domain('citation'))
+        matcher = NodeMatcher(addnodes.pending_xref, refdomain='citation', reftype='ref')
+        for node in self.document.findall(matcher):  # type: addnodes.pending_xref
+            docname, labelid, _ = domain.citations.get(node['reftarget'], ('', '', 0))
+            if docname:
+                citation_ref = nodes.citation_reference('', '', *node.children,
+                                                        docname=docname, refname=labelid)
+                node.replace_self(citation_ref)
+
+
+class MathReferenceTransform(SphinxPostTransform):
+    """Replace pending_xref nodes for math with math_reference nodes.
+
+    To handle math references easily in the LaTeX writer, this converts
+    pending_xref nodes to math_reference nodes.
+    """
+    default_priority = 5  # before ReferencesResolver
+    formats = ('latex',)
+
+    def run(self, **kwargs: Any) -> None:
+        equations = self.env.get_domain('math').data['objects']
+        for node in self.document.findall(addnodes.pending_xref):
+            if node['refdomain'] == 'math' and node['reftype'] in ('eq', 'numref'):
+                docname, _ = equations.get(node['reftarget'], (None, None))
+                if docname:
+                    refnode = math_reference('', docname=docname, target=node['reftarget'])
+                    node.replace_self(refnode)
+
+
+class LiteralBlockTransform(SphinxPostTransform):
+    """Replace container nodes for literal_block with captioned_literal_block nodes."""
+    default_priority = 400
+    formats = ('latex',)
+
+    def run(self, **kwargs: Any) -> None:
+        matcher = NodeMatcher(nodes.container, literal_block=True)
+        for node in self.document.findall(matcher):  # type: nodes.container
+            newnode = captioned_literal_block('', *node.children, **node.attributes)
+            node.replace_self(newnode)
+
+
+class DocumentTargetTransform(SphinxPostTransform):
+    """Add a :doc label to the first section of each document."""
+    default_priority = 400
+    formats = ('latex',)
+
+    def run(self, **kwargs: Any) -> None:
+        for node in self.document.findall(addnodes.start_of_file):
+            section = node.next_node(nodes.section)
+            if section:
+                section['ids'].append(':doc')  # special label for :doc:
+
+
+class IndexInSectionTitleTransform(SphinxPostTransform):
+    """Move index nodes in section titles outside of the title.
+
+    The LaTeX index macro is not compatible with some handling of section
+    titles, such as the uppercasing done on the LaTeX side (cf. fncychap
+    handling of ``\\chapter``). Moving the index node to just after the
+    title node fixes that.
+
+    Before::
+
+        <section>
+            <title>
+                blah blah <index entries=[...]/>blah
+            <paragraph>
+                blah blah blah
+            ...
+
+    After::
+
+        <section>
+            <title>
+                blah blah blah
+            <index entries=[...]/>
+            <paragraph>
+                blah blah blah
+            ...
+    """
+    default_priority = 400
+    formats = ('latex',)
+
+    def run(self, **kwargs: Any) -> None:
+        for node in list(self.document.findall(nodes.title)):
+            if isinstance(node.parent, nodes.section):
+                for i, index in enumerate(node.findall(addnodes.index)):
+                    # move the index node next to the section title
+                    node.remove(index)
+                    node.parent.insert(i + 1, index)
+
+
+def setup(app: Sphinx) -> dict[str, Any]:
+    app.add_transform(FootnoteDocnameUpdater)
+    app.add_post_transform(SubstitutionDefinitionsRemover)
+    app.add_post_transform(BibliographyTransform)
+    app.add_post_transform(CitationReferenceTransform)
+    app.add_post_transform(DocumentTargetTransform)
+    app.add_post_transform(IndexInSectionTitleTransform)
+    app.add_post_transform(LaTeXFootnoteTransform)
+    app.add_post_transform(LiteralBlockTransform)
+    app.add_post_transform(MathReferenceTransform)
+    app.add_post_transform(ShowUrlsTransform)
+
+    return {
+        'version': 'builtin',
+        'parallel_read_safe': True,
+        'parallel_write_safe': True,
+    }
diff --git a/sphinx/builders/latex/util.py b/sphinx/builders/latex/util.py
new file mode 100644
index 0000000..01597f9
--- /dev/null
+++ b/sphinx/builders/latex/util.py
@@ -0,0 +1,48 @@
+"""Utilities for LaTeX builder."""
+
+from __future__ import annotations
+
+from docutils.writers.latex2e import Babel
+
+
+class ExtBabel(Babel):
+    cyrillic_languages = ('bulgarian', 'kazakh', 'mongolian', 'russian', 'ukrainian')
+
+    def __init__(self, language_code: str, use_polyglossia: bool = False) -> None:
+        self.language_code = language_code
+        self.use_polyglossia = use_polyglossia
+        self.supported = True
+        super().__init__(language_code)
+
+    def uses_cyrillic(self) -> bool:
+        return self.language in self.cyrillic_languages
+
+    def is_supported_language(self) -> bool:
+        return self.supported
+
+    def language_name(self, language_code: str) -> str:
+        language = super().language_name(language_code)
+        if language == 'ngerman' and self.use_polyglossia:
+            # polyglossia calls the new orthography (Neue Rechtschreibung)
+            # 'german' (selected with the new spelling option).
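+            # e.g. for language_code 'de', babel reports 'ngerman'; with
+            # polyglossia this is rendered as 'german' plus the
+            # 'spelling=new' option from get_mainlanguage_options() below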
+ return 'german' + elif language: + return language + elif language_code.startswith('zh'): + return 'english' # fallback to english (behaves like supported) + else: + self.supported = False + return 'english' # fallback to english + + def get_mainlanguage_options(self) -> str | None: + """Return options for polyglossia's ``\\setmainlanguage``.""" + if self.use_polyglossia is False: + return None + elif self.language == 'german': + language = super().language_name(self.language_code) + if language == 'ngerman': + return 'spelling=new' + else: + return 'spelling=old' + else: + return None diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py new file mode 100644 index 0000000..f250958 --- /dev/null +++ b/sphinx/builders/linkcheck.py @@ -0,0 +1,641 @@ +"""The CheckExternalLinksBuilder class.""" + +from __future__ import annotations + +import contextlib +import json +import re +import socket +import time +from html.parser import HTMLParser +from os import path +from queue import PriorityQueue, Queue +from threading import Thread +from typing import TYPE_CHECKING, NamedTuple, cast +from urllib.parse import unquote, urlparse, urlsplit, urlunparse + +from docutils import nodes +from requests.exceptions import ConnectionError, HTTPError, SSLError, TooManyRedirects + +from sphinx.builders.dummy import DummyBuilder +from sphinx.locale import __ +from sphinx.transforms.post_transforms import SphinxPostTransform +from sphinx.util import encode_uri, logging, requests +from sphinx.util.console import ( # type: ignore[attr-defined] + darkgray, + darkgreen, + purple, + red, + turquoise, +) +from sphinx.util.http_date import rfc1123_to_epoch +from sphinx.util.nodes import get_node_line + +if TYPE_CHECKING: + from collections.abc import Generator, Iterator + from typing import Any, Callable + + from requests import Response + + from sphinx.application import Sphinx + from sphinx.config import Config + +logger = logging.getLogger(__name__) + +uri_re = re.compile('([a-z]+:)?//') # matches to foo:// and // (a protocol relative URL) + +DEFAULT_REQUEST_HEADERS = { + 'Accept': 'text/html,application/xhtml+xml;q=0.9,*/*;q=0.8', +} +CHECK_IMMEDIATELY = 0 +QUEUE_POLL_SECS = 1 +DEFAULT_DELAY = 60.0 + + +class CheckExternalLinksBuilder(DummyBuilder): + """ + Checks for broken external links. 
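+
+    Results are written as human-readable text to ``output.txt`` and as
+    JSON records (one per line) to ``output.json`` in the output directory.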
+ """ + name = 'linkcheck' + epilog = __('Look for any errors in the above output or in ' + '%(outdir)s/output.txt') + + def init(self) -> None: + self.broken_hyperlinks = 0 + self.hyperlinks: dict[str, Hyperlink] = {} + # set a timeout for non-responding servers + socket.setdefaulttimeout(5.0) + + def finish(self) -> None: + checker = HyperlinkAvailabilityChecker(self.config) + logger.info('') + + output_text = path.join(self.outdir, 'output.txt') + output_json = path.join(self.outdir, 'output.json') + with open(output_text, 'w', encoding='utf-8') as self.txt_outfile, \ + open(output_json, 'w', encoding='utf-8') as self.json_outfile: + for result in checker.check(self.hyperlinks): + self.process_result(result) + + if self.broken_hyperlinks: + self.app.statuscode = 1 + + def process_result(self, result: CheckResult) -> None: + filename = self.env.doc2path(result.docname, False) + + linkstat = {'filename': filename, 'lineno': result.lineno, + 'status': result.status, 'code': result.code, 'uri': result.uri, + 'info': result.message} + self.write_linkstat(linkstat) + + if result.status == 'unchecked': + return + if result.status == 'working' and result.message == 'old': + return + if result.lineno: + logger.info('(%16s: line %4d) ', result.docname, result.lineno, nonl=True) + if result.status == 'ignored': + if result.message: + logger.info(darkgray('-ignored- ') + result.uri + ': ' + result.message) + else: + logger.info(darkgray('-ignored- ') + result.uri) + elif result.status == 'local': + logger.info(darkgray('-local- ') + result.uri) + self.write_entry('local', result.docname, filename, result.lineno, result.uri) + elif result.status == 'working': + logger.info(darkgreen('ok ') + result.uri + result.message) + elif result.status == 'broken': + if self.app.quiet or self.app.warningiserror: + logger.warning(__('broken link: %s (%s)'), result.uri, result.message, + location=(result.docname, result.lineno)) + else: + logger.info(red('broken ') + result.uri + red(' - ' + result.message)) + self.write_entry('broken', result.docname, filename, result.lineno, + result.uri + ': ' + result.message) + self.broken_hyperlinks += 1 + elif result.status == 'redirected': + try: + text, color = { + 301: ('permanently', purple), + 302: ('with Found', purple), + 303: ('with See Other', purple), + 307: ('temporarily', turquoise), + 308: ('permanently', purple), + }[result.code] + except KeyError: + text, color = ('with unknown code', purple) + linkstat['text'] = text + if self.config.linkcheck_allowed_redirects: + logger.warning('redirect ' + result.uri + ' - ' + text + ' to ' + + result.message, location=(result.docname, result.lineno)) + else: + logger.info(color('redirect ') + result.uri + + color(' - ' + text + ' to ' + result.message)) + self.write_entry('redirected ' + text, result.docname, filename, + result.lineno, result.uri + ' to ' + result.message) + else: + raise ValueError('Unknown status %s.' 
% result.status) + + def write_linkstat(self, data: dict) -> None: + self.json_outfile.write(json.dumps(data)) + self.json_outfile.write('\n') + + def write_entry(self, what: str, docname: str, filename: str, line: int, + uri: str) -> None: + self.txt_outfile.write(f'{filename}:{line}: [{what}] {uri}\n') + + +class HyperlinkCollector(SphinxPostTransform): + builders = ('linkcheck',) + default_priority = 800 + + def run(self, **kwargs: Any) -> None: + builder = cast(CheckExternalLinksBuilder, self.app.builder) + hyperlinks = builder.hyperlinks + docname = self.env.docname + + # reference nodes + for refnode in self.document.findall(nodes.reference): + if 'refuri' in refnode: + uri = refnode['refuri'] + _add_uri(self.app, uri, refnode, hyperlinks, docname) + + # image nodes + for imgnode in self.document.findall(nodes.image): + uri = imgnode['candidates'].get('?') + if uri and '://' in uri: + _add_uri(self.app, uri, imgnode, hyperlinks, docname) + + # raw nodes + for rawnode in self.document.findall(nodes.raw): + uri = rawnode.get('source') + if uri and '://' in uri: + _add_uri(self.app, uri, rawnode, hyperlinks, docname) + + +def _add_uri(app: Sphinx, uri: str, node: nodes.Element, + hyperlinks: dict[str, Hyperlink], docname: str) -> None: + if newuri := app.emit_firstresult('linkcheck-process-uri', uri): + uri = newuri + + try: + lineno = get_node_line(node) + except ValueError: + lineno = -1 + + if uri not in hyperlinks: + hyperlinks[uri] = Hyperlink(uri, docname, app.env.doc2path(docname), lineno) + + +class Hyperlink(NamedTuple): + uri: str + docname: str + docpath: str + lineno: int + + +class HyperlinkAvailabilityChecker: + def __init__(self, config: Config) -> None: + self.config = config + self.rate_limits: dict[str, RateLimit] = {} + self.rqueue: Queue[CheckResult] = Queue() + self.workers: list[Thread] = [] + self.wqueue: PriorityQueue[CheckRequest] = PriorityQueue() + self.num_workers: int = config.linkcheck_workers + + self.to_ignore: list[re.Pattern[str]] = list(map(re.compile, + self.config.linkcheck_ignore)) + + def check(self, hyperlinks: dict[str, Hyperlink]) -> Generator[CheckResult, None, None]: + self.invoke_threads() + + total_links = 0 + for hyperlink in hyperlinks.values(): + if self.is_ignored_uri(hyperlink.uri): + yield CheckResult(hyperlink.uri, hyperlink.docname, hyperlink.lineno, + 'ignored', '', 0) + else: + self.wqueue.put(CheckRequest(CHECK_IMMEDIATELY, hyperlink), False) + total_links += 1 + + done = 0 + while done < total_links: + yield self.rqueue.get() + done += 1 + + self.shutdown_threads() + + def invoke_threads(self) -> None: + for _i in range(self.num_workers): + thread = HyperlinkAvailabilityCheckWorker(self.config, + self.rqueue, self.wqueue, + self.rate_limits) + thread.start() + self.workers.append(thread) + + def shutdown_threads(self) -> None: + self.wqueue.join() + for _worker in self.workers: + self.wqueue.put(CheckRequest(CHECK_IMMEDIATELY, None), False) + + def is_ignored_uri(self, uri: str) -> bool: + return any(pat.match(uri) for pat in self.to_ignore) + + +class CheckRequest(NamedTuple): + next_check: float + hyperlink: Hyperlink | None + + +class CheckResult(NamedTuple): + uri: str + docname: str + lineno: int + status: str + message: str + code: int + + +class HyperlinkAvailabilityCheckWorker(Thread): + """A worker class for checking the availability of hyperlinks.""" + + def __init__(self, config: Config, + rqueue: Queue[CheckResult], + wqueue: Queue[CheckRequest], + rate_limits: dict[str, RateLimit]) -> None: + self.rate_limits = 
rate_limits + self.rqueue = rqueue + self.wqueue = wqueue + + self.anchors_ignore: list[re.Pattern[str]] = list( + map(re.compile, config.linkcheck_anchors_ignore)) + self.anchors_ignore_for_url: list[re.Pattern[str]] = list( + map(re.compile, config.linkcheck_anchors_ignore_for_url)) + self.documents_exclude: list[re.Pattern[str]] = list( + map(re.compile, config.linkcheck_exclude_documents)) + self.auth = [(re.compile(pattern), auth_info) for pattern, auth_info + in config.linkcheck_auth] + + self.timeout: int | float | None = config.linkcheck_timeout + self.request_headers: dict[str, dict[str, str]] = config.linkcheck_request_headers + self.check_anchors: bool = config.linkcheck_anchors + self.allowed_redirects: dict[re.Pattern[str], re.Pattern[str]] + self.allowed_redirects = config.linkcheck_allowed_redirects + self.retries: int = config.linkcheck_retries + self.rate_limit_timeout = config.linkcheck_rate_limit_timeout + + self.user_agent = config.user_agent + self.tls_verify = config.tls_verify + self.tls_cacerts = config.tls_cacerts + + self._session = requests._Session() + + super().__init__(daemon=True) + + def run(self) -> None: + while True: + next_check, hyperlink = self.wqueue.get() + if hyperlink is None: + # An empty hyperlink is a signal to shutdown the worker; cleanup resources here + self._session.close() + break + + uri, docname, _docpath, lineno = hyperlink + if uri is None: + break + + netloc = urlsplit(uri).netloc + with contextlib.suppress(KeyError): + # Refresh rate limit. + # When there are many links in the queue, workers are all stuck waiting + # for responses, but the builder keeps queuing. Links in the queue may + # have been queued before rate limits were discovered. + next_check = self.rate_limits[netloc].next_check + if next_check > time.time(): + # Sleep before putting message back in the queue to avoid + # waking up other threads. + time.sleep(QUEUE_POLL_SECS) + self.wqueue.put(CheckRequest(next_check, hyperlink), False) + self.wqueue.task_done() + continue + status, info, code = self._check(docname, uri, hyperlink) + if status == 'rate-limited': + logger.info(darkgray('-rate limited- ') + uri + darkgray(' | sleeping...')) + else: + self.rqueue.put(CheckResult(uri, docname, lineno, status, info, code)) + self.wqueue.task_done() + + def _check(self, docname: str, uri: str, hyperlink: Hyperlink) -> tuple[str, str, int]: + # check for various conditions without bothering the network + + for doc_matcher in self.documents_exclude: + if doc_matcher.match(docname): + info = ( + f'{docname} matched {doc_matcher.pattern} from ' + 'linkcheck_exclude_documents' + ) + return 'ignored', info, 0 + + if len(uri) == 0 or uri.startswith(('#', 'mailto:', 'tel:')): + return 'unchecked', '', 0 + if not uri.startswith(('http:', 'https:')): + if uri_re.match(uri): + # Non-supported URI schemes (ex. 
ftp) + return 'unchecked', '', 0 + + src_dir = path.dirname(hyperlink.docpath) + if path.exists(path.join(src_dir, uri)): + return 'working', '', 0 + return 'broken', '', 0 + + # need to actually check the URI + status, info, code = '', '', 0 + for _ in range(self.retries): + status, info, code = self._check_uri(uri, hyperlink) + if status != 'broken': + break + + return status, info, code + + def _retrieval_methods(self, + check_anchors: bool, + anchor: str) -> Iterator[tuple[Callable, dict]]: + if not check_anchors or not anchor: + yield self._session.head, {'allow_redirects': True} + yield self._session.get, {'stream': True} + + def _check_uri(self, uri: str, hyperlink: Hyperlink) -> tuple[str, str, int]: + req_url, delimiter, anchor = uri.partition('#') + if delimiter and anchor: + for rex in self.anchors_ignore: + if rex.match(anchor): + anchor = '' + break + else: + for rex in self.anchors_ignore_for_url: + if rex.match(req_url): + anchor = '' + break + + # handle non-ASCII URIs + try: + req_url.encode('ascii') + except UnicodeError: + req_url = encode_uri(req_url) + + # Get auth info, if any + for pattern, auth_info in self.auth: # noqa: B007 (false positive) + if pattern.match(uri): + break + else: + auth_info = None + + # update request headers for the URL + headers = _get_request_headers(uri, self.request_headers) + + # Linkcheck HTTP request logic: + # + # - Attempt HTTP HEAD before HTTP GET unless page content is required. + # - Follow server-issued HTTP redirects. + # - Respect server-issued HTTP 429 back-offs. + error_message = '' + status_code = -1 + response_url = retry_after = '' + for retrieval_method, kwargs in self._retrieval_methods(self.check_anchors, anchor): + try: + with retrieval_method( + url=req_url, auth=auth_info, + headers=headers, + timeout=self.timeout, + **kwargs, + _user_agent=self.user_agent, + _tls_info=(self.tls_verify, self.tls_cacerts), + ) as response: + if (self.check_anchors and response.ok and anchor + and not contains_anchor(response, anchor)): + raise Exception(__(f'Anchor {anchor!r} not found')) + + # Copy data we need from the (closed) response + status_code = response.status_code + redirect_status_code = response.history[-1].status_code if response.history else None # NoQA: E501 + retry_after = response.headers.get('Retry-After') + response_url = f'{response.url}' + response.raise_for_status() + del response + break + + except SSLError as err: + # SSL failure; report that the link is broken. + return 'broken', str(err), 0 + + except (ConnectionError, TooManyRedirects) as err: + # Servers drop the connection on HEAD requests, causing + # ConnectionError. + error_message = str(err) + continue + + except HTTPError as err: + error_message = str(err) + + # Unauthorised: the reference probably exists + if status_code == 401: + return 'working', 'unauthorized', 0 + + # Rate limiting; back-off if allowed, or report failure otherwise + if status_code == 429: + if next_check := self.limit_rate(response_url, retry_after): + self.wqueue.put(CheckRequest(next_check, hyperlink), False) + return 'rate-limited', '', 0 + return 'broken', error_message, 0 + + # Don't claim success/failure during server-side outages + if status_code == 503: + return 'ignored', 'service unavailable', 0 + + # For most HTTP failures, continue attempting alternate retrieval methods + continue + + except Exception as err: + # Unhandled exception (intermittent or permanent); report that + # the link is broken. 
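+                # (note: the "Anchor ... not found" exception raised above
+                # is also reported through this path)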
+ return 'broken', str(err), 0 + + else: + # All available retrieval methods have been exhausted; report + # that the link is broken. + return 'broken', error_message, 0 + + # Success; clear rate limits for the origin + netloc = urlsplit(req_url).netloc + self.rate_limits.pop(netloc, None) + + if ((response_url.rstrip('/') == req_url.rstrip('/')) + or _allowed_redirect(req_url, response_url, + self.allowed_redirects)): + return 'working', '', 0 + elif redirect_status_code is not None: + return 'redirected', response_url, redirect_status_code + else: + return 'redirected', response_url, 0 + + def limit_rate(self, response_url: str, retry_after: str) -> float | None: + delay = DEFAULT_DELAY + next_check = None + if retry_after: + try: + # Integer: time to wait before next attempt. + delay = float(retry_after) + except ValueError: + try: + # An HTTP-date: time of next attempt. + next_check = rfc1123_to_epoch(retry_after) + except (ValueError, TypeError): + # TypeError: Invalid date format. + # ValueError: Invalid date, e.g. Oct 52th. + pass + else: + delay = next_check - time.time() + else: + next_check = time.time() + delay + netloc = urlsplit(response_url).netloc + if next_check is None: + max_delay = self.rate_limit_timeout + try: + rate_limit = self.rate_limits[netloc] + except KeyError: + delay = DEFAULT_DELAY + else: + last_wait_time = rate_limit.delay + delay = 2.0 * last_wait_time + if delay > max_delay > last_wait_time: + delay = max_delay + if delay > max_delay: + return None + next_check = time.time() + delay + self.rate_limits[netloc] = RateLimit(delay, next_check) + return next_check + + +def _get_request_headers( + uri: str, + request_headers: dict[str, dict[str, str]], +) -> dict[str, str]: + url = urlsplit(uri) + candidates = (f'{url.scheme}://{url.netloc}', + f'{url.scheme}://{url.netloc}/', + uri, + '*') + + for u in candidates: + if u in request_headers: + return {**DEFAULT_REQUEST_HEADERS, **request_headers[u]} + return {} + + +def contains_anchor(response: Response, anchor: str) -> bool: + """Determine if an anchor is contained within an HTTP response.""" + + parser = AnchorCheckParser(unquote(anchor)) + # Read file in chunks. If we find a matching anchor, we break + # the loop early in hopes not to have to download the whole thing. + for chunk in response.iter_content(chunk_size=4096, decode_unicode=True): + if isinstance(chunk, bytes): # requests failed to decode + chunk = chunk.decode() # manually try to decode it + + parser.feed(chunk) + if parser.found: + break + parser.close() + return parser.found + + +class AnchorCheckParser(HTMLParser): + """Specialised HTML parser that looks for a specific anchor.""" + + def __init__(self, search_anchor: str) -> None: + super().__init__() + + self.search_anchor = search_anchor + self.found = False + + def handle_starttag(self, tag: Any, attrs: Any) -> None: + for key, value in attrs: + if key in ('id', 'name') and value == self.search_anchor: + self.found = True + break + + +def _allowed_redirect(url: str, new_url: str, + allowed_redirects: dict[re.Pattern[str], re.Pattern[str]]) -> bool: + return any( + from_url.match(url) and to_url.match(new_url) + for from_url, to_url + in allowed_redirects.items() + ) + + +class RateLimit(NamedTuple): + delay: float + next_check: float + + +def rewrite_github_anchor(app: Sphinx, uri: str) -> str | None: + """Rewrite anchor name of the hyperlink to github.com + + The hyperlink anchors in github.com are dynamically generated. This rewrites + them before checking and makes them comparable. 
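+
+    For example, the anchor ``#usage`` is rewritten to
+    ``#user-content-usage``.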
+ """ + parsed = urlparse(uri) + if parsed.hostname == 'github.com' and parsed.fragment: + prefixed = parsed.fragment.startswith('user-content-') + if not prefixed: + fragment = f'user-content-{parsed.fragment}' + return urlunparse(parsed._replace(fragment=fragment)) + return None + + +def compile_linkcheck_allowed_redirects(app: Sphinx, config: Config) -> None: + """Compile patterns in linkcheck_allowed_redirects to the regexp objects.""" + for url, pattern in list(app.config.linkcheck_allowed_redirects.items()): + try: + app.config.linkcheck_allowed_redirects[re.compile(url)] = re.compile(pattern) + except re.error as exc: + logger.warning(__('Failed to compile regex in linkcheck_allowed_redirects: %r %s'), + exc.pattern, exc.msg) + finally: + # Remove the original regexp-string + app.config.linkcheck_allowed_redirects.pop(url) + + +def setup(app: Sphinx) -> dict[str, Any]: + app.add_builder(CheckExternalLinksBuilder) + app.add_post_transform(HyperlinkCollector) + + app.add_config_value('linkcheck_ignore', [], False) + app.add_config_value('linkcheck_exclude_documents', [], False) + app.add_config_value('linkcheck_allowed_redirects', {}, False) + app.add_config_value('linkcheck_auth', [], False) + app.add_config_value('linkcheck_request_headers', {}, False) + app.add_config_value('linkcheck_retries', 1, False) + app.add_config_value('linkcheck_timeout', None, False, [int, float]) + app.add_config_value('linkcheck_workers', 5, False) + app.add_config_value('linkcheck_anchors', True, False) + # Anchors starting with ! are ignored since they are + # commonly used for dynamic pages + app.add_config_value('linkcheck_anchors_ignore', ['^!'], False) + app.add_config_value('linkcheck_anchors_ignore_for_url', (), False, (tuple, list)) + app.add_config_value('linkcheck_rate_limit_timeout', 300.0, False) + + app.add_event('linkcheck-process-uri') + + app.connect('config-inited', compile_linkcheck_allowed_redirects, priority=800) + + # FIXME: Disable URL rewrite handler for github.com temporarily. + # ref: https://github.com/sphinx-doc/sphinx/issues/9435 + # app.connect('linkcheck-process-uri', rewrite_github_anchor) + + return { + 'version': 'builtin', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/sphinx/builders/manpage.py b/sphinx/builders/manpage.py new file mode 100644 index 0000000..2d35d20 --- /dev/null +++ b/sphinx/builders/manpage.py @@ -0,0 +1,127 @@ +"""Manual pages builder.""" + +from __future__ import annotations + +import warnings +from os import path +from typing import TYPE_CHECKING, Any + +from docutils.frontend import OptionParser +from docutils.io import FileOutput + +from sphinx import addnodes +from sphinx.builders import Builder +from sphinx.locale import __ +from sphinx.util import logging +from sphinx.util.console import darkgreen # type: ignore[attr-defined] +from sphinx.util.display import progress_message +from sphinx.util.nodes import inline_all_toctrees +from sphinx.util.osutil import ensuredir, make_filename_from_project +from sphinx.writers.manpage import ManualPageTranslator, ManualPageWriter + +if TYPE_CHECKING: + from sphinx.application import Sphinx + from sphinx.config import Config + +logger = logging.getLogger(__name__) + + +class ManualPageBuilder(Builder): + """ + Builds groff output in manual page format. 
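+
+    The pages to write are taken from the ``man_pages`` configuration value,
+    a list of ``(docname, name, description, authors, section)`` entries.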
+ """ + name = 'man' + format = 'man' + epilog = __('The manual pages are in %(outdir)s.') + + default_translator_class = ManualPageTranslator + supported_image_types: list[str] = [] + + def init(self) -> None: + if not self.config.man_pages: + logger.warning(__('no "man_pages" config value found; no manual pages ' + 'will be written')) + + def get_outdated_docs(self) -> str | list[str]: + return 'all manpages' # for now + + def get_target_uri(self, docname: str, typ: str | None = None) -> str: + return '' + + @progress_message(__('writing')) + def write(self, *ignored: Any) -> None: + docwriter = ManualPageWriter(self) + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', category=DeprecationWarning) + # DeprecationWarning: The frontend.OptionParser class will be replaced + # by a subclass of argparse.ArgumentParser in Docutils 0.21 or later. + docsettings: Any = OptionParser( + defaults=self.env.settings, + components=(docwriter,), + read_config_files=True).get_default_values() + + for info in self.config.man_pages: + docname, name, description, authors, section = info + if docname not in self.env.all_docs: + logger.warning(__('"man_pages" config value references unknown ' + 'document %s'), docname) + continue + if isinstance(authors, str): + if authors: + authors = [authors] + else: + authors = [] + + docsettings.title = name + docsettings.subtitle = description + docsettings.authors = authors + docsettings.section = section + + if self.config.man_make_section_directory: + dirname = 'man%s' % section + ensuredir(path.join(self.outdir, dirname)) + targetname = f'{dirname}/{name}.{section}' + else: + targetname = f'{name}.{section}' + + logger.info(darkgreen(targetname) + ' { ', nonl=True) + destination = FileOutput( + destination_path=path.join(self.outdir, targetname), + encoding='utf-8') + + tree = self.env.get_doctree(docname) + docnames: set[str] = set() + largetree = inline_all_toctrees(self, docnames, docname, tree, + darkgreen, [docname]) + largetree.settings = docsettings + logger.info('} ', nonl=True) + self.env.resolve_references(largetree, docname, self) + # remove pending_xref nodes + for pendingnode in largetree.findall(addnodes.pending_xref): + pendingnode.replace_self(pendingnode.children) + + docwriter.write(largetree, destination) + + def finish(self) -> None: + pass + + +def default_man_pages(config: Config) -> list[tuple[str, str, str, list[str], int]]: + """ Better default man_pages settings. 
""" + filename = make_filename_from_project(config.project) + return [(config.root_doc, filename, f'{config.project} {config.release}', + [config.author], 1)] + + +def setup(app: Sphinx) -> dict[str, Any]: + app.add_builder(ManualPageBuilder) + + app.add_config_value('man_pages', default_man_pages, False) + app.add_config_value('man_show_urls', False, False) + app.add_config_value('man_make_section_directory', False, False) + + return { + 'version': 'builtin', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/sphinx/builders/singlehtml.py b/sphinx/builders/singlehtml.py new file mode 100644 index 0000000..cd66953 --- /dev/null +++ b/sphinx/builders/singlehtml.py @@ -0,0 +1,202 @@ +"""Single HTML builders.""" + +from __future__ import annotations + +from os import path +from typing import TYPE_CHECKING, Any + +from docutils import nodes + +from sphinx.builders.html import StandaloneHTMLBuilder +from sphinx.environment.adapters.toctree import global_toctree_for_doc +from sphinx.locale import __ +from sphinx.util import logging +from sphinx.util.console import darkgreen # type: ignore[attr-defined] +from sphinx.util.display import progress_message +from sphinx.util.nodes import inline_all_toctrees + +if TYPE_CHECKING: + from docutils.nodes import Node + + from sphinx.application import Sphinx + +logger = logging.getLogger(__name__) + + +class SingleFileHTMLBuilder(StandaloneHTMLBuilder): + """ + A StandaloneHTMLBuilder subclass that puts the whole document tree on one + HTML page. + """ + name = 'singlehtml' + epilog = __('The HTML page is in %(outdir)s.') + + copysource = False + + def get_outdated_docs(self) -> str | list[str]: # type: ignore[override] + return 'all documents' + + def get_target_uri(self, docname: str, typ: str | None = None) -> str: + if docname in self.env.all_docs: + # all references are on the same page... 
+            return self.config.root_doc + self.out_suffix + \
+                '#document-' + docname
+        else:
+            # chances are this is a html_additional_page
+            return docname + self.out_suffix
+
+    def get_relative_uri(self, from_: str, to: str, typ: str | None = None) -> str:
+        # ignore source
+        return self.get_target_uri(to, typ)
+
+    def fix_refuris(self, tree: Node) -> None:
+        # fix refuris with double anchor
+        fname = self.config.root_doc + self.out_suffix
+        for refnode in tree.findall(nodes.reference):
+            if 'refuri' not in refnode:
+                continue
+            refuri = refnode['refuri']
+            hashindex = refuri.find('#')
+            if hashindex < 0:
+                continue
+            hashindex = refuri.find('#', hashindex + 1)
+            if hashindex >= 0:
+                refnode['refuri'] = fname + refuri[hashindex:]
+
+    def _get_local_toctree(self, docname: str, collapse: bool = True, **kwargs: Any) -> str:
+        if isinstance(includehidden := kwargs.get('includehidden'), str):
+            if includehidden.lower() == 'false':
+                kwargs['includehidden'] = False
+            elif includehidden.lower() == 'true':
+                kwargs['includehidden'] = True
+        if kwargs.get('maxdepth') == '':
+            kwargs.pop('maxdepth')
+        toctree = global_toctree_for_doc(self.env, docname, self, collapse=collapse, **kwargs)
+        if toctree is not None:
+            self.fix_refuris(toctree)
+        return self.render_partial(toctree)['fragment']
+
+    def assemble_doctree(self) -> nodes.document:
+        master = self.config.root_doc
+        tree = self.env.get_doctree(master)
+        tree = inline_all_toctrees(self, set(), master, tree, darkgreen, [master])
+        tree['docname'] = master
+        self.env.resolve_references(tree, master, self)
+        self.fix_refuris(tree)
+        return tree
+
+    def assemble_toc_secnumbers(self) -> dict[str, dict[str, tuple[int, ...]]]:
+        # Assemble toc_secnumbers to resolve section numbers on SingleHTML.
+        # Merge all secnumbers to a single secnumber.
+        #
+        # Note: current Sphinx has refid conflicts in singlehtml mode.
+        # To avoid the problem, this replaces the keys of secnumbers with
+        # "docname/refid" aliases.
+        #
+        # There is related code in inline_all_toctrees() and
+        # HTMLTranslator#add_secnumber().
+        new_secnumbers: dict[str, tuple[int, ...]] = {}
+        for docname, secnums in self.env.toc_secnumbers.items():
+            for id, secnum in secnums.items():
+                alias = f"{docname}/{id}"
+                new_secnumbers[alias] = secnum
+
+        return {self.config.root_doc: new_secnumbers}
+
+    def assemble_toc_fignumbers(self) -> dict[str, dict[str, dict[str, tuple[int, ...]]]]:
+        # Assemble toc_fignumbers to resolve figure numbers on SingleHTML.
+        # Merge all fignumbers to a single fignumber.
+        #
+        # Note: current Sphinx has refid conflicts in singlehtml mode.
+        # To avoid the problem, this replaces the keys of fignumbers with
+        # "docname/figtype" aliases.
+        #
+        # There is related code in inline_all_toctrees() and
+        # HTMLTranslator#add_fignumber().
+        new_fignumbers: dict[str, dict[str, tuple[int, ...]]] = {}
+        # {'foo': {'figure': {'id2': (2,), 'id1': (1,)}}, 'bar': {'figure': {'id1': (3,)}}}
+        for docname, fignumlist in self.env.toc_fignumbers.items():
+            for figtype, fignums in fignumlist.items():
+                alias = f"{docname}/{figtype}"
+                new_fignumbers.setdefault(alias, {})
+                for id, fignum in fignums.items():
+                    new_fignumbers[alias][id] = fignum
+
+        return {self.config.root_doc: new_fignumbers}
+
+    def get_doc_context(self, docname: str, body: str, metatags: str) -> dict[str, Any]:
+        # no relation links...
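+        # the assembled page is self-contained, so prev/next/parents and
+        # rellinks in the context below stay empty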
+ toctree = global_toctree_for_doc(self.env, self.config.root_doc, self, collapse=False) + # if there is no toctree, toc is None + if toctree: + self.fix_refuris(toctree) + toc = self.render_partial(toctree)['fragment'] + display_toc = True + else: + toc = '' + display_toc = False + return { + 'parents': [], + 'prev': None, + 'next': None, + 'docstitle': None, + 'title': self.config.html_title, + 'meta': None, + 'body': body, + 'metatags': metatags, + 'rellinks': [], + 'sourcename': '', + 'toc': toc, + 'display_toc': display_toc, + } + + def write(self, *ignored: Any) -> None: + docnames = self.env.all_docs + + with progress_message(__('preparing documents')): + self.prepare_writing(docnames) # type: ignore[arg-type] + + with progress_message(__('assembling single document')): + doctree = self.assemble_doctree() + self.env.toc_secnumbers = self.assemble_toc_secnumbers() + self.env.toc_fignumbers = self.assemble_toc_fignumbers() + + with progress_message(__('writing')): + self.write_doc_serialized(self.config.root_doc, doctree) + self.write_doc(self.config.root_doc, doctree) + + def finish(self) -> None: + self.write_additional_files() + self.copy_image_files() + self.copy_download_files() + self.copy_static_files() + self.copy_extra_files() + self.write_buildinfo() + self.dump_inventory() + + @progress_message(__('writing additional files')) + def write_additional_files(self) -> None: + # no indices or search pages are supported + + # additional pages from conf.py + for pagename, template in self.config.html_additional_pages.items(): + logger.info(' ' + pagename, nonl=True) + self.handle_page(pagename, {}, template) + + if self.config.html_use_opensearch: + logger.info(' opensearch', nonl=True) + fn = path.join(self.outdir, '_static', 'opensearch.xml') + self.handle_page('opensearch', {}, 'opensearch.xml', outfilename=fn) + + +def setup(app: Sphinx) -> dict[str, Any]: + app.setup_extension('sphinx.builders.html') + + app.add_builder(SingleFileHTMLBuilder) + app.add_config_value('singlehtml_sidebars', lambda self: self.html_sidebars, 'html') + + return { + 'version': 'builtin', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/sphinx/builders/texinfo.py b/sphinx/builders/texinfo.py new file mode 100644 index 0000000..441b598 --- /dev/null +++ b/sphinx/builders/texinfo.py @@ -0,0 +1,229 @@ +"""Texinfo builder.""" + +from __future__ import annotations + +import os +import warnings +from os import path +from typing import TYPE_CHECKING, Any + +from docutils import nodes +from docutils.frontend import OptionParser +from docutils.io import FileOutput + +from sphinx import addnodes, package_dir +from sphinx.builders import Builder +from sphinx.environment.adapters.asset import ImageAdapter +from sphinx.errors import NoUri +from sphinx.locale import _, __ +from sphinx.util import logging +from sphinx.util.console import darkgreen # type: ignore[attr-defined] +from sphinx.util.display import progress_message, status_iterator +from sphinx.util.docutils import new_document +from sphinx.util.fileutil import copy_asset_file +from sphinx.util.nodes import inline_all_toctrees +from sphinx.util.osutil import SEP, ensuredir, make_filename_from_project +from sphinx.writers.texinfo import TexinfoTranslator, TexinfoWriter + +if TYPE_CHECKING: + from collections.abc import Iterable + + from docutils.nodes import Node + + from sphinx.application import Sphinx + from sphinx.config import Config + +logger = logging.getLogger(__name__) +template_dir = os.path.join(package_dir, 
'templates', 'texinfo') + + +class TexinfoBuilder(Builder): + """ + Builds Texinfo output to create Info documentation. + """ + name = 'texinfo' + format = 'texinfo' + epilog = __('The Texinfo files are in %(outdir)s.') + if os.name == 'posix': + epilog += __("\nRun 'make' in that directory to run these through " + "makeinfo\n" + "(use 'make info' here to do that automatically).") + + supported_image_types = ['image/png', 'image/jpeg', + 'image/gif'] + default_translator_class = TexinfoTranslator + + def init(self) -> None: + self.docnames: Iterable[str] = [] + self.document_data: list[tuple[str, str, str, str, str, str, str, bool]] = [] + + def get_outdated_docs(self) -> str | list[str]: + return 'all documents' # for now + + def get_target_uri(self, docname: str, typ: str | None = None) -> str: + if docname not in self.docnames: + raise NoUri(docname, typ) + return '%' + docname + + def get_relative_uri(self, from_: str, to: str, typ: str | None = None) -> str: + # ignore source path + return self.get_target_uri(to, typ) + + def init_document_data(self) -> None: + preliminary_document_data = [list(x) for x in self.config.texinfo_documents] + if not preliminary_document_data: + logger.warning(__('no "texinfo_documents" config value found; no documents ' + 'will be written')) + return + # assign subdirs to titles + self.titles: list[tuple[str, str]] = [] + for entry in preliminary_document_data: + docname = entry[0] + if docname not in self.env.all_docs: + logger.warning(__('"texinfo_documents" config value references unknown ' + 'document %s'), docname) + continue + self.document_data.append(entry) # type: ignore[arg-type] + if docname.endswith(SEP + 'index'): + docname = docname[:-5] + self.titles.append((docname, entry[2])) + + def write(self, *ignored: Any) -> None: + self.init_document_data() + self.copy_assets() + for entry in self.document_data: + docname, targetname, title, author = entry[:4] + targetname += '.texi' + direntry = description = category = '' + if len(entry) > 6: + direntry, description, category = entry[4:7] + toctree_only = False + if len(entry) > 7: + toctree_only = entry[7] + destination = FileOutput( + destination_path=path.join(self.outdir, targetname), + encoding='utf-8') + with progress_message(__("processing %s") % targetname): + appendices = self.config.texinfo_appendices or [] + doctree = self.assemble_doctree(docname, toctree_only, appendices=appendices) + + with progress_message(__("writing")): + self.post_process_images(doctree) + docwriter = TexinfoWriter(self) + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', category=DeprecationWarning) + # DeprecationWarning: The frontend.OptionParser class will be replaced + # by a subclass of argparse.ArgumentParser in Docutils 0.21 or later. 
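+                    # build the writer settings from the environment defaults;
+                    # the Texinfo-specific fields are filled in just below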
+ settings: Any = OptionParser( + defaults=self.env.settings, + components=(docwriter,), + read_config_files=True).get_default_values() + settings.author = author + settings.title = title + settings.texinfo_filename = targetname[:-5] + '.info' + settings.texinfo_elements = self.config.texinfo_elements + settings.texinfo_dir_entry = direntry or '' + settings.texinfo_dir_category = category or '' + settings.texinfo_dir_description = description or '' + settings.docname = docname + doctree.settings = settings + docwriter.write(doctree, destination) + self.copy_image_files(targetname[:-5]) + + def assemble_doctree( + self, indexfile: str, toctree_only: bool, appendices: list[str], + ) -> nodes.document: + self.docnames = set([indexfile] + appendices) + logger.info(darkgreen(indexfile) + " ", nonl=True) + tree = self.env.get_doctree(indexfile) + tree['docname'] = indexfile + if toctree_only: + # extract toctree nodes from the tree and put them in a + # fresh document + new_tree = new_document('<texinfo output>') + new_sect = nodes.section() + new_sect += nodes.title('<Set title in conf.py>', + '<Set title in conf.py>') + new_tree += new_sect + for node in tree.findall(addnodes.toctree): + new_sect += node + tree = new_tree + largetree = inline_all_toctrees(self, self.docnames, indexfile, tree, + darkgreen, [indexfile]) + largetree['docname'] = indexfile + for docname in appendices: + appendix = self.env.get_doctree(docname) + appendix['docname'] = docname + largetree.append(appendix) + logger.info('') + logger.info(__("resolving references...")) + self.env.resolve_references(largetree, indexfile, self) + # TODO: add support for external :ref:s + for pendingnode in largetree.findall(addnodes.pending_xref): + docname = pendingnode['refdocname'] + sectname = pendingnode['refsectname'] + newnodes: list[Node] = [nodes.emphasis(sectname, sectname)] + for subdir, title in self.titles: + if docname.startswith(subdir): + newnodes.append(nodes.Text(_(' (in '))) + newnodes.append(nodes.emphasis(title, title)) + newnodes.append(nodes.Text(')')) + break + else: + pass + pendingnode.replace_self(newnodes) + return largetree + + def copy_assets(self) -> None: + self.copy_support_files() + + def copy_image_files(self, targetname: str) -> None: + if self.images: + stringify_func = ImageAdapter(self.app.env).get_original_image_uri + for src in status_iterator(self.images, __('copying images... '), "brown", + len(self.images), self.app.verbosity, + stringify_func=stringify_func): + dest = self.images[src] + try: + imagedir = path.join(self.outdir, targetname + '-figures') + ensuredir(imagedir) + copy_asset_file(path.join(self.srcdir, src), + path.join(imagedir, dest)) + except Exception as err: + logger.warning(__('cannot copy image file %r: %s'), + path.join(self.srcdir, src), err) + + def copy_support_files(self) -> None: + try: + with progress_message(__('copying Texinfo support files')): + logger.info('Makefile ', nonl=True) + copy_asset_file(os.path.join(template_dir, 'Makefile'), self.outdir) + except OSError as err: + logger.warning(__("error writing file Makefile: %s"), err) + + +def default_texinfo_documents( + config: Config, +) -> list[tuple[str, str, str, str, str, str, str]]: + """ Better default texinfo_documents settings. 
""" + filename = make_filename_from_project(config.project) + return [(config.root_doc, filename, config.project, config.author, filename, + 'One line description of project', 'Miscellaneous')] + + +def setup(app: Sphinx) -> dict[str, Any]: + app.add_builder(TexinfoBuilder) + + app.add_config_value('texinfo_documents', default_texinfo_documents, False) + app.add_config_value('texinfo_appendices', [], False) + app.add_config_value('texinfo_elements', {}, False) + app.add_config_value('texinfo_domain_indices', True, False, [list]) + app.add_config_value('texinfo_show_urls', 'footnote', False) + app.add_config_value('texinfo_no_detailmenu', False, False) + app.add_config_value('texinfo_cross_references', True, False) + + return { + 'version': 'builtin', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/sphinx/builders/text.py b/sphinx/builders/text.py new file mode 100644 index 0000000..43a8d1f --- /dev/null +++ b/sphinx/builders/text.py @@ -0,0 +1,94 @@ +"""Plain-text Sphinx builder.""" + +from __future__ import annotations + +from os import path +from typing import TYPE_CHECKING, Any + +from docutils.io import StringOutput + +from sphinx.builders import Builder +from sphinx.locale import __ +from sphinx.util import logging +from sphinx.util.osutil import ensuredir, os_path +from sphinx.writers.text import TextTranslator, TextWriter + +if TYPE_CHECKING: + from collections.abc import Iterator + + from docutils.nodes import Node + + from sphinx.application import Sphinx + +logger = logging.getLogger(__name__) + + +class TextBuilder(Builder): + name = 'text' + format = 'text' + epilog = __('The text files are in %(outdir)s.') + + out_suffix = '.txt' + allow_parallel = True + default_translator_class = TextTranslator + + current_docname: str | None = None + + def init(self) -> None: + # section numbers for headings in the currently visited document + self.secnumbers: dict[str, tuple[int, ...]] = {} + + def get_outdated_docs(self) -> Iterator[str]: + for docname in self.env.found_docs: + if docname not in self.env.all_docs: + yield docname + continue + targetname = path.join(self.outdir, docname + self.out_suffix) + try: + targetmtime = path.getmtime(targetname) + except Exception: + targetmtime = 0 + try: + srcmtime = path.getmtime(self.env.doc2path(docname)) + if srcmtime > targetmtime: + yield docname + except OSError: + # source doesn't exist anymore + pass + + def get_target_uri(self, docname: str, typ: str | None = None) -> str: + return '' + + def prepare_writing(self, docnames: set[str]) -> None: + self.writer = TextWriter(self) + + def write_doc(self, docname: str, doctree: Node) -> None: + self.current_docname = docname + self.secnumbers = self.env.toc_secnumbers.get(docname, {}) + destination = StringOutput(encoding='utf-8') + self.writer.write(doctree, destination) + outfilename = path.join(self.outdir, os_path(docname) + self.out_suffix) + ensuredir(path.dirname(outfilename)) + try: + with open(outfilename, 'w', encoding='utf-8') as f: + f.write(self.writer.output) + except OSError as err: + logger.warning(__("error writing file %s: %s"), outfilename, err) + + def finish(self) -> None: + pass + + +def setup(app: Sphinx) -> dict[str, Any]: + app.add_builder(TextBuilder) + + app.add_config_value('text_sectionchars', '*=-~"+`', 'env') + app.add_config_value('text_newlines', 'unix', 'env') + app.add_config_value('text_add_secnumbers', True, 'env') + app.add_config_value('text_secnumber_suffix', '. 
', 'env') + + return { + 'version': 'builtin', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/sphinx/builders/xml.py b/sphinx/builders/xml.py new file mode 100644 index 0000000..5b88531 --- /dev/null +++ b/sphinx/builders/xml.py @@ -0,0 +1,123 @@ +"""Docutils-native XML and pseudo-XML builders.""" + +from __future__ import annotations + +from os import path +from typing import TYPE_CHECKING, Any + +from docutils import nodes +from docutils.io import StringOutput +from docutils.writers.docutils_xml import XMLTranslator + +from sphinx.builders import Builder +from sphinx.locale import __ +from sphinx.util import logging +from sphinx.util.osutil import ensuredir, os_path +from sphinx.writers.xml import PseudoXMLWriter, XMLWriter + +if TYPE_CHECKING: + from collections.abc import Iterator + + from docutils.nodes import Node + + from sphinx.application import Sphinx + +logger = logging.getLogger(__name__) + + +class XMLBuilder(Builder): + """ + Builds Docutils-native XML. + """ + name = 'xml' + format = 'xml' + epilog = __('The XML files are in %(outdir)s.') + + out_suffix = '.xml' + allow_parallel = True + + _writer_class: type[XMLWriter] | type[PseudoXMLWriter] = XMLWriter + writer: XMLWriter | PseudoXMLWriter + default_translator_class = XMLTranslator + + def init(self) -> None: + pass + + def get_outdated_docs(self) -> Iterator[str]: + for docname in self.env.found_docs: + if docname not in self.env.all_docs: + yield docname + continue + targetname = path.join(self.outdir, docname + self.out_suffix) + try: + targetmtime = path.getmtime(targetname) + except Exception: + targetmtime = 0 + try: + srcmtime = path.getmtime(self.env.doc2path(docname)) + if srcmtime > targetmtime: + yield docname + except OSError: + # source doesn't exist anymore + pass + + def get_target_uri(self, docname: str, typ: str | None = None) -> str: + return docname + + def prepare_writing(self, docnames: set[str]) -> None: + self.writer = self._writer_class(self) + + def write_doc(self, docname: str, doctree: Node) -> None: + # work around multiple string % tuple issues in docutils; + # replace tuples in attribute values with lists + doctree = doctree.deepcopy() + for domain in self.env.domains.values(): + xmlns = "xmlns:" + domain.name + doctree[xmlns] = "https://www.sphinx-doc.org/" # type: ignore[index] + for node in doctree.findall(nodes.Element): + for att, value in node.attributes.items(): + if isinstance(value, tuple): + node.attributes[att] = list(value) + value = node.attributes[att] + if isinstance(value, list): + for i, val in enumerate(value): + if isinstance(val, tuple): + value[i] = list(val) + destination = StringOutput(encoding='utf-8') + self.writer.write(doctree, destination) + outfilename = path.join(self.outdir, os_path(docname) + self.out_suffix) + ensuredir(path.dirname(outfilename)) + try: + with open(outfilename, 'w', encoding='utf-8') as f: + f.write(self.writer.output) + except OSError as err: + logger.warning(__("error writing file %s: %s"), outfilename, err) + + def finish(self) -> None: + pass + + +class PseudoXMLBuilder(XMLBuilder): + """ + Builds pseudo-XML for display purposes. 
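+
+    The output is an indented, plain-text rendering of the doctree, mainly
+    useful for debugging, e.g.::
+
+        <document source="...">
+            <section ids="example">
+                <title>
+                    Example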
+ """ + name = 'pseudoxml' + format = 'pseudoxml' + epilog = __('The pseudo-XML files are in %(outdir)s.') + + out_suffix = '.pseudoxml' + + _writer_class = PseudoXMLWriter + + +def setup(app: Sphinx) -> dict[str, Any]: + app.add_builder(XMLBuilder) + app.add_builder(PseudoXMLBuilder) + + app.add_config_value('xml_pretty', True, 'env') + + return { + 'version': 'builtin', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } -- cgit v1.2.3