diff options
Diffstat (limited to 'sphinx/transforms/post_transforms')
-rw-r--r-- | sphinx/transforms/post_transforms/__init__.py | 297 | ||||
-rw-r--r-- | sphinx/transforms/post_transforms/code.py | 139 | ||||
-rw-r--r-- | sphinx/transforms/post_transforms/images.py | 280 |
3 files changed, 716 insertions, 0 deletions
diff --git a/sphinx/transforms/post_transforms/__init__.py b/sphinx/transforms/post_transforms/__init__.py new file mode 100644 index 0000000..485f1f1 --- /dev/null +++ b/sphinx/transforms/post_transforms/__init__.py @@ -0,0 +1,297 @@ +"""Docutils transforms used by Sphinx.""" + +from __future__ import annotations + +import re +from typing import TYPE_CHECKING, Any, cast + +from docutils import nodes +from docutils.nodes import Element, Node + +from sphinx import addnodes +from sphinx.errors import NoUri +from sphinx.locale import __ +from sphinx.transforms import SphinxTransform +from sphinx.util import logging +from sphinx.util.docutils import SphinxTranslator +from sphinx.util.nodes import find_pending_xref_condition, process_only_nodes + +if TYPE_CHECKING: + from collections.abc import Sequence + + from sphinx.addnodes import pending_xref + from sphinx.application import Sphinx + from sphinx.domains import Domain + +logger = logging.getLogger(__name__) + + +class SphinxPostTransform(SphinxTransform): + """A base class of post-transforms. + + Post transforms are invoked to modify the document to restructure it for outputting. + They resolve references, convert images, do special transformation for each output + formats and so on. This class helps to implement these post transforms. + """ + builders: tuple[str, ...] = () + formats: tuple[str, ...] = () + + def apply(self, **kwargs: Any) -> None: + if self.is_supported(): + self.run(**kwargs) + + def is_supported(self) -> bool: + """Check this transform working for current builder.""" + if self.builders and self.app.builder.name not in self.builders: + return False + if self.formats and self.app.builder.format not in self.formats: + return False + + return True + + def run(self, **kwargs: Any) -> None: + """Main method of post transforms. + + Subclasses should override this method instead of ``apply()``. + """ + raise NotImplementedError + + +class ReferencesResolver(SphinxPostTransform): + """ + Resolves cross-references on doctrees. + """ + + default_priority = 10 + + def run(self, **kwargs: Any) -> None: + for node in self.document.findall(addnodes.pending_xref): + content = self.find_pending_xref_condition(node, ("resolved", "*")) + if content: + contnode = cast(Element, content[0].deepcopy()) + else: + contnode = cast(Element, node[0].deepcopy()) + + newnode = None + + typ = node['reftype'] + target = node['reftarget'] + node.setdefault('refdoc', self.env.docname) + refdoc = node.get('refdoc') + domain = None + + try: + if 'refdomain' in node and node['refdomain']: + # let the domain try to resolve the reference + try: + domain = self.env.domains[node['refdomain']] + except KeyError as exc: + raise NoUri(target, typ) from exc + newnode = domain.resolve_xref(self.env, refdoc, self.app.builder, + typ, target, node, contnode) + # really hardwired reference types + elif typ == 'any': + newnode = self.resolve_anyref(refdoc, node, contnode) + # no new node found? try the missing-reference event + if newnode is None: + newnode = self.app.emit_firstresult('missing-reference', self.env, + node, contnode, + allowed_exceptions=(NoUri,)) + # still not found? warn if node wishes to be warned about or + # we are in nit-picky mode + if newnode is None: + self.warn_missing_reference(refdoc, typ, target, node, domain) + except NoUri: + newnode = None + + if newnode: + newnodes: list[Node] = [newnode] + else: + newnodes = [contnode] + if newnode is None and isinstance(node[0], addnodes.pending_xref_condition): + matched = self.find_pending_xref_condition(node, ("*",)) + if matched: + newnodes = matched + else: + logger.warning(__('Could not determine the fallback text for the ' + 'cross-reference. Might be a bug.'), location=node) + + node.replace_self(newnodes) + + def resolve_anyref( + self, refdoc: str, node: pending_xref, contnode: Element, + ) -> Element | None: + """Resolve reference generated by the "any" role.""" + stddomain = self.env.get_domain('std') + target = node['reftarget'] + results: list[tuple[str, Element]] = [] + # first, try resolving as :doc: + doc_ref = stddomain.resolve_xref(self.env, refdoc, self.app.builder, + 'doc', target, node, contnode) + if doc_ref: + results.append(('doc', doc_ref)) + # next, do the standard domain (makes this a priority) + results.extend(stddomain.resolve_any_xref(self.env, refdoc, self.app.builder, + target, node, contnode)) + for domain in self.env.domains.values(): + if domain.name == 'std': + continue # we did this one already + try: + results.extend(domain.resolve_any_xref(self.env, refdoc, self.app.builder, + target, node, contnode)) + except NotImplementedError: + # the domain doesn't yet support the new interface + # we have to manually collect possible references (SLOW) + for role in domain.roles: + res = domain.resolve_xref(self.env, refdoc, self.app.builder, + role, target, node, contnode) + if res and len(res) > 0 and isinstance(res[0], nodes.Element): + results.append((f'{domain.name}:{role}', res)) + # now, see how many matches we got... + if not results: + return None + if len(results) > 1: + def stringify(name: str, node: Element) -> str: + reftitle = node.get('reftitle', node.astext()) + return f':{name}:`{reftitle}`' + candidates = ' or '.join(stringify(name, role) for name, role in results) + logger.warning(__("more than one target found for 'any' cross-" + 'reference %r: could be %s'), target, candidates, + location=node) + res_role, newnode = results[0] + # Override "any" class with the actual role type to get the styling + # approximately correct. + res_domain = res_role.split(':')[0] + if (len(newnode) > 0 and + isinstance(newnode[0], nodes.Element) and + newnode[0].get('classes')): + newnode[0]['classes'].append(res_domain) + newnode[0]['classes'].append(res_role.replace(':', '-')) + return newnode + + def warn_missing_reference(self, refdoc: str, typ: str, target: str, + node: pending_xref, domain: Domain | None) -> None: + warn = node.get('refwarn') + if self.config.nitpicky: + warn = True + dtype = f'{domain.name}:{typ}' if domain else typ + if self.config.nitpick_ignore: + if (dtype, target) in self.config.nitpick_ignore: + warn = False + # for "std" types also try without domain name + if (not domain or domain.name == 'std') and \ + (typ, target) in self.config.nitpick_ignore: + warn = False + if self.config.nitpick_ignore_regex: + def matches_ignore(entry_type: str, entry_target: str) -> bool: + return any( + ( + re.fullmatch(ignore_type, entry_type) + and re.fullmatch(ignore_target, entry_target) + ) + for ignore_type, ignore_target + in self.config.nitpick_ignore_regex + ) + if matches_ignore(dtype, target): + warn = False + # for "std" types also try without domain name + if (not domain or domain.name == 'std') and \ + matches_ignore(typ, target): + warn = False + if not warn: + return + + if self.app.emit_firstresult('warn-missing-reference', domain, node): + return + elif domain and typ in domain.dangling_warnings: + msg = domain.dangling_warnings[typ] % {'target': target} + elif node.get('refdomain', 'std') not in ('', 'std'): + msg = (__('%s:%s reference target not found: %s') % + (node['refdomain'], typ, target)) + else: + msg = __('%r reference target not found: %s') % (typ, target) + logger.warning(msg, location=node, type='ref', subtype=typ) + + def find_pending_xref_condition(self, node: pending_xref, conditions: Sequence[str], + ) -> list[Node] | None: + for condition in conditions: + matched = find_pending_xref_condition(node, condition) + if matched: + return matched.children + return None + + +class OnlyNodeTransform(SphinxPostTransform): + default_priority = 50 + + def run(self, **kwargs: Any) -> None: + # A comment on the comment() nodes being inserted: replacing by [] would + # result in a "Losing ids" exception if there is a target node before + # the only node, so we make sure docutils can transfer the id to + # something, even if it's just a comment and will lose the id anyway... + process_only_nodes(self.document, self.app.builder.tags) + + +class SigElementFallbackTransform(SphinxPostTransform): + """Fallback various desc_* nodes to inline if translator does not support them.""" + default_priority = 200 + + def run(self, **kwargs: Any) -> None: + def has_visitor(translator: type[nodes.NodeVisitor], node: type[Element]) -> bool: + return hasattr(translator, "visit_%s" % node.__name__) + + try: + translator = self.app.builder.get_translator_class() + except AttributeError: + # do nothing if no translator class is specified (e.g., on a dummy builder) + return + + if issubclass(translator, SphinxTranslator): + # subclass of SphinxTranslator supports desc_sig_element nodes automatically. + return + + # for the leaf elements (desc_sig_element), the translator should support _all_, + # unless there exists a generic visit_desc_sig_element default visitor + if (not all(has_visitor(translator, node) for node in addnodes.SIG_ELEMENTS) + and not has_visitor(translator, addnodes.desc_sig_element)): + self.fallback(addnodes.desc_sig_element) + + if not has_visitor(translator, addnodes.desc_inline): + self.fallback(addnodes.desc_inline) + + def fallback(self, node_type: Any) -> None: + """Translate nodes of type *node_type* to docutils inline nodes. + + The original node type name is stored as a string in a private + ``_sig_node_type`` attribute if the latter did not exist. + """ + for node in self.document.findall(node_type): + newnode = nodes.inline() + newnode.update_all_atts(node) + newnode.extend(node) + # Only set _sig_node_type if not defined by the user + newnode.setdefault('_sig_node_type', node.tagname) + node.replace_self(newnode) + + +class PropagateDescDomain(SphinxPostTransform): + """Add the domain name of the parent node as a class in each desc_signature node.""" + default_priority = 200 + + def run(self, **kwargs: Any) -> None: + for node in self.document.findall(addnodes.desc_signature): + if node.parent.get('domain'): + node['classes'].append(node.parent['domain']) + + +def setup(app: Sphinx) -> dict[str, Any]: + app.add_post_transform(ReferencesResolver) + app.add_post_transform(OnlyNodeTransform) + app.add_post_transform(SigElementFallbackTransform) + app.add_post_transform(PropagateDescDomain) + + return { + 'version': 'builtin', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/sphinx/transforms/post_transforms/code.py b/sphinx/transforms/post_transforms/code.py new file mode 100644 index 0000000..cd8abcc --- /dev/null +++ b/sphinx/transforms/post_transforms/code.py @@ -0,0 +1,139 @@ +"""transforms for code-blocks.""" + +from __future__ import annotations + +import sys +from typing import TYPE_CHECKING, Any, NamedTuple + +from docutils import nodes +from pygments.lexers import PythonConsoleLexer, guess_lexer + +from sphinx import addnodes +from sphinx.ext import doctest +from sphinx.transforms import SphinxTransform + +if TYPE_CHECKING: + from docutils.nodes import Node, TextElement + + from sphinx.application import Sphinx + + +class HighlightSetting(NamedTuple): + language: str + force: bool + lineno_threshold: int + + +class HighlightLanguageTransform(SphinxTransform): + """ + Apply highlight_language to all literal_block nodes. + + This refers both :confval:`highlight_language` setting and + :rst:dir:`highlight` directive. After processing, this transform + removes ``highlightlang`` node from doctree. + """ + default_priority = 400 + + def apply(self, **kwargs: Any) -> None: + visitor = HighlightLanguageVisitor(self.document, + self.config.highlight_language) + self.document.walkabout(visitor) + + for node in list(self.document.findall(addnodes.highlightlang)): + node.parent.remove(node) + + +class HighlightLanguageVisitor(nodes.NodeVisitor): + def __init__(self, document: nodes.document, default_language: str) -> None: + self.default_setting = HighlightSetting(default_language, False, sys.maxsize) + self.settings: list[HighlightSetting] = [] + super().__init__(document) + + def unknown_visit(self, node: Node) -> None: + pass + + def unknown_departure(self, node: Node) -> None: + pass + + def visit_document(self, node: Node) -> None: + self.settings.append(self.default_setting) + + def depart_document(self, node: Node) -> None: + self.settings.pop() + + def visit_start_of_file(self, node: Node) -> None: + self.settings.append(self.default_setting) + + def depart_start_of_file(self, node: Node) -> None: + self.settings.pop() + + def visit_highlightlang(self, node: addnodes.highlightlang) -> None: + self.settings[-1] = HighlightSetting(node['lang'], + node['force'], + node['linenothreshold']) + + def visit_literal_block(self, node: nodes.literal_block) -> None: + setting = self.settings[-1] + if 'language' not in node: + node['language'] = setting.language + node['force'] = setting.force + if 'linenos' not in node: + lines = node.astext().count('\n') + node['linenos'] = (lines >= setting.lineno_threshold - 1) + + +class TrimDoctestFlagsTransform(SphinxTransform): + """ + Trim doctest flags like ``# doctest: +FLAG`` from python code-blocks. + + see :confval:`trim_doctest_flags` for more information. + """ + default_priority = HighlightLanguageTransform.default_priority + 1 + + def apply(self, **kwargs: Any) -> None: + for lbnode in self.document.findall(nodes.literal_block): + if self.is_pyconsole(lbnode): + self.strip_doctest_flags(lbnode) + + for dbnode in self.document.findall(nodes.doctest_block): + self.strip_doctest_flags(dbnode) + + def strip_doctest_flags(self, node: TextElement) -> None: + if not node.get('trim_flags', self.config.trim_doctest_flags): + return + + source = node.rawsource + source = doctest.blankline_re.sub('', source) + source = doctest.doctestopt_re.sub('', source) + node.rawsource = source + node[:] = [nodes.Text(source)] + + @staticmethod + def is_pyconsole(node: nodes.literal_block) -> bool: + if node.rawsource != node.astext(): + return False # skip parsed-literal node + + language = node.get('language') + if language in {'pycon', 'pycon3'}: + return True + elif language in {'py', 'python', 'py3', 'python3', 'default'}: + return node.rawsource.startswith('>>>') + elif language == 'guess': + try: + lexer = guess_lexer(node.rawsource) + return isinstance(lexer, PythonConsoleLexer) + except Exception: + pass + + return False + + +def setup(app: Sphinx) -> dict[str, Any]: + app.add_post_transform(HighlightLanguageTransform) + app.add_post_transform(TrimDoctestFlagsTransform) + + return { + 'version': 'builtin', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/sphinx/transforms/post_transforms/images.py b/sphinx/transforms/post_transforms/images.py new file mode 100644 index 0000000..e220df0 --- /dev/null +++ b/sphinx/transforms/post_transforms/images.py @@ -0,0 +1,280 @@ +"""Docutils transforms used by Sphinx.""" + +from __future__ import annotations + +import os +import re +from hashlib import sha1 +from math import ceil +from typing import TYPE_CHECKING, Any + +from docutils import nodes + +from sphinx.locale import __ +from sphinx.transforms import SphinxTransform +from sphinx.util import logging, requests +from sphinx.util.http_date import epoch_to_rfc1123, rfc1123_to_epoch +from sphinx.util.images import get_image_extension, guess_mimetype, parse_data_uri +from sphinx.util.osutil import ensuredir + +if TYPE_CHECKING: + from sphinx.application import Sphinx + +logger = logging.getLogger(__name__) + +MAX_FILENAME_LEN = 32 +CRITICAL_PATH_CHAR_RE = re.compile('[:;<>|*" ]') + + +class BaseImageConverter(SphinxTransform): + def apply(self, **kwargs: Any) -> None: + for node in self.document.findall(nodes.image): + if self.match(node): + self.handle(node) + + def match(self, node: nodes.image) -> bool: + return True + + def handle(self, node: nodes.image) -> None: + pass + + @property + def imagedir(self) -> str: + return os.path.join(self.app.doctreedir, 'images') + + +class ImageDownloader(BaseImageConverter): + default_priority = 100 + + def match(self, node: nodes.image) -> bool: + if self.app.builder.supported_image_types == []: + return False + if self.app.builder.supported_remote_images: + return False + return '://' in node['uri'] + + def handle(self, node: nodes.image) -> None: + try: + basename = os.path.basename(node['uri']) + if '?' in basename: + basename = basename.split('?')[0] + if basename == '' or len(basename) > MAX_FILENAME_LEN: + filename, ext = os.path.splitext(node['uri']) + basename = sha1(filename.encode(), usedforsecurity=False).hexdigest() + ext + basename = re.sub(CRITICAL_PATH_CHAR_RE, "_", basename) + + dirname = node['uri'].replace('://', '/').translate({ord("?"): "/", + ord("&"): "/"}) + if len(dirname) > MAX_FILENAME_LEN: + dirname = sha1(dirname.encode(), usedforsecurity=False).hexdigest() + ensuredir(os.path.join(self.imagedir, dirname)) + path = os.path.join(self.imagedir, dirname, basename) + + headers = {} + if os.path.exists(path): + timestamp: float = ceil(os.stat(path).st_mtime) + headers['If-Modified-Since'] = epoch_to_rfc1123(timestamp) + + r = requests.get(node['uri'], headers=headers) + if r.status_code >= 400: + logger.warning(__('Could not fetch remote image: %s [%d]') % + (node['uri'], r.status_code)) + else: + self.app.env.original_image_uri[path] = node['uri'] + + if r.status_code == 200: + with open(path, 'wb') as f: + f.write(r.content) + + last_modified = r.headers.get('last-modified') + if last_modified: + timestamp = rfc1123_to_epoch(last_modified) + os.utime(path, (timestamp, timestamp)) + + mimetype = guess_mimetype(path, default='*') + if mimetype != '*' and os.path.splitext(basename)[1] == '': + # append a suffix if URI does not contain suffix + ext = get_image_extension(mimetype) + newpath = os.path.join(self.imagedir, dirname, basename + ext) + os.replace(path, newpath) + self.app.env.original_image_uri.pop(path) + self.app.env.original_image_uri[newpath] = node['uri'] + path = newpath + node['candidates'].pop('?') + node['candidates'][mimetype] = path + node['uri'] = path + self.app.env.images.add_file(self.env.docname, path) + except Exception as exc: + logger.warning(__('Could not fetch remote image: %s [%s]') % (node['uri'], exc)) + + +class DataURIExtractor(BaseImageConverter): + default_priority = 150 + + def match(self, node: nodes.image) -> bool: + if self.app.builder.supported_remote_images == []: + return False + if self.app.builder.supported_data_uri_images is True: + return False + return node['uri'].startswith('data:') + + def handle(self, node: nodes.image) -> None: + image = parse_data_uri(node['uri']) + assert image is not None + ext = get_image_extension(image.mimetype) + if ext is None: + logger.warning(__('Unknown image format: %s...'), node['uri'][:32], + location=node) + return + + ensuredir(os.path.join(self.imagedir, 'embeded')) + digest = sha1(image.data, usedforsecurity=False).hexdigest() + path = os.path.join(self.imagedir, 'embeded', digest + ext) + self.app.env.original_image_uri[path] = node['uri'] + + with open(path, 'wb') as f: + f.write(image.data) + + node['candidates'].pop('?') + node['candidates'][image.mimetype] = path + node['uri'] = path + self.app.env.images.add_file(self.env.docname, path) + + +def get_filename_for(filename: str, mimetype: str) -> str: + basename = os.path.basename(filename) + basename = re.sub(CRITICAL_PATH_CHAR_RE, "_", basename) + return os.path.splitext(basename)[0] + (get_image_extension(mimetype) or '') + + +class ImageConverter(BaseImageConverter): + """A base class for image converters. + + An image converter is kind of Docutils transform module. It is used to + convert image files which are not supported by a builder to the + appropriate format for that builder. + + For example, :py:class:`LaTeX builder <.LaTeXBuilder>` supports PDF, + PNG and JPEG as image formats. However it does not support SVG images. + For such case, using image converters allows to embed these + unsupported images into the document. One of the image converters; + :ref:`sphinx.ext.imgconverter <sphinx.ext.imgconverter>` can convert + a SVG image to PNG format using Imagemagick internally. + + There are three steps to make your custom image converter: + + 1. Make a subclass of ``ImageConverter`` class + 2. Override ``conversion_rules``, ``is_available()`` and ``convert()`` + 3. Register your image converter to Sphinx using + :py:meth:`.Sphinx.add_post_transform` + """ + default_priority = 200 + + #: The converter is available or not. Will be filled at the first call of + #: the build. The result is shared in the same process. + #: + #: .. todo:: This should be refactored not to store the state without class + #: variable. + available: bool | None = None + + #: A conversion rules the image converter supports. + #: It is represented as a list of pair of source image format (mimetype) and + #: destination one:: + #: + #: conversion_rules = [ + #: ('image/svg+xml', 'image/png'), + #: ('image/gif', 'image/png'), + #: ('application/pdf', 'image/png'), + #: ] + conversion_rules: list[tuple[str, str]] = [] + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + + def match(self, node: nodes.image) -> bool: + if not self.app.builder.supported_image_types: + return False + if '?' in node['candidates']: + return False + if set(self.guess_mimetypes(node)) & set(self.app.builder.supported_image_types): + # builder supports the image; no need to convert + return False + if self.available is None: + # store the value to the class variable to share it during the build + self.__class__.available = self.is_available() + + if not self.available: + return False + else: + try: + self.get_conversion_rule(node) + except ValueError: + return False + else: + return True + + def get_conversion_rule(self, node: nodes.image) -> tuple[str, str]: + for candidate in self.guess_mimetypes(node): + for supported in self.app.builder.supported_image_types: + rule = (candidate, supported) + if rule in self.conversion_rules: + return rule + + msg = 'No conversion rule found' + raise ValueError(msg) + + def is_available(self) -> bool: + """Return the image converter is available or not.""" + raise NotImplementedError + + def guess_mimetypes(self, node: nodes.image) -> list[str]: + if '?' in node['candidates']: + return [] + elif '*' in node['candidates']: + guessed = guess_mimetype(node['uri']) + return [guessed] if guessed is not None else [] + else: + return node['candidates'].keys() + + def handle(self, node: nodes.image) -> None: + _from, _to = self.get_conversion_rule(node) + + if _from in node['candidates']: + srcpath = node['candidates'][_from] + else: + srcpath = node['candidates']['*'] + + filename = self.env.images[srcpath][1] + filename = get_filename_for(filename, _to) + ensuredir(self.imagedir) + destpath = os.path.join(self.imagedir, filename) + + abs_srcpath = os.path.join(self.app.srcdir, srcpath) + if self.convert(abs_srcpath, destpath): + if '*' in node['candidates']: + node['candidates']['*'] = destpath + else: + node['candidates'][_to] = destpath + node['uri'] = destpath + + self.env.original_image_uri[destpath] = srcpath + self.env.images.add_file(self.env.docname, destpath) + + def convert(self, _from: str, _to: str) -> bool: + """Convert an image file to the expected format. + + *_from* is a path of the source image file, and *_to* is a path + of the destination file. + """ + raise NotImplementedError + + +def setup(app: Sphinx) -> dict[str, Any]: + app.add_post_transform(ImageDownloader) + app.add_post_transform(DataURIExtractor) + + return { + 'version': 'builtin', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } |