diff options
Diffstat (limited to 'sphinx/transforms/post_transforms/images.py')
-rw-r--r-- | sphinx/transforms/post_transforms/images.py | 280 |
1 files changed, 280 insertions, 0 deletions
diff --git a/sphinx/transforms/post_transforms/images.py b/sphinx/transforms/post_transforms/images.py new file mode 100644 index 0000000..e220df0 --- /dev/null +++ b/sphinx/transforms/post_transforms/images.py @@ -0,0 +1,280 @@ +"""Docutils transforms used by Sphinx.""" + +from __future__ import annotations + +import os +import re +from hashlib import sha1 +from math import ceil +from typing import TYPE_CHECKING, Any + +from docutils import nodes + +from sphinx.locale import __ +from sphinx.transforms import SphinxTransform +from sphinx.util import logging, requests +from sphinx.util.http_date import epoch_to_rfc1123, rfc1123_to_epoch +from sphinx.util.images import get_image_extension, guess_mimetype, parse_data_uri +from sphinx.util.osutil import ensuredir + +if TYPE_CHECKING: + from sphinx.application import Sphinx + +logger = logging.getLogger(__name__) + +MAX_FILENAME_LEN = 32 +CRITICAL_PATH_CHAR_RE = re.compile('[:;<>|*" ]') + + +class BaseImageConverter(SphinxTransform): + def apply(self, **kwargs: Any) -> None: + for node in self.document.findall(nodes.image): + if self.match(node): + self.handle(node) + + def match(self, node: nodes.image) -> bool: + return True + + def handle(self, node: nodes.image) -> None: + pass + + @property + def imagedir(self) -> str: + return os.path.join(self.app.doctreedir, 'images') + + +class ImageDownloader(BaseImageConverter): + default_priority = 100 + + def match(self, node: nodes.image) -> bool: + if self.app.builder.supported_image_types == []: + return False + if self.app.builder.supported_remote_images: + return False + return '://' in node['uri'] + + def handle(self, node: nodes.image) -> None: + try: + basename = os.path.basename(node['uri']) + if '?' in basename: + basename = basename.split('?')[0] + if basename == '' or len(basename) > MAX_FILENAME_LEN: + filename, ext = os.path.splitext(node['uri']) + basename = sha1(filename.encode(), usedforsecurity=False).hexdigest() + ext + basename = re.sub(CRITICAL_PATH_CHAR_RE, "_", basename) + + dirname = node['uri'].replace('://', '/').translate({ord("?"): "/", + ord("&"): "/"}) + if len(dirname) > MAX_FILENAME_LEN: + dirname = sha1(dirname.encode(), usedforsecurity=False).hexdigest() + ensuredir(os.path.join(self.imagedir, dirname)) + path = os.path.join(self.imagedir, dirname, basename) + + headers = {} + if os.path.exists(path): + timestamp: float = ceil(os.stat(path).st_mtime) + headers['If-Modified-Since'] = epoch_to_rfc1123(timestamp) + + r = requests.get(node['uri'], headers=headers) + if r.status_code >= 400: + logger.warning(__('Could not fetch remote image: %s [%d]') % + (node['uri'], r.status_code)) + else: + self.app.env.original_image_uri[path] = node['uri'] + + if r.status_code == 200: + with open(path, 'wb') as f: + f.write(r.content) + + last_modified = r.headers.get('last-modified') + if last_modified: + timestamp = rfc1123_to_epoch(last_modified) + os.utime(path, (timestamp, timestamp)) + + mimetype = guess_mimetype(path, default='*') + if mimetype != '*' and os.path.splitext(basename)[1] == '': + # append a suffix if URI does not contain suffix + ext = get_image_extension(mimetype) + newpath = os.path.join(self.imagedir, dirname, basename + ext) + os.replace(path, newpath) + self.app.env.original_image_uri.pop(path) + self.app.env.original_image_uri[newpath] = node['uri'] + path = newpath + node['candidates'].pop('?') + node['candidates'][mimetype] = path + node['uri'] = path + self.app.env.images.add_file(self.env.docname, path) + except Exception as exc: + logger.warning(__('Could not fetch remote image: %s [%s]') % (node['uri'], exc)) + + +class DataURIExtractor(BaseImageConverter): + default_priority = 150 + + def match(self, node: nodes.image) -> bool: + if self.app.builder.supported_remote_images == []: + return False + if self.app.builder.supported_data_uri_images is True: + return False + return node['uri'].startswith('data:') + + def handle(self, node: nodes.image) -> None: + image = parse_data_uri(node['uri']) + assert image is not None + ext = get_image_extension(image.mimetype) + if ext is None: + logger.warning(__('Unknown image format: %s...'), node['uri'][:32], + location=node) + return + + ensuredir(os.path.join(self.imagedir, 'embeded')) + digest = sha1(image.data, usedforsecurity=False).hexdigest() + path = os.path.join(self.imagedir, 'embeded', digest + ext) + self.app.env.original_image_uri[path] = node['uri'] + + with open(path, 'wb') as f: + f.write(image.data) + + node['candidates'].pop('?') + node['candidates'][image.mimetype] = path + node['uri'] = path + self.app.env.images.add_file(self.env.docname, path) + + +def get_filename_for(filename: str, mimetype: str) -> str: + basename = os.path.basename(filename) + basename = re.sub(CRITICAL_PATH_CHAR_RE, "_", basename) + return os.path.splitext(basename)[0] + (get_image_extension(mimetype) or '') + + +class ImageConverter(BaseImageConverter): + """A base class for image converters. + + An image converter is kind of Docutils transform module. It is used to + convert image files which are not supported by a builder to the + appropriate format for that builder. + + For example, :py:class:`LaTeX builder <.LaTeXBuilder>` supports PDF, + PNG and JPEG as image formats. However it does not support SVG images. + For such case, using image converters allows to embed these + unsupported images into the document. One of the image converters; + :ref:`sphinx.ext.imgconverter <sphinx.ext.imgconverter>` can convert + a SVG image to PNG format using Imagemagick internally. + + There are three steps to make your custom image converter: + + 1. Make a subclass of ``ImageConverter`` class + 2. Override ``conversion_rules``, ``is_available()`` and ``convert()`` + 3. Register your image converter to Sphinx using + :py:meth:`.Sphinx.add_post_transform` + """ + default_priority = 200 + + #: The converter is available or not. Will be filled at the first call of + #: the build. The result is shared in the same process. + #: + #: .. todo:: This should be refactored not to store the state without class + #: variable. + available: bool | None = None + + #: A conversion rules the image converter supports. + #: It is represented as a list of pair of source image format (mimetype) and + #: destination one:: + #: + #: conversion_rules = [ + #: ('image/svg+xml', 'image/png'), + #: ('image/gif', 'image/png'), + #: ('application/pdf', 'image/png'), + #: ] + conversion_rules: list[tuple[str, str]] = [] + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + + def match(self, node: nodes.image) -> bool: + if not self.app.builder.supported_image_types: + return False + if '?' in node['candidates']: + return False + if set(self.guess_mimetypes(node)) & set(self.app.builder.supported_image_types): + # builder supports the image; no need to convert + return False + if self.available is None: + # store the value to the class variable to share it during the build + self.__class__.available = self.is_available() + + if not self.available: + return False + else: + try: + self.get_conversion_rule(node) + except ValueError: + return False + else: + return True + + def get_conversion_rule(self, node: nodes.image) -> tuple[str, str]: + for candidate in self.guess_mimetypes(node): + for supported in self.app.builder.supported_image_types: + rule = (candidate, supported) + if rule in self.conversion_rules: + return rule + + msg = 'No conversion rule found' + raise ValueError(msg) + + def is_available(self) -> bool: + """Return the image converter is available or not.""" + raise NotImplementedError + + def guess_mimetypes(self, node: nodes.image) -> list[str]: + if '?' in node['candidates']: + return [] + elif '*' in node['candidates']: + guessed = guess_mimetype(node['uri']) + return [guessed] if guessed is not None else [] + else: + return node['candidates'].keys() + + def handle(self, node: nodes.image) -> None: + _from, _to = self.get_conversion_rule(node) + + if _from in node['candidates']: + srcpath = node['candidates'][_from] + else: + srcpath = node['candidates']['*'] + + filename = self.env.images[srcpath][1] + filename = get_filename_for(filename, _to) + ensuredir(self.imagedir) + destpath = os.path.join(self.imagedir, filename) + + abs_srcpath = os.path.join(self.app.srcdir, srcpath) + if self.convert(abs_srcpath, destpath): + if '*' in node['candidates']: + node['candidates']['*'] = destpath + else: + node['candidates'][_to] = destpath + node['uri'] = destpath + + self.env.original_image_uri[destpath] = srcpath + self.env.images.add_file(self.env.docname, destpath) + + def convert(self, _from: str, _to: str) -> bool: + """Convert an image file to the expected format. + + *_from* is a path of the source image file, and *_to* is a path + of the destination file. + """ + raise NotImplementedError + + +def setup(app: Sphinx) -> dict[str, Any]: + app.add_post_transform(ImageDownloader) + app.add_post_transform(DataURIExtractor) + + return { + 'version': 'builtin', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } |