# Provenance: sphinx/util/texescape.py as added by upstream commit
# cf7da1843c45a4c2df7a749f7886a2d2ba0ee92a ("Adding upstream version 7.2.6.",
# Daniel Baumann, Mon, 15 Apr 2024 19:25:40 +0200).
"""TeX escaping helper."""

from __future__ import annotations

import re

tex_replacements = [
    # map TeX special chars
    ('$', r'\$'),
    ('%', r'\%'),
    ('&', r'\&'),
    ('#', r'\#'),
    ('_', r'\_'),
    ('{', r'\{'),
    ('}', r'\}'),
    ('\\', r'\textbackslash{}'),
    ('~', r'\textasciitilde{}'),
    ('^', r'\textasciicircum{}'),
    # map chars to avoid mis-interpretation in LaTeX
    ('[', r'{[}'),
    (']', r'{]}'),
    # map special Unicode characters to TeX commands
    ('✓', r'\(\checkmark\)'),
    ('✔', r'\(\pmb{\checkmark}\)'),
    ('✕', r'\(\times\)'),
    ('✖', r'\(\pmb{\times}\)'),
    # used to separate -- in options
    # The key is written as an explicit escape because the character is
    # invisible: a literal that gets stripped leaves '' behind, and ord('')
    # then raises TypeError inside init().
    # NOTE(review): U+FEFF is presumed to be the intended character (the
    # literal was not visible in the reviewed copy) -- confirm against upstream.
    ('\ufeff', r'{}'),
    # map some special Unicode characters to similar ASCII ones
    # (even for Unicode LaTeX as may not be supported by OpenType font)
    ('⎽', r'\_'),
    ('ℯ', r'e'),
    ('ⅈ', r'i'),
    # Greek alphabet not escaped: pdflatex handles it via textalpha and inputenc
    # OHM SIGN U+2126 is handled by LaTeX textcomp package
]

# A map to avoid TeX ligatures or character replacements in PDF output
# xelatex/lualatex/uplatex are handled differently (#5790, #6888)
ascii_tex_replacements = [
    # Note: the " renders curly in OT1 encoding but straight in T1, T2A, LY1...
    #       escaping it to \textquotedbl would break documents using OT1
    #       Sphinx does \shorthandoff{"} to avoid problems with some languages
    # There is no \text... LaTeX escape for the hyphen character -
    ('-', r'\sphinxhyphen{}'),  # -- and --- are TeX ligatures
    # ,, is a TeX ligature in T1 encoding, but escaping the comma adds
    # complications (whether by {}, or a macro) and is not done
    # the next two require textcomp package
    ("'", r'\textquotesingle{}'),  # else ' renders curly, and '' is a ligature
    ('`', r'\textasciigrave{}'),   # else \` and \`\` render curly
    ('<', r'\textless{}'),     # < is inv. exclam in OT1, << is a T1-ligature
    ('>', r'\textgreater{}'),  # > is inv. quest. mark in OT1, >> a T1-ligature
]

# A map of Unicode characters to LaTeX representation
# (for LaTeX engines which don't support unicode)
unicode_tex_replacements = [
    # map some more common Unicode characters to TeX commands
    ('¶', r'\P{}'),
    ('§', r'\S{}'),
    ('€', r'\texteuro{}'),
    ('∞', r'\(\infty\)'),
    ('±', r'\(\pm\)'),
    ('→', r'\(\rightarrow\)'),
    ('‣', r'\(\rightarrow\)'),
    ('–', r'\textendash{}'),
    # superscript
    ('⁰', r'\(\sp{\text{0}}\)'),
    ('¹', r'\(\sp{\text{1}}\)'),
    ('²', r'\(\sp{\text{2}}\)'),
    ('³', r'\(\sp{\text{3}}\)'),
    ('⁴', r'\(\sp{\text{4}}\)'),
    ('⁵', r'\(\sp{\text{5}}\)'),
    ('⁶', r'\(\sp{\text{6}}\)'),
    ('⁷', r'\(\sp{\text{7}}\)'),
    ('⁸', r'\(\sp{\text{8}}\)'),
    ('⁹', r'\(\sp{\text{9}}\)'),
    # subscript
    ('₀', r'\(\sb{\text{0}}\)'),
    ('₁', r'\(\sb{\text{1}}\)'),
    ('₂', r'\(\sb{\text{2}}\)'),
    ('₃', r'\(\sb{\text{3}}\)'),
    ('₄', r'\(\sb{\text{4}}\)'),
    ('₅', r'\(\sb{\text{5}}\)'),
    ('₆', r'\(\sb{\text{6}}\)'),
    ('₇', r'\(\sb{\text{7}}\)'),
    ('₈', r'\(\sb{\text{8}}\)'),
    ('₉', r'\(\sb{\text{9}}\)'),
]

# TODO: this should be called tex_idescape_map because its only use is in
#       sphinx.writers.latex.LaTeXTranslator.idescape()
# %, {, }, \, #, and ~ are the only ones which must be replaced by _ character
# It would be simpler to define it entirely here rather than in init().
# Unicode replacements are superfluous, as idescape() uses backslashreplace
tex_replace_map: dict[int, str] = {}

# Translation tables for str.translate(), keyed by code point.
# All of them are empty until init() has been called.
_tex_escape_map: dict[int, str] = {}
_tex_escape_map_without_unicode: dict[int, str] = {}
_tex_hlescape_map: dict[int, str] = {}
_tex_hlescape_map_without_unicode: dict[int, str] = {}


def escape(s: str, latex_engine: str | None = None) -> str:
    """Escape text for LaTeX output.

    :param s: the text to escape.
    :param latex_engine: name of the LaTeX engine.  For ``'lualatex'`` and
        ``'xelatex'`` the table without the ASCII-ligature and Unicode
        replacements is used (the hyphen is still replaced); any other
        value, including ``None``, selects the full table.
    :returns: *s* with every mapped character replaced.  The tables are
        filled by :func:`init`; before that call *s* is returned unchanged.
    """
    if latex_engine in ('lualatex', 'xelatex'):
        # unicode based LaTeX engine
        return s.translate(_tex_escape_map_without_unicode)
    else:
        return s.translate(_tex_escape_map)


def hlescape(s: str, latex_engine: str | None = None) -> str:
    """Escape text for LaTeX highlighter.

    Same engine selection as :func:`escape`, but using the highlighter
    tables, which leave ``[``, ``]``, ``{``, ``}`` and the backslash alone
    and contain no ASCII-ligature replacements.

    :param s: the text to escape.
    :param latex_engine: name of the LaTeX engine (see :func:`escape`).
    :returns: the escaped text; unchanged if :func:`init` was never called.
    """
    if latex_engine in ('lualatex', 'xelatex'):
        # unicode based LaTeX engine
        return s.translate(_tex_hlescape_map_without_unicode)
    else:
        return s.translate(_tex_hlescape_map)


def escape_abbr(text: str) -> str:
    r"""Adjust spacing after abbreviations.  Works with @ letter or other.

    Every ``.`` that is followed by whitespace or ends the string gets
    ``\@{}`` appended; dots followed by any other character are untouched.
    """
    return re.sub(r'\.(?=\s|$)', r'.\@{}', text)


def init() -> None:
    """Fill the module-level translation tables from the replacement lists.

    Only assigns keys derived from the lists, so calling it more than once
    yields the same tables.
    """
    for a, b in tex_replacements:
        _tex_escape_map[ord(a)] = b
        _tex_escape_map_without_unicode[ord(a)] = b
        tex_replace_map[ord(a)] = '_'

    # no reason to do this for _tex_escape_map_without_unicode
    for a, b in ascii_tex_replacements:
        _tex_escape_map[ord(a)] = b

    # but the hyphen has a specific PDF bookmark problem
    # https://github.com/latex3/hyperref/issues/112
    _tex_escape_map_without_unicode[ord('-')] = r'\sphinxhyphen{}'

    for a, b in unicode_tex_replacements:
        _tex_escape_map[ord(a)] = b
        # This is actually unneeded:
        tex_replace_map[ord(a)] = '_'

    for a, b in tex_replacements:
        # brackets, braces and the backslash are left alone for the highlighter
        if a in '[]{}\\':
            continue
        _tex_hlescape_map[ord(a)] = b
        _tex_hlescape_map_without_unicode[ord(a)] = b

    for a, b in unicode_tex_replacements:
        _tex_hlescape_map[ord(a)] = b