summaryrefslogtreecommitdiffstats
path: root/sphinx/util/smartypants.py
diff options
context:
space:
mode:
Diffstat (limited to 'sphinx/util/smartypants.py')
-rw-r--r--sphinx/util/smartypants.py374
1 files changed, 374 insertions, 0 deletions
diff --git a/sphinx/util/smartypants.py b/sphinx/util/smartypants.py
new file mode 100644
index 0000000..ab300e0
--- /dev/null
+++ b/sphinx/util/smartypants.py
@@ -0,0 +1,374 @@
+"""Deprecated backport of docutils.utils.smartquotes.
+
+This is extracted (with minor adaptations for flake8 compliance) from
+docutils’ docutils/utils/smartquotes.py as of revision 8097 (30 May 2017),
+in order to backport for Sphinx usage with Docutils < 0.14 extra language
+configurations and fixes. Replaces earlier smartypants version as used up
+to Sphinx 1.5.6.
+
+:copyright: © 2010 Günter Milde,
+ original `SmartyPants`_: © 2003 John Gruber
+ smartypants.py: © 2004, 2007 Chad Miller
+:license: Released under the terms of the `2-Clause BSD license`_, in short:
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notices and this notice are preserved.
+ This file is offered as-is, without any warranty.
+
+.. _SmartyPants: https://daringfireball.net/projects/smartypants/
+.. _2-Clause BSD license: https://spdx.org/licenses/BSD-2-Clause
+
+See the LICENSE file and the original docutils code for details.
+
+"""
+
+import re
+import warnings
+from typing import Generator, Iterable, Tuple
+
+from docutils.utils import smartquotes
+
+from sphinx.deprecation import RemovedInSphinx60Warning
+
# Raised when this module is imported.  ``stacklevel=2`` attributes the
# warning to the code that imports ``sphinx.util.smartypants`` instead of to
# this file, so users can see which of their modules needs updating.
warnings.warn('sphinx.util.smartypants is deprecated.',
              RemovedInSphinx60Warning, stacklevel=2)
+
# Quote characters per `BCP 47` language tag.
#
# Each value holds four items in the order: primary opening, primary closing,
# secondary opening, secondary closing.  A plain 4-character string is used
# when every quote is a single character; a 4-tuple is used when a quote
# consists of more than one character (e.g. French guillemets with spacing).
#
# The no-break spaces in the French entries are written as explicit escapes
# (``\u00a0`` NO-BREAK SPACE, ``\u202f`` NARROW NO-BREAK SPACE) because the
# literal characters are indistinguishable from ordinary spaces and are easily
# lost when the file is edited or reformatted.
langquotes = {
    'af': '“”‘’',
    'af-x-altquot': '„”‚’',
    'bg': '„“‚‘',  # Bulgarian, https://bg.wikipedia.org/wiki/Кавички
    'ca': '«»“”',
    'ca-x-altquot': '“”‘’',
    'cs': '„“‚‘',
    'cs-x-altquot': '»«›‹',
    'da': '»«›‹',
    'da-x-altquot': '„“‚‘',
    # 'da-x-altquot2': '””’’',
    'de': '„“‚‘',
    'de-x-altquot': '»«›‹',
    'de-ch': '«»‹›',
    'el': '«»“”',
    'en': '“”‘’',
    'en-uk-x-altquot': '‘’“”',  # Attention: " → ‘ and ' → “ !
    'eo': '“”‘’',
    'es': '«»“”',
    'es-x-altquot': '“”‘’',
    'et': '„“‚‘',  # no secondary quote listed in
    'et-x-altquot': '«»‹›',  # the sources above (wikipedia.org)
    'eu': '«»‹›',
    'fi': '””’’',
    'fi-x-altquot': '»»››',
    'fr': ('«\u00a0', '\u00a0»', '“', '”'),  # full no-break space
    'fr-x-altquot': ('«\u202f', '\u202f»', '“', '”'),  # narrow no-break space
    'fr-ch': '«»‹›',
    'fr-ch-x-altquot': ('«\u202f', '\u202f»', '‹\u202f', '\u202f›'),  # narrow no-break space
    # http://typoguide.ch/
    'gl': '«»“”',
    'he': '”“»«',  # Hebrew is RTL, test position:
    'he-x-altquot': '„”‚’',  # low quotation marks are opening.
    # 'he-x-altquot': '“„‘‚',  # RTL: low quotation marks opening
    'hr': '„”‘’',  # https://hrvatska-tipografija.com/polunavodnici/
    'hr-x-altquot': '»«›‹',
    'hsb': '„“‚‘',
    'hsb-x-altquot': '»«›‹',
    'hu': '„”«»',
    'is': '„“‚‘',
    'it': '«»“”',
    'it-ch': '«»‹›',
    'it-x-altquot': '“”‘’',
    # 'it-x-altquot2': '“„‘‚',  # [7] in headlines
    'ja': '「」『』',
    'lt': '„“‚‘',
    'lv': '„“‚‘',
    'mk': '„“‚‘',  # Macedonian,
    # https://mk.wikipedia.org/wiki/Правопис_и_правоговор_на_македонскиот_јазик
    'nl': '“”‘’',
    'nl-x-altquot': '„”‚’',
    # 'nl-x-altquot2': '””’’',
    'nb': '«»’’',  # Norsk bokmål (canonical form 'no')
    'nn': '«»’’',  # Nynorsk [10]
    'nn-x-altquot': '«»‘’',  # [8], [10]
    # 'nn-x-altquot2': '«»«»',  # [9], [10]
    # 'nn-x-altquot3': '„“‚‘',  # [10]
    'no': '«»’’',  # Norsk bokmål [10]
    'no-x-altquot': '«»‘’',  # [8], [10]
    # 'no-x-altquot2': '«»«»',  # [9], [10]
    # 'no-x-altquot3': '„“‚‘',  # [10]
    'pl': '„”«»',
    'pl-x-altquot': '«»‚’',
    # 'pl-x-altquot2': '„”‚’',
    # https://pl.wikipedia.org/wiki/Cudzys%C5%82%C3%B3w
    'pt': '«»“”',
    'pt-br': '“”‘’',
    'ro': '„”«»',
    'ru': '«»„“',
    'sh': '„”‚’',  # Serbo-Croatian
    'sh-x-altquot': '»«›‹',
    'sk': '„“‚‘',  # Slovak
    'sk-x-altquot': '»«›‹',
    'sl': '„“‚‘',  # Slovenian
    'sl-x-altquot': '»«›‹',
    'sq': '«»‹›',  # Albanian
    'sq-x-altquot': '“„‘‚',
    'sr': '„”’’',
    'sr-x-altquot': '»«›‹',
    'sv': '””’’',
    'sv-x-altquot': '»»››',
    'tr': '“”‘’',
    'tr-x-altquot': '«»‹›',
    # 'tr-x-altquot2': '“„‘‚',  # [7] antiquated?
    'uk': '«»„“',
    'uk-x-altquot': '„“‚‘',
    'zh-cn': '“”‘’',
    'zh-tw': '「」『』',
}
+
+
def educateQuotes(text: str, language: str = 'en') -> str:
    """Replace straight quotes in `text` with "educated" (curly) quotes.

    :param text: the text to process.
    :param language: `BCP 47` language tag; passed to
                     ``smartquotes.smartchars`` to select the opening and
                     closing quote characters.
    :returns: `text` with ``"`` and ``'`` replaced by the typographic quotes
              (and apostrophe) of `language`.

    Example input:  "Isn't this fun?"
    Example output: “Isn’t this fun?”
    """

    smart = smartquotes.smartchars(language)
    # Fall back to U+2019 when the smartchars object does not provide an
    # ``apostrophe`` attribute.
    apostrophe = getattr(smart, 'apostrophe', '’')

    punct_class = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""

    # Special case if the very first character is a quote
    # followed by punctuation at a non-word-break.
    # Close the quotes by brute force.
    # NB: in a raw string the assertion must be spelled ``\B``; ``\\B`` would
    # demand a literal backslash followed by "B" and the rule would never fire.
    text = re.sub(r"""^'(?=%s\B)""" % (punct_class,), smart.csquote, text)
    text = re.sub(r"""^"(?=%s\B)""" % (punct_class,), smart.cpquote, text)

    # Special case for double sets of quotes, e.g.:
    #   <p>He said, "'Quoted' words in a larger quote."</p>
    text = re.sub(r""""'(?=\w)""", smart.opquote + smart.osquote, text)
    text = re.sub(r"""'"(?=\w)""", smart.osquote + smart.opquote, text)

    # Special case for decade abbreviations (the '80s):
    if language.startswith('en'):  # TODO similar cases in other languages?
        text = re.sub(r"""'(?=\d{2}s)""", apostrophe, text, flags=re.UNICODE)

    close_class = r"""[^\ \t\r\n\[\{\(\-]"""
    # This fragment is interpolated into re.VERBOSE patterns, where an
    # unescaped ``#`` starts a comment; the hash signs must be escaped or the
    # decimal-entity alternatives (and the ``|`` following them) are dropped.
    dec_dashes = r"""&\#8211;|&\#8212;"""

    # Get most opening single quotes:
    opening_single_quotes_regex = re.compile(r"""
                    (
                        \s          |   # a whitespace char, or
                        &nbsp;      |   # a non-breaking space entity, or
                        --          |   # dashes, or
                        &[mn]dash;  |   # named dash entities
                        %s          |   # or decimal entities
                        &\#x201[34];    # or hex
                    )
                    '                 # the quote
                    (?=\w)            # followed by a word character
                    """ % (dec_dashes,), re.VERBOSE | re.UNICODE)
    text = opening_single_quotes_regex.sub(r'\1' + smart.osquote, text)

    # In many locales, single closing quotes are different from apostrophe:
    if smart.csquote != apostrophe:
        # A quote between two word characters is an apostrophe.
        apostrophe_regex = re.compile(r"(?<=\w)'(?=\w)", re.UNICODE)
        text = apostrophe_regex.sub(apostrophe, text)
    # TODO: keep track of quoting level to recognize apostrophe in, e.g.,
    # "Ich fass' es nicht."

    # Closing single quotes that are NOT followed by whitespace, a
    # possessive/plural "s" or a digit:
    closing_single_quotes_regex = re.compile(r"""
                    (%s)
                    '
                    (?!\s  |       # whitespace
                       s\b |
                        \d         # digits   ('80s)
                    )
                    """ % (close_class,), re.VERBOSE | re.UNICODE)
    text = closing_single_quotes_regex.sub(r'\1' + smart.csquote, text)

    # Closing single quotes that ARE followed by whitespace or "s" at a word
    # end; the following character is captured and re-inserted.
    closing_single_quotes_regex = re.compile(r"""
                    (%s)
                    '
                    (\s | s\b)
                    """ % (close_class,), re.VERBOSE | re.UNICODE)
    text = closing_single_quotes_regex.sub(r'\1%s\2' % smart.csquote, text)

    # Any remaining single quotes should be opening ones:
    text = re.sub(r"""'""", smart.osquote, text)

    # Get most opening double quotes:
    opening_double_quotes_regex = re.compile(r"""
                    (
                        \s          |   # a whitespace char, or
                        &nbsp;      |   # a non-breaking space entity, or
                        --          |   # dashes, or
                        &[mn]dash;  |   # named dash entities
                        %s          |   # or decimal entities
                        &\#x201[34];    # or hex
                    )
                    "                 # the quote
                    (?=\w)            # followed by a word character
                    """ % (dec_dashes,), re.VERBOSE)
    text = opening_double_quotes_regex.sub(r'\1' + smart.opquote, text)

    # Double closing quotes: any double quote directly followed by whitespace
    closing_double_quotes_regex = re.compile(r"""
                    "
                    (?=\s)
                    """, re.VERBOSE)
    text = closing_double_quotes_regex.sub(smart.cpquote, text)

    # ... and any double quote preceded by a character that indicates the
    # quote should be closing.
    closing_double_quotes_regex = re.compile(r"""
                    (%s)   # character that indicates the quote should be closing
                    "
                    """ % (close_class,), re.VERBOSE)
    text = closing_double_quotes_regex.sub(r'\1' + smart.cpquote, text)

    # Any remaining quotes should be opening ones.
    text = re.sub(r'"', smart.opquote, text)

    return text
+
+
def educate_tokens(text_tokens: Iterable[Tuple[str, str]],
                   attr: str = smartquotes.default_smartypants_attr,
                   language: str = 'en'
                   ) -> Generator[str, None, None]:
    """Yield the "educated" text of every ``(type, text)`` pair in `text_tokens`.

    Compared with the Docutils 0.13.1 SmartQuotes transform (which hard-coded
    ``attr='2'``), the numeric presets ``'1'`` and ``'2'`` are intercepted here
    and behave like ``'qde'`` and ``'qDe'`` respectively, i.e. without backtick
    conversion; ``'qDe'`` is the choice of Docutils 0.14, backported for use
    by Sphinx with earlier Docutils releases.

    Pass ``attr='qdbe'`` or ``attr='qDbe'`` to get the former Docutils effect
    of ``'1'`` or ``'2'``.

    :param text_tokens: iterable of ``(token_type, text)`` pairs.  Tokens of
        type ``'tag'`` and tokens with empty text are yielded untouched;
        tokens of type ``'literal'`` are yielded untouched but still provide
        the context character for the token that follows them.
    :param attr: processing options, either a preset or a string of flags:

        ===== ==========================================================
        0     do nothing
        1     set all (but backticks)
        2     set all (but backticks), old school en- and em-dash shortcuts
        3     set all, using inverted old school en- and em-dash shortcuts
        -1    "stupefy" mode
        q     quotes
        b     backtick quotes (double only)
        B     backtick quotes (double and single)
        d     dashes
        D     old school dashes
        i     inverted old school dashes
        e     ellipses
        w     convert ``&quot;`` entities to ``"``
        ===== ==========================================================
    :param language: language tag handed to the quote-related helpers.

    refs: https://sourceforge.net/p/docutils/mailman/message/35869025/
    """

    # ---- decode `attr` into individual switches -------------------------
    do_quotes = do_ellipses = do_stupefy = convert_quot = False
    do_dashes = do_backticks = 0

    if attr in ("1", "2", "3"):
        # Numeric presets: everything on; the number selects the dash style.
        # Only "3" also enables (double) backtick conversion.
        do_quotes = do_ellipses = True
        do_dashes = int(attr)
        do_backticks = 1 if attr == "3" else 0
    elif attr == "-1":
        do_stupefy = True
    else:
        do_quotes = "q" in attr
        do_ellipses = "e" in attr
        convert_quot = "w" in attr
        # Later entries win when several flags of one family are given.
        for flag, level in (("b", 1), ("B", 2)):
            if flag in attr:
                do_backticks = level
        for flag, level in (("d", 1), ("D", 2), ("i", 3)):
            if flag in attr:
                do_dashes = level

    # Dash style -> name of the smartquotes helper implementing it
    # (resolved lazily, only when a token is actually processed).
    dash_educators = {
        1: 'educateDashes',
        2: 'educateDashesOldSchool',
        3: 'educateDashesOldSchoolInverted',
    }

    # Last character of the preceding text token; serves as context so that
    # a quote at the very start of a token is curled in the right direction.
    previous_tail = " "

    for token_kind, chunk in text_tokens:
        # HTML/XML tags and empty tokens pass through and leave the
        # context character alone.
        if token_kind == 'tag' or not chunk:
            yield chunk
            continue

        # Literal text (math, literal, raw, ...) passes through unchanged
        # but does provide context for the next token.
        if token_kind == 'literal':
            previous_tail = chunk[-1:]
            yield chunk
            continue

        tail = chunk[-1:]  # taken before any conversion happens

        chunk = smartquotes.processEscapes(chunk)

        if convert_quot:
            chunk = chunk.replace('&quot;', '"')

        if do_dashes in dash_educators:
            chunk = getattr(smartquotes, dash_educators[do_dashes])(chunk)

        if do_ellipses:
            chunk = smartquotes.educateEllipses(chunk)

        # Backticks have to be handled before plain quotes.
        if do_backticks:
            chunk = smartquotes.educateBackticks(chunk, language)
            if do_backticks == 2:
                chunk = smartquotes.educateSingleBackticks(chunk, language)

        if do_quotes:
            # A plain quote in the context position is swapped for ";" to
            # prevent its conversion to a 2-character sequence in French
            # (only one leading character is cut off again below).
            lead = previous_tail.replace('"', ';').replace("'", ';')
            chunk = educateQuotes(lead + chunk, language)[1:]

        if do_stupefy:
            chunk = smartquotes.stupefyEntities(chunk, language)

        # Context for the following token.
        previous_tail = tail

        yield smartquotes.processEscapes(chunk, restore=True)