summaryrefslogtreecommitdiffstats
path: root/markdown_it/rules_core/replacements.py
diff options
context:
space:
mode:
Diffstat (limited to 'markdown_it/rules_core/replacements.py')
-rw-r--r--markdown_it/rules_core/replacements.py125
1 files changed, 125 insertions, 0 deletions
diff --git a/markdown_it/rules_core/replacements.py b/markdown_it/rules_core/replacements.py
new file mode 100644
index 0000000..45377d3
--- /dev/null
+++ b/markdown_it/rules_core/replacements.py
@@ -0,0 +1,125 @@
+"""Simple typographic replacements
+
+* ``(c)``, ``(C)`` → ©
+* ``(tm)``, ``(TM)`` → ™
+* ``(r)``, ``(R)`` → ®
+* ``(p)``, ``(P)`` → §
+* ``+-`` → ±
+* ``...`` → …
+* ``?....`` → ?..
+* ``!....`` → !..
+* ``????????`` → ???
+* ``!!!!!`` → !!!
+* ``,,,`` → ,
+* ``--`` → &ndash
+* ``---`` → &mdash
+"""
+from __future__ import annotations
+
+import logging
+import re
+
+from ..token import Token
+from .state_core import StateCore
+
+LOGGER = logging.getLogger(__name__)
+
+# TODO:
+# - fractionals 1/2, 1/4, 3/4 -> ½, ¼, ¾
+# - miltiplication 2 x 4 -> 2 × 4
+
+RARE_RE = re.compile(r"\+-|\.\.|\?\?\?\?|!!!!|,,|--")
+
+# Workaround for phantomjs - need regex without /g flag,
+# or root check will fail every second time
+# SCOPED_ABBR_TEST_RE = r"\((c|tm|r|p)\)"
+
+SCOPED_ABBR_RE = re.compile(r"\((c|tm|r|p)\)", flags=re.IGNORECASE)
+
+PLUS_MINUS_RE = re.compile(r"\+-")
+
+ELLIPSIS_RE = re.compile(r"\.{2,}")
+
+ELLIPSIS_QUESTION_EXCLAMATION_RE = re.compile(r"([?!])…")
+
+QUESTION_EXCLAMATION_RE = re.compile(r"([?!]){4,}")
+
+COMMA_RE = re.compile(r",{2,}")
+
+EM_DASH_RE = re.compile(r"(^|[^-])---(?=[^-]|$)", flags=re.MULTILINE)
+
+EN_DASH_RE = re.compile(r"(^|\s)--(?=\s|$)", flags=re.MULTILINE)
+
+EN_DASH_INDENT_RE = re.compile(r"(^|[^-\s])--(?=[^-\s]|$)", flags=re.MULTILINE)
+
+
+SCOPED_ABBR = {"c": "©", "r": "®", "p": "§", "tm": "™"}
+
+
+def replaceFn(match: re.Match[str]):
+ return SCOPED_ABBR[match.group(1).lower()]
+
+
+def replace_scoped(inlineTokens: list[Token]) -> None:
+ inside_autolink = 0
+
+ for token in inlineTokens:
+ if token.type == "text" and not inside_autolink:
+ token.content = SCOPED_ABBR_RE.sub(replaceFn, token.content)
+
+ if token.type == "link_open" and token.info == "auto":
+ inside_autolink -= 1
+
+ if token.type == "link_close" and token.info == "auto":
+ inside_autolink += 1
+
+
+def replace_rare(inlineTokens: list[Token]) -> None:
+ inside_autolink = 0
+
+ for token in inlineTokens:
+ if token.type == "text" and not inside_autolink:
+ if RARE_RE.search(token.content):
+ # +- -> ±
+ token.content = PLUS_MINUS_RE.sub("±", token.content)
+
+ # .., ..., ....... -> …
+ token.content = ELLIPSIS_RE.sub("…", token.content)
+
+ # but ?..... & !..... -> ?.. & !..
+ token.content = ELLIPSIS_QUESTION_EXCLAMATION_RE.sub(
+ "\\1..", token.content
+ )
+ token.content = QUESTION_EXCLAMATION_RE.sub("\\1\\1\\1", token.content)
+
+ # ,, ,,, ,,,, -> ,
+ token.content = COMMA_RE.sub(",", token.content)
+
+ # em-dash
+ token.content = EM_DASH_RE.sub("\\1\u2014", token.content)
+
+ # en-dash
+ token.content = EN_DASH_RE.sub("\\1\u2013", token.content)
+ token.content = EN_DASH_INDENT_RE.sub("\\1\u2013", token.content)
+
+ if token.type == "link_open" and token.info == "auto":
+ inside_autolink -= 1
+
+ if token.type == "link_close" and token.info == "auto":
+ inside_autolink += 1
+
+
+def replace(state: StateCore) -> None:
+ if not state.md.options.typographer:
+ return
+
+ for token in state.tokens:
+ if token.type != "inline":
+ continue
+ assert token.children is not None
+
+ if SCOPED_ABBR_RE.search(token.content):
+ replace_scoped(token.children)
+
+ if RARE_RE.search(token.content):
+ replace_rare(token.children)