1 files changed, 125 insertions, 0 deletions
diff --git a/markdown_it/rules_core/replacements.py b/markdown_it/rules_core/replacements.py
new file mode 100644
index 0000000..45377d3
--- /dev/null
+++ b/markdown_it/rules_core/replacements.py
@@ -0,0 +1,125 @@
+"""Simple typographic replacements
+
+* ``(c)``, ``(C)`` → ©
+* ``(tm)``, ``(TM)`` → ™
+* ``(r)``, ``(R)`` → ®
+* ``(p)``, ``(P)`` → §
+* ``+-`` → ±
+* ``...`` → …
+* ``?....`` → ?..
+* ``!....`` → !..
+* ``????????`` → ???
+* ``!!!!!`` → !!!
+* ``,,,`` → ,
+* ``--`` → – (en dash)
+* ``---`` → — (em dash)
+"""
+from __future__ import annotations
+
+import logging
+import re
+
+from ..token import Token
+from .state_core import StateCore
+
+LOGGER = logging.getLogger(__name__)
+
+# TODO:
+# - fractions 1/2, 1/4, 3/4 -> ½, ¼, ¾
+# - multiplication 2 x 4 -> 2 × 4
+
+# Quick pre-check run before the individual "rare" substitutions below.
+RARE_RE = re.compile(r"\+-|\.\.|\?\?\?\?|!!!!|,,|--")
+
+# Workaround for phantomjs - need regex without /g flag,
+# or root check will fail every second time
+# SCOPED_ABBR_TEST_RE = r"\((c|tm|r|p)\)"
+# Matches (c), (tm), (r), (p) in any letter case.
+SCOPED_ABBR_RE = re.compile(r"\((c|tm|r|p)\)", flags=re.IGNORECASE)
+# A literal "+-".
+PLUS_MINUS_RE = re.compile(r"\+-")
+# A run of two or more dots.
+ELLIPSIS_RE = re.compile(r"\.{2,}")
+# "?" or "!" directly followed by an ellipsis character.
+ELLIPSIS_QUESTION_EXCLAMATION_RE = re.compile(r"([?!])…")
+# A run of four or more "?" / "!" characters.
+QUESTION_EXCLAMATION_RE = re.compile(r"([?!]){4,}")
+# A run of two or more commas.
+COMMA_RE = re.compile(r",{2,}")
+# Exactly three hyphens, not adjacent to a further hyphen.
+EM_DASH_RE = re.compile(r"(^|[^-])---(?=[^-]|$)", flags=re.MULTILINE)
+# Two hyphens with whitespace (or a line boundary) on both sides.
+EN_DASH_RE = re.compile(r"(^|\s)--(?=\s|$)", flags=re.MULTILINE)
+# Two hyphens between non-space, non-hyphen characters (or a line boundary).
+EN_DASH_INDENT_RE = re.compile(r"(^|[^-\s])--(?=[^-\s]|$)", flags=re.MULTILINE)
+# Replacement symbol for each abbreviation, keyed by its lower-case form.
+SCOPED_ABBR = {"c": "©", "r": "®", "p": "§", "tm": "™"}
+
+
+def replaceFn(match: re.Match[str]):  # ``re.sub`` callback: "(tm)" -> "™", etc.
+    return SCOPED_ABBR[match[1].lower()]
+
+
+def replace_scoped(inlineTokens: list[Token]) -> None:
+    """Replace ``(c)``, ``(tm)``, ``(r)``, ``(p)`` in text tokens, in place.
+
+    Text tokens between an auto ``link_open`` and its ``link_close`` are skipped.
+    """
+    autolink_depth = 0  # non-zero while inside an auto link
+    for tok in inlineTokens:
+        if tok.type == "text":
+            if autolink_depth == 0:
+                tok.content = SCOPED_ABBR_RE.sub(replaceFn, tok.content)
+        elif tok.type in ("link_open", "link_close") and tok.info == "auto":
+            autolink_depth += 1 if tok.type == "link_open" else -1
+
+
+def replace_rare(inlineTokens: list[Token]) -> None:
+    """Apply the "rare" typographic substitutions to text tokens, in place.
+
+    Only ``text`` tokens outside auto links whose content matches ``RARE_RE``
+    are rewritten. Order matters: the ``?…`` / ``!…`` rule acts on the ellipsis
+    character produced by the rule just before it.
+
+    :param inlineTokens: tokens to scan; ``content`` of text tokens is mutated.
+    """
+    # (pattern, replacement) pairs, applied top to bottom.
+    substitutions = (
+        (PLUS_MINUS_RE, "±"),  # +- -> ±
+        (ELLIPSIS_RE, "…"),  # .., ..., ....... -> …
+        # but ?..... & !..... -> ?.. & !..
+        (ELLIPSIS_QUESTION_EXCLAMATION_RE, "\\1.."),
+        (QUESTION_EXCLAMATION_RE, "\\1\\1\\1"),  # ????, !!!!! -> ???, !!!
+        (COMMA_RE, ","),  # ,,  ,,,  ,,,, -> ,
+        (EM_DASH_RE, "\\1\u2014"),  # em-dash
+        (EN_DASH_RE, "\\1\u2013"),  # en-dash with whitespace around it
+        (EN_DASH_INDENT_RE, "\\1\u2013"),  # en-dash with no whitespace around it
+    )
+    autolink_depth = 0  # non-zero while between auto link_open and link_close
+
+    for tok in inlineTokens:
+        if tok.type == "text":
+            if autolink_depth == 0 and RARE_RE.search(tok.content):
+                text = tok.content
+                for pattern, replacement in substitutions:
+                    text = pattern.sub(replacement, text)
+                tok.content = text
+        elif tok.type in ("link_open", "link_close") and tok.info == "auto":
+            # Track nesting so text inside auto links is skipped.
+            autolink_depth += 1 if tok.type == "link_open" else -1
+
+
+def replace(state: StateCore) -> None:
+    """Run the typographic replacements on the children of every inline token.
+
+    Does nothing unless ``state.md.options.typographer`` is truthy.
+    """
+    if not state.md.options.typographer:
+        return
+    for block_token in state.tokens:
+        if block_token.type == "inline":
+            assert block_token.children is not None
+            if SCOPED_ABBR_RE.search(block_token.content):
+                replace_scoped(block_token.children)
+            if RARE_RE.search(block_token.content):
+                replace_rare(block_token.children)