summaryrefslogtreecommitdiffstats
path: root/markdown_it/rules_core/smartquotes.py
diff options
context:
space:
mode:
Diffstat (limited to 'markdown_it/rules_core/smartquotes.py')
-rw-r--r--markdown_it/rules_core/smartquotes.py202
1 files changed, 202 insertions, 0 deletions
diff --git a/markdown_it/rules_core/smartquotes.py b/markdown_it/rules_core/smartquotes.py
new file mode 100644
index 0000000..93f8be2
--- /dev/null
+++ b/markdown_it/rules_core/smartquotes.py
@@ -0,0 +1,202 @@
+"""Convert straight quotation marks to typographic ones
+"""
+from __future__ import annotations
+
+import re
+from typing import Any
+
+from ..common.utils import charCodeAt, isMdAsciiPunct, isPunctChar, isWhiteSpace
+from ..token import Token
+from .state_core import StateCore
+
+QUOTE_TEST_RE = re.compile(r"['\"]")
+QUOTE_RE = re.compile(r"['\"]")
+APOSTROPHE = "\u2019" # ’
+
+
+def replaceAt(string: str, index: int, ch: str) -> str:
+ # When the index is negative, the behavior is different from the js version.
+ # But basically, the index will not be negative.
+ assert index >= 0
+ return string[:index] + ch + string[index + 1 :]
+
+
+def process_inlines(tokens: list[Token], state: StateCore) -> None:
+ stack: list[dict[str, Any]] = []
+
+ for i in range(len(tokens)):
+ token = tokens[i]
+
+ thisLevel = token.level
+
+ j = 0
+ for j in range(len(stack))[::-1]:
+ if stack[j]["level"] <= thisLevel:
+ break
+ else:
+ # When the loop is terminated without a "break".
+ # Subtract 1 to get the same index as the js version.
+ j -= 1
+
+ stack = stack[: j + 1]
+
+ if token.type != "text":
+ continue
+
+ text = token.content
+ pos = 0
+ maximum = len(text)
+
+ while pos < maximum:
+ goto_outer = False
+ lastIndex = pos
+ t = QUOTE_RE.search(text[lastIndex:])
+ if not t:
+ break
+
+ canOpen = canClose = True
+ pos = t.start(0) + lastIndex + 1
+ isSingle = t.group(0) == "'"
+
+ # Find previous character,
+ # default to space if it's the beginning of the line
+ lastChar = 0x20
+
+ if t.start(0) + lastIndex - 1 >= 0:
+ lastChar = charCodeAt(text, t.start(0) + lastIndex - 1)
+ else:
+ for j in range(i)[::-1]:
+ # lastChar defaults to 0x20
+ if tokens[j].type == "softbreak" or tokens[j].type == "hardbreak":
+ break
+ # should skip all tokens except 'text', 'html_inline' or 'code_inline'
+ if not tokens[j].content:
+ continue
+
+ lastChar = charCodeAt(tokens[j].content, len(tokens[j].content) - 1)
+ break
+
+ # Find next character,
+ # default to space if it's the end of the line
+ nextChar = 0x20
+
+ if pos < maximum:
+ nextChar = charCodeAt(text, pos)
+ else:
+ for j in range(i + 1, len(tokens)):
+ # nextChar defaults to 0x20
+ if tokens[j].type == "softbreak" or tokens[j].type == "hardbreak":
+ break
+ # should skip all tokens except 'text', 'html_inline' or 'code_inline'
+ if not tokens[j].content:
+ continue
+
+ nextChar = charCodeAt(tokens[j].content, 0)
+ break
+
+ isLastPunctChar = isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar))
+ isNextPunctChar = isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar))
+
+ isLastWhiteSpace = isWhiteSpace(lastChar)
+ isNextWhiteSpace = isWhiteSpace(nextChar)
+
+ if isNextWhiteSpace:
+ canOpen = False
+ elif isNextPunctChar:
+ if not (isLastWhiteSpace or isLastPunctChar):
+ canOpen = False
+
+ if isLastWhiteSpace:
+ canClose = False
+ elif isLastPunctChar:
+ if not (isNextWhiteSpace or isNextPunctChar):
+ canClose = False
+
+ if nextChar == 0x22 and t.group(0) == '"': # 0x22: "
+ if lastChar >= 0x30 and lastChar <= 0x39: # 0x30: 0, 0x39: 9
+ # special case: 1"" - count first quote as an inch
+ canClose = canOpen = False
+
+ if canOpen and canClose:
+ # Replace quotes in the middle of punctuation sequence, but not
+ # in the middle of the words, i.e.:
+ #
+ # 1. foo " bar " baz - not replaced
+ # 2. foo-"-bar-"-baz - replaced
+ # 3. foo"bar"baz - not replaced
+ canOpen = isLastPunctChar
+ canClose = isNextPunctChar
+
+ if not canOpen and not canClose:
+ # middle of word
+ if isSingle:
+ token.content = replaceAt(
+ token.content, t.start(0) + lastIndex, APOSTROPHE
+ )
+ continue
+
+ if canClose:
+ # this could be a closing quote, rewind the stack to get a match
+ for j in range(len(stack))[::-1]:
+ item = stack[j]
+ if stack[j]["level"] < thisLevel:
+ break
+ if item["single"] == isSingle and stack[j]["level"] == thisLevel:
+ item = stack[j]
+
+ if isSingle:
+ openQuote = state.md.options.quotes[2]
+ closeQuote = state.md.options.quotes[3]
+ else:
+ openQuote = state.md.options.quotes[0]
+ closeQuote = state.md.options.quotes[1]
+
+ # replace token.content *before* tokens[item.token].content,
+ # because, if they are pointing at the same token, replaceAt
+ # could mess up indices when quote length != 1
+ token.content = replaceAt(
+ token.content, t.start(0) + lastIndex, closeQuote
+ )
+ tokens[item["token"]].content = replaceAt(
+ tokens[item["token"]].content, item["pos"], openQuote
+ )
+
+ pos += len(closeQuote) - 1
+ if item["token"] == i:
+ pos += len(openQuote) - 1
+
+ text = token.content
+ maximum = len(text)
+
+ stack = stack[:j]
+ goto_outer = True
+ break
+ if goto_outer:
+ goto_outer = False
+ continue
+
+ if canOpen:
+ stack.append(
+ {
+ "token": i,
+ "pos": t.start(0) + lastIndex,
+ "single": isSingle,
+ "level": thisLevel,
+ }
+ )
+ elif canClose and isSingle:
+ token.content = replaceAt(
+ token.content, t.start(0) + lastIndex, APOSTROPHE
+ )
+
+
+def smartquotes(state: StateCore) -> None:
+ if not state.md.options.typographer:
+ return
+
+ for token in state.tokens:
+
+ if token.type != "inline" or not QUOTE_RE.search(token.content):
+ continue
+ assert token.children is not None
+ process_inlines(token.children, state)