diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-29 04:24:24 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-29 04:24:24 +0000 |
commit | 12e8343068b906f8b2afddc5569968a8a91fa5b0 (patch) | |
tree | 75cc5e05a4392ea0292251898f992a15a16b172b /markdown_it/rules_core/smartquotes.py | |
parent | Initial commit. (diff) | |
download | markdown-it-py-12e8343068b906f8b2afddc5569968a8a91fa5b0.tar.xz markdown-it-py-12e8343068b906f8b2afddc5569968a8a91fa5b0.zip |
Adding upstream version 2.1.0.upstream/2.1.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | markdown_it/rules_core/smartquotes.py | 202 |
1 files changed, 202 insertions, 0 deletions
diff --git a/markdown_it/rules_core/smartquotes.py b/markdown_it/rules_core/smartquotes.py new file mode 100644 index 0000000..93f8be2 --- /dev/null +++ b/markdown_it/rules_core/smartquotes.py @@ -0,0 +1,202 @@ +"""Convert straight quotation marks to typographic ones +""" +from __future__ import annotations + +import re +from typing import Any + +from ..common.utils import charCodeAt, isMdAsciiPunct, isPunctChar, isWhiteSpace +from ..token import Token +from .state_core import StateCore + +QUOTE_TEST_RE = re.compile(r"['\"]") +QUOTE_RE = re.compile(r"['\"]") +APOSTROPHE = "\u2019" # ’ + + +def replaceAt(string: str, index: int, ch: str) -> str: + # When the index is negative, the behavior is different from the js version. + # But basically, the index will not be negative. + assert index >= 0 + return string[:index] + ch + string[index + 1 :] + + +def process_inlines(tokens: list[Token], state: StateCore) -> None: + stack: list[dict[str, Any]] = [] + + for i in range(len(tokens)): + token = tokens[i] + + thisLevel = token.level + + j = 0 + for j in range(len(stack))[::-1]: + if stack[j]["level"] <= thisLevel: + break + else: + # When the loop is terminated without a "break". + # Subtract 1 to get the same index as the js version. + j -= 1 + + stack = stack[: j + 1] + + if token.type != "text": + continue + + text = token.content + pos = 0 + maximum = len(text) + + while pos < maximum: + goto_outer = False + lastIndex = pos + t = QUOTE_RE.search(text[lastIndex:]) + if not t: + break + + canOpen = canClose = True + pos = t.start(0) + lastIndex + 1 + isSingle = t.group(0) == "'" + + # Find previous character, + # default to space if it's the beginning of the line + lastChar = 0x20 + + if t.start(0) + lastIndex - 1 >= 0: + lastChar = charCodeAt(text, t.start(0) + lastIndex - 1) + else: + for j in range(i)[::-1]: + # lastChar defaults to 0x20 + if tokens[j].type == "softbreak" or tokens[j].type == "hardbreak": + break + # should skip all tokens except 'text', 'html_inline' or 'code_inline' + if not tokens[j].content: + continue + + lastChar = charCodeAt(tokens[j].content, len(tokens[j].content) - 1) + break + + # Find next character, + # default to space if it's the end of the line + nextChar = 0x20 + + if pos < maximum: + nextChar = charCodeAt(text, pos) + else: + for j in range(i + 1, len(tokens)): + # nextChar defaults to 0x20 + if tokens[j].type == "softbreak" or tokens[j].type == "hardbreak": + break + # should skip all tokens except 'text', 'html_inline' or 'code_inline' + if not tokens[j].content: + continue + + nextChar = charCodeAt(tokens[j].content, 0) + break + + isLastPunctChar = isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar)) + isNextPunctChar = isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar)) + + isLastWhiteSpace = isWhiteSpace(lastChar) + isNextWhiteSpace = isWhiteSpace(nextChar) + + if isNextWhiteSpace: + canOpen = False + elif isNextPunctChar: + if not (isLastWhiteSpace or isLastPunctChar): + canOpen = False + + if isLastWhiteSpace: + canClose = False + elif isLastPunctChar: + if not (isNextWhiteSpace or isNextPunctChar): + canClose = False + + if nextChar == 0x22 and t.group(0) == '"': # 0x22: " + if lastChar >= 0x30 and lastChar <= 0x39: # 0x30: 0, 0x39: 9 + # special case: 1"" - count first quote as an inch + canClose = canOpen = False + + if canOpen and canClose: + # Replace quotes in the middle of punctuation sequence, but not + # in the middle of the words, i.e.: + # + # 1. foo " bar " baz - not replaced + # 2. foo-"-bar-"-baz - replaced + # 3. foo"bar"baz - not replaced + canOpen = isLastPunctChar + canClose = isNextPunctChar + + if not canOpen and not canClose: + # middle of word + if isSingle: + token.content = replaceAt( + token.content, t.start(0) + lastIndex, APOSTROPHE + ) + continue + + if canClose: + # this could be a closing quote, rewind the stack to get a match + for j in range(len(stack))[::-1]: + item = stack[j] + if stack[j]["level"] < thisLevel: + break + if item["single"] == isSingle and stack[j]["level"] == thisLevel: + item = stack[j] + + if isSingle: + openQuote = state.md.options.quotes[2] + closeQuote = state.md.options.quotes[3] + else: + openQuote = state.md.options.quotes[0] + closeQuote = state.md.options.quotes[1] + + # replace token.content *before* tokens[item.token].content, + # because, if they are pointing at the same token, replaceAt + # could mess up indices when quote length != 1 + token.content = replaceAt( + token.content, t.start(0) + lastIndex, closeQuote + ) + tokens[item["token"]].content = replaceAt( + tokens[item["token"]].content, item["pos"], openQuote + ) + + pos += len(closeQuote) - 1 + if item["token"] == i: + pos += len(openQuote) - 1 + + text = token.content + maximum = len(text) + + stack = stack[:j] + goto_outer = True + break + if goto_outer: + goto_outer = False + continue + + if canOpen: + stack.append( + { + "token": i, + "pos": t.start(0) + lastIndex, + "single": isSingle, + "level": thisLevel, + } + ) + elif canClose and isSingle: + token.content = replaceAt( + token.content, t.start(0) + lastIndex, APOSTROPHE + ) + + +def smartquotes(state: StateCore) -> None: + if not state.md.options.typographer: + return + + for token in state.tokens: + + if token.type != "inline" or not QUOTE_RE.search(token.content): + continue + assert token.children is not None + process_inlines(token.children, state) |