From 12e8343068b906f8b2afddc5569968a8a91fa5b0 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 29 Apr 2024 06:24:24 +0200 Subject: Adding upstream version 2.1.0. Signed-off-by: Daniel Baumann --- markdown_it/rules_inline/state_inline.py | 175 +++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 markdown_it/rules_inline/state_inline.py (limited to 'markdown_it/rules_inline/state_inline.py') diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py new file mode 100644 index 0000000..283532c --- /dev/null +++ b/markdown_it/rules_inline/state_inline.py @@ -0,0 +1,175 @@ +from __future__ import annotations + +from collections import namedtuple +from collections.abc import MutableMapping +from dataclasses import dataclass +from typing import TYPE_CHECKING + +from .._compat import DATACLASS_KWARGS +from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace +from ..ruler import StateBase +from ..token import Token + +if TYPE_CHECKING: + from markdown_it import MarkdownIt + + +@dataclass(**DATACLASS_KWARGS) +class Delimiter: + # Char code of the starting marker (number). + marker: int + + # Total length of these series of delimiters. + length: int + + # An amount of characters before this one that's equivalent to + # current one. In plain English: if this delimiter does not open + # an emphasis, neither do previous `jump` characters. + # + # Used to skip sequences like "*****" in one step, for 1st asterisk + # value will be 0, for 2nd it's 1 and so on. + jump: int + + # A position of the token this delimiter corresponds to. + token: int + + # If this delimiter is matched as a valid opener, `end` will be + # equal to its position, otherwise it's `-1`. + end: int + + # Boolean flags that determine if this delimiter could open or close + # an emphasis. + open: bool + close: bool + + level: bool | None = None + + +Scanned = namedtuple("Scanned", ["can_open", "can_close", "length"]) + + +class StateInline(StateBase): + def __init__( + self, src: str, md: MarkdownIt, env: MutableMapping, outTokens: list[Token] + ): + self.src = src + self.env = env + self.md = md + self.tokens = outTokens + self.tokens_meta: list[dict | None] = [None] * len(outTokens) + + self.pos = 0 + self.posMax = len(self.src) + self.level = 0 + self.pending = "" + self.pendingLevel = 0 + + # Stores { start: end } pairs. Useful for backtrack + # optimization of pairs parse (emphasis, strikes). + self.cache: dict[int, int] = {} + + # List of emphasis-like delimiters for current tag + self.delimiters: list[Delimiter] = [] + + # Stack of delimiter lists for upper level tags + self._prev_delimiters: list[list[Delimiter]] = [] + + # backticklength => last seen position + self.backticks: dict[int, int] = {} + self.backticksScanned = False + + def __repr__(self): + return ( + f"{self.__class__.__name__}" + f"(pos=[{self.pos} of {self.posMax}], token={len(self.tokens)})" + ) + + def pushPending(self): + token = Token("text", "", 0) + token.content = self.pending + token.level = self.pendingLevel + self.tokens.append(token) + self.pending = "" + return token + + def push(self, ttype, tag, nesting): + """Push new token to "stream". + If pending text exists - flush it as text token + """ + if self.pending: + self.pushPending() + + token = Token(ttype, tag, nesting) + token_meta = None + + if nesting < 0: + # closing tag + self.level -= 1 + self.delimiters = self._prev_delimiters.pop() + + token.level = self.level + + if nesting > 0: + # opening tag + self.level += 1 + self._prev_delimiters.append(self.delimiters) + self.delimiters = [] + token_meta = {"delimiters": self.delimiters} + + self.pendingLevel = self.level + self.tokens.append(token) + self.tokens_meta.append(token_meta) + return token + + def scanDelims(self, start, canSplitWord): + """ + Scan a sequence of emphasis-like markers, and determine whether + it can start an emphasis sequence or end an emphasis sequence. + + - start - position to scan from (it should point at a valid marker); + - canSplitWord - determine if these markers can be found inside a word + + """ + pos = start + left_flanking = True + right_flanking = True + maximum = self.posMax + marker = self.srcCharCode[start] + + # treat beginning of the line as a whitespace + lastChar = self.srcCharCode[start - 1] if start > 0 else 0x20 + + while pos < maximum and self.srcCharCode[pos] == marker: + pos += 1 + + count = pos - start + + # treat end of the line as a whitespace + nextChar = self.srcCharCode[pos] if pos < maximum else 0x20 + + isLastPunctChar = isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar)) + isNextPunctChar = isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar)) + + isLastWhiteSpace = isWhiteSpace(lastChar) + isNextWhiteSpace = isWhiteSpace(nextChar) + + if isNextWhiteSpace: + left_flanking = False + elif isNextPunctChar: + if not (isLastWhiteSpace or isLastPunctChar): + left_flanking = False + + if isLastWhiteSpace: + right_flanking = False + elif isLastPunctChar: + if not (isNextWhiteSpace or isNextPunctChar): + right_flanking = False + + if not canSplitWord: + can_open = left_flanking and ((not right_flanking) or isLastPunctChar) + can_close = right_flanking and ((not left_flanking) or isNextPunctChar) + else: + can_open = left_flanking + can_close = right_flanking + + return Scanned(can_open, can_close, count) -- cgit v1.2.3