summaryrefslogtreecommitdiffstats
path: root/markdown_it/rules_inline/state_inline.py
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-29 04:24:24 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-29 04:24:24 +0000
commit12e8343068b906f8b2afddc5569968a8a91fa5b0 (patch)
tree75cc5e05a4392ea0292251898f992a15a16b172b /markdown_it/rules_inline/state_inline.py
parentInitial commit. (diff)
downloadmarkdown-it-py-upstream.tar.xz
markdown-it-py-upstream.zip
Adding upstream version 2.1.0.upstream/2.1.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'markdown_it/rules_inline/state_inline.py')
-rw-r--r--markdown_it/rules_inline/state_inline.py175
1 files changed, 175 insertions, 0 deletions
diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py
new file mode 100644
index 0000000..283532c
--- /dev/null
+++ b/markdown_it/rules_inline/state_inline.py
@@ -0,0 +1,175 @@
+from __future__ import annotations
+
+from collections import namedtuple
+from collections.abc import MutableMapping
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+from .._compat import DATACLASS_KWARGS
+from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace
+from ..ruler import StateBase
+from ..token import Token
+
+if TYPE_CHECKING:
+ from markdown_it import MarkdownIt
+
+
+@dataclass(**DATACLASS_KWARGS)
+class Delimiter:
+ # Char code of the starting marker (number).
+ marker: int
+
+ # Total length of these series of delimiters.
+ length: int
+
+ # An amount of characters before this one that's equivalent to
+ # current one. In plain English: if this delimiter does not open
+ # an emphasis, neither do previous `jump` characters.
+ #
+ # Used to skip sequences like "*****" in one step, for 1st asterisk
+ # value will be 0, for 2nd it's 1 and so on.
+ jump: int
+
+ # A position of the token this delimiter corresponds to.
+ token: int
+
+ # If this delimiter is matched as a valid opener, `end` will be
+ # equal to its position, otherwise it's `-1`.
+ end: int
+
+ # Boolean flags that determine if this delimiter could open or close
+ # an emphasis.
+ open: bool
+ close: bool
+
+ level: bool | None = None
+
+
+Scanned = namedtuple("Scanned", ["can_open", "can_close", "length"])
+
+
+class StateInline(StateBase):
+ def __init__(
+ self, src: str, md: MarkdownIt, env: MutableMapping, outTokens: list[Token]
+ ):
+ self.src = src
+ self.env = env
+ self.md = md
+ self.tokens = outTokens
+ self.tokens_meta: list[dict | None] = [None] * len(outTokens)
+
+ self.pos = 0
+ self.posMax = len(self.src)
+ self.level = 0
+ self.pending = ""
+ self.pendingLevel = 0
+
+ # Stores { start: end } pairs. Useful for backtrack
+ # optimization of pairs parse (emphasis, strikes).
+ self.cache: dict[int, int] = {}
+
+ # List of emphasis-like delimiters for current tag
+ self.delimiters: list[Delimiter] = []
+
+ # Stack of delimiter lists for upper level tags
+ self._prev_delimiters: list[list[Delimiter]] = []
+
+ # backticklength => last seen position
+ self.backticks: dict[int, int] = {}
+ self.backticksScanned = False
+
+ def __repr__(self):
+ return (
+ f"{self.__class__.__name__}"
+ f"(pos=[{self.pos} of {self.posMax}], token={len(self.tokens)})"
+ )
+
+ def pushPending(self):
+ token = Token("text", "", 0)
+ token.content = self.pending
+ token.level = self.pendingLevel
+ self.tokens.append(token)
+ self.pending = ""
+ return token
+
+ def push(self, ttype, tag, nesting):
+ """Push new token to "stream".
+ If pending text exists - flush it as text token
+ """
+ if self.pending:
+ self.pushPending()
+
+ token = Token(ttype, tag, nesting)
+ token_meta = None
+
+ if nesting < 0:
+ # closing tag
+ self.level -= 1
+ self.delimiters = self._prev_delimiters.pop()
+
+ token.level = self.level
+
+ if nesting > 0:
+ # opening tag
+ self.level += 1
+ self._prev_delimiters.append(self.delimiters)
+ self.delimiters = []
+ token_meta = {"delimiters": self.delimiters}
+
+ self.pendingLevel = self.level
+ self.tokens.append(token)
+ self.tokens_meta.append(token_meta)
+ return token
+
+ def scanDelims(self, start, canSplitWord):
+ """
+ Scan a sequence of emphasis-like markers, and determine whether
+ it can start an emphasis sequence or end an emphasis sequence.
+
+ - start - position to scan from (it should point at a valid marker);
+ - canSplitWord - determine if these markers can be found inside a word
+
+ """
+ pos = start
+ left_flanking = True
+ right_flanking = True
+ maximum = self.posMax
+ marker = self.srcCharCode[start]
+
+ # treat beginning of the line as a whitespace
+ lastChar = self.srcCharCode[start - 1] if start > 0 else 0x20
+
+ while pos < maximum and self.srcCharCode[pos] == marker:
+ pos += 1
+
+ count = pos - start
+
+ # treat end of the line as a whitespace
+ nextChar = self.srcCharCode[pos] if pos < maximum else 0x20
+
+ isLastPunctChar = isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar))
+ isNextPunctChar = isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar))
+
+ isLastWhiteSpace = isWhiteSpace(lastChar)
+ isNextWhiteSpace = isWhiteSpace(nextChar)
+
+ if isNextWhiteSpace:
+ left_flanking = False
+ elif isNextPunctChar:
+ if not (isLastWhiteSpace or isLastPunctChar):
+ left_flanking = False
+
+ if isLastWhiteSpace:
+ right_flanking = False
+ elif isLastPunctChar:
+ if not (isNextWhiteSpace or isNextPunctChar):
+ right_flanking = False
+
+ if not canSplitWord:
+ can_open = left_flanking and ((not right_flanking) or isLastPunctChar)
+ can_close = right_flanking and ((not left_flanking) or isNextPunctChar)
+ else:
+ can_open = left_flanking
+ can_close = right_flanking
+
+ return Scanned(can_open, can_close, count)