diff options
Diffstat (limited to '')
-rw-r--r-- | markdown_it/rules_block/state_block.py | 230 |
1 files changed, 230 insertions, 0 deletions
diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py new file mode 100644 index 0000000..42b8fce --- /dev/null +++ b/markdown_it/rules_block/state_block.py @@ -0,0 +1,230 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ..common.utils import isSpace +from ..ruler import StateBase +from ..token import Token + +if TYPE_CHECKING: + from markdown_it.main import MarkdownIt + + +class StateBlock(StateBase): + def __init__( + self, + src: str, + md: MarkdownIt, + env, + tokens: list[Token], + srcCharCode: tuple[int, ...] | None = None, + ): + + if srcCharCode is not None: + self._src = src + self.srcCharCode = srcCharCode + else: + self.src = src + + # link to parser instance + self.md = md + + self.env = env + + # + # Internal state variables + # + + self.tokens = tokens + + self.bMarks = [] # line begin offsets for fast jumps + self.eMarks = [] # line end offsets for fast jumps + # offsets of the first non-space characters (tabs not expanded) + self.tShift = [] + self.sCount = [] # indents for each line (tabs expanded) + + # An amount of virtual spaces (tabs expanded) between beginning + # of each line (bMarks) and real beginning of that line. + # + # It exists only as a hack because blockquotes override bMarks + # losing information in the process. + # + # It's used only when expanding tabs, you can think about it as + # an initial tab length, e.g. bsCount=21 applied to string `\t123` + # means first tab should be expanded to 4-21%4 === 3 spaces. + # + self.bsCount = [] + + # block parser variables + self.blkIndent = 0 # required block content indent (for example, if we are + # inside a list, it would be positioned after list marker) + self.line = 0 # line index in src + self.lineMax = 0 # lines count + self.tight = False # loose/tight mode for lists + self.ddIndent = -1 # indent of the current dd block (-1 if there isn't any) + self.listIndent = -1 # indent of the current list block (-1 if there isn't any) + + # can be 'blockquote', 'list', 'root', 'paragraph' or 'reference' + # used in lists to determine if they interrupt a paragraph + self.parentType = "root" + + self.level = 0 + + # renderer + self.result = "" + + # Create caches + # Generate markers. + indent_found = False + + start = pos = indent = offset = 0 + length = len(self.src) + + for pos, character in enumerate(self.srcCharCode): + if not indent_found: + if isSpace(character): + indent += 1 + + if character == 0x09: + offset += 4 - offset % 4 + else: + offset += 1 + continue + else: + indent_found = True + + if character == 0x0A or pos == length - 1: + if character != 0x0A: + pos += 1 + self.bMarks.append(start) + self.eMarks.append(pos) + self.tShift.append(indent) + self.sCount.append(offset) + self.bsCount.append(0) + + indent_found = False + indent = 0 + offset = 0 + start = pos + 1 + + # Push fake entry to simplify cache bounds checks + self.bMarks.append(length) + self.eMarks.append(length) + self.tShift.append(0) + self.sCount.append(0) + self.bsCount.append(0) + + self.lineMax = len(self.bMarks) - 1 # don't count last fake line + + def __repr__(self): + return ( + f"{self.__class__.__name__}" + f"(line={self.line},level={self.level},tokens={len(self.tokens)})" + ) + + def push(self, ttype: str, tag: str, nesting: int) -> Token: + """Push new token to "stream".""" + token = Token(ttype, tag, nesting) + token.block = True + if nesting < 0: + self.level -= 1 # closing tag + token.level = self.level + if nesting > 0: + self.level += 1 # opening tag + self.tokens.append(token) + return token + + def isEmpty(self, line: int) -> bool: + """.""" + return (self.bMarks[line] + self.tShift[line]) >= self.eMarks[line] + + def skipEmptyLines(self, from_pos: int) -> int: + """.""" + while from_pos < self.lineMax: + try: + if (self.bMarks[from_pos] + self.tShift[from_pos]) < self.eMarks[ + from_pos + ]: + break + except IndexError: + pass + from_pos += 1 + return from_pos + + def skipSpaces(self, pos: int) -> int: + """Skip spaces from given position.""" + while pos < len(self.src): + if not isSpace(self.srcCharCode[pos]): + break + pos += 1 + return pos + + def skipSpacesBack(self, pos: int, minimum: int) -> int: + """Skip spaces from given position in reverse.""" + if pos <= minimum: + return pos + while pos > minimum: + pos -= 1 + if not isSpace(self.srcCharCode[pos]): + return pos + 1 + return pos + + def skipChars(self, pos: int, code: int) -> int: + """Skip char codes from given position.""" + while pos < len(self.src): + if self.srcCharCode[pos] != code: + break + pos += 1 + return pos + + def skipCharsBack(self, pos: int, code: int, minimum: int) -> int: + """Skip char codes reverse from given position - 1.""" + if pos <= minimum: + return pos + while pos > minimum: + pos -= 1 + if code != self.srcCharCode[pos]: + return pos + 1 + return pos + + def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str: + """Cut lines range from source.""" + line = begin + if begin >= end: + return "" + + queue = [""] * (end - begin) + + i = 1 + while line < end: + lineIndent = 0 + lineStart = first = self.bMarks[line] + if line + 1 < end or keepLastLF: + last = self.eMarks[line] + 1 + else: + last = self.eMarks[line] + + while (first < last) and (lineIndent < indent): + ch = self.srcCharCode[first] + if isSpace(ch): + if ch == 0x09: + lineIndent += 4 - (lineIndent + self.bsCount[line]) % 4 + else: + lineIndent += 1 + elif first - lineStart < self.tShift[line]: + lineIndent += 1 + else: + break + first += 1 + + if lineIndent > indent: + # partially expanding tabs in code blocks, e.g '\t\tfoobar' + # with indent=2 becomes ' \tfoobar' + queue[i - 1] = (" " * (lineIndent - indent)) + self.src[first:last] + else: + queue[i - 1] = self.src[first:last] + + line += 1 + i += 1 + + return "".join(queue) |