Adding upstream version 2.1.0. (refs: upstream/2.1.0, upstream)

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-29 04:24:24 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-29 04:24:24 +0000
commit: 12e8343068b906f8b2afddc5569968a8a91fa5b0 (patch)
tree: 75cc5e05a4392ea0292251898f992a15a16b172b /markdown_it/rules_block
parent: Initial commit. (diff)
download: markdown-it-py-ef6b3991640e41f44752cdb6502719ca58a762c8.tar.xz
markdown-it-py-ef6b3991640e41f44752cdb6502719ca58a762c8.zip
13 files changed, 1870 insertions, 0 deletions
diff --git a/markdown_it/rules_block/__init__.py b/markdown_it/rules_block/__init__.py
new file mode 100644
index 0000000..bcf138d
--- /dev/null
+++ b/markdown_it/rules_block/__init__.py
@@ -0,0 +1,27 @@
+# Public names of this package: the shared block-parser state class plus one
+# callable per block rule. Each name is bound by the imports that follow.
+__all__ = (
+    "StateBlock",
+    "paragraph",
+    "heading",
+    "lheading",
+    "code",
+    "fence",
+    "hr",
+    "list_block",
+    "reference",
+    "blockquote",
+    "html_block",
+    "table",
+)
+
+from .blockquote import blockquote
+from .code import code
+from .fence import fence
+from .heading import heading
+from .hr import hr
+from .html_block import html_block
+from .lheading import lheading
+from .list import list_block
+from .paragraph import paragraph
+from .reference import reference
+from .state_block import StateBlock
+from .table import table
diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py
new file mode 100644
index 0000000..6575731
--- /dev/null
+++ b/markdown_it/rules_block/blockquote.py
@@ -0,0 +1,299 @@
+# Block quotes
+from __future__ import annotations
+
+import logging
+
+from ..common.utils import isSpace
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
+def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
+
+    LOGGER.debug(
+        "entering blockquote: %s, %s, %s, %s", state, startLine, endLine, silent
+    )
+
+    oldLineMax = state.lineMax
+    pos = state.bMarks[startLine] + state.tShift[startLine]
+    max = state.eMarks[startLine]
+
+    # if it's indented more than 3 spaces, it should be a code block
+    if (state.sCount[startLine] - state.blkIndent) >= 4:
+        return False
+
+    # check the block quote marker
+    if state.srcCharCode[pos] != 0x3E:  # /* > */
+        return False
+    pos += 1
+
+    # we know that it's going to be a valid blockquote,
+    # so no point trying to find the end of it in silent mode
+    if silent:
+        return True
+
+    # set offset past spaces and ">"
+    initial = offset = state.sCount[startLine] + 1
+
+    try:
+        second_char_code: int | None = state.srcCharCode[pos]
+    except IndexError:
+        second_char_code = None
+
+    # skip one optional space after '>'
+    if second_char_code == 0x20:  # /* space */
+        # ' >   test '
+        #     ^ -- position start of line here:
+        pos += 1
+        initial += 1
+        offset += 1
+        adjustTab = False
+        spaceAfterMarker = True
+    elif second_char_code == 0x09:  # /* tab */
+        spaceAfterMarker = True
+
+        if (state.bsCount[startLine] + offset) % 4 == 3:
+            # '  >\t  test '
+            #       ^ -- position start of line here (tab has width==1)
+            pos += 1
+            initial += 1
+            offset += 1
+            adjustTab = False
+        else:
+            # ' >\t  test '
+            #    ^ -- position start of line here + shift bsCount slightly
+            #         to make extra space appear
+            adjustTab = True
+
+    else:
+        spaceAfterMarker = False
+
+    oldBMarks = [state.bMarks[startLine]]
+    state.bMarks[startLine] = pos
+
+    while pos < max:
+        ch = state.srcCharCode[pos]
+
+        if isSpace(ch):
+            if ch == 0x09:  # / tab /
+                offset += (
+                    4
+                    - (offset + state.bsCount[startLine] + (1 if adjustTab else 0)) % 4
+                )
+            else:
+                offset += 1
+
+        else:
+            break
+
+        pos += 1
+
+    oldBSCount = [state.bsCount[startLine]]
+    state.bsCount[startLine] = (
+        state.sCount[startLine] + 1 + (1 if spaceAfterMarker else 0)
+    )
+
+    lastLineEmpty = pos >= max
+
+    oldSCount = [state.sCount[startLine]]
+    state.sCount[startLine] = offset - initial
+
+    oldTShift = [state.tShift[startLine]]
+    state.tShift[startLine] = pos - state.bMarks[startLine]
+
+    terminatorRules = state.md.block.ruler.getRules("blockquote")
+
+    oldParentType = state.parentType
+    state.parentType = "blockquote"
+
+    # Search the end of the block
+    #
+    # Block ends with either:
+    #  1. an empty line outside:
+    #     ```
+    #     > test
+    #
+    #     ```
+    #  2. an empty line inside:
+    #     ```
+    #     >
+    #     test
+    #     ```
+    #  3. another tag:
+    #     ```
+    #     > test
+    #      - - -
+    #     ```
+
+    # for (nextLine = startLine + 1; nextLine < endLine; nextLine++) {
+    nextLine = startLine + 1
+    while nextLine < endLine:
+
+        # check if it's outdented, i.e. it's inside list item and indented
+        # less than said list item:
+        #
+        # ```
+        # 1. anything
+        #    > current blockquote
+        # 2. checking this line
+        # ```
+        isOutdented = state.sCount[nextLine] < state.blkIndent
+
+        pos = state.bMarks[nextLine] + state.tShift[nextLine]
+        max = state.eMarks[nextLine]
+
+        if pos >= max:
+            # Case 1: line is not inside the blockquote, and this line is empty.
+            break
+
+        evaluatesTrue = state.srcCharCode[pos] == 0x3E and not isOutdented  # /* > */
+        pos += 1
+        if evaluatesTrue:
+            # This line is inside the blockquote.
+
+            # set offset past spaces and ">"
+            initial = offset = state.sCount[nextLine] + 1
+
+            try:
+                next_char: int | None = state.srcCharCode[pos]
+            except IndexError:
+                next_char = None
+
+            # skip one optional space after '>'
+            if next_char == 0x20:  # /* space */
+                # ' >   test '
+                #     ^ -- position start of line here:
+                pos += 1
+                initial += 1
+                offset += 1
+                adjustTab = False
+                spaceAfterMarker = True
+            elif next_char == 0x09:  # /* tab */
+                spaceAfterMarker = True
+
+                if (state.bsCount[nextLine] + offset) % 4 == 3:
+                    # '  >\t  test '
+                    #       ^ -- position start of line here (tab has width==1)
+                    pos += 1
+                    initial += 1
+                    offset += 1
+                    adjustTab = False
+                else:
+                    # ' >\t  test '
+                    #    ^ -- position start of line here + shift bsCount slightly
+                    #         to make extra space appear
+                    adjustTab = True
+
+            else:
+                spaceAfterMarker = False
+
+            oldBMarks.append(state.bMarks[nextLine])
+            state.bMarks[nextLine] = pos
+
+            while pos < max:
+                ch = state.srcCharCode[pos]
+
+                if isSpace(ch):
+                    if ch == 0x09:
+                        offset += (
+                            4
+                            - (
+                                offset
+                                + state.bsCount[nextLine]
+                                + (1 if adjustTab else 0)
+                            )
+                            % 4
+                        )
+                    else:
+                        offset += 1
+                else:
+                    break
+
+                pos += 1
+
+            lastLineEmpty = pos >= max
+
+            oldBSCount.append(state.bsCount[nextLine])
+            state.bsCount[nextLine] = (
+                state.sCount[nextLine] + 1 + (1 if spaceAfterMarker else 0)
+            )
+
+            oldSCount.append(state.sCount[nextLine])
+            state.sCount[nextLine] = offset - initial
+
+            oldTShift.append(state.tShift[nextLine])
+            state.tShift[nextLine] = pos - state.bMarks[nextLine]
+
+            nextLine += 1
+            continue
+
+        # Case 2: line is not inside the blockquote, and the last line was empty.
+        if lastLineEmpty:
+            break
+
+        # Case 3: another tag found.
+        terminate = False
+
+        for terminatorRule in terminatorRules:
+            if terminatorRule(state, nextLine, endLine, True):
+                terminate = True
+                break
+
+        if terminate:
+            # Quirk to enforce "hard termination mode" for paragraphs;
+            # normally if you call `tokenize(state, startLine, nextLine)`,
+            # paragraphs will look below nextLine for paragraph continuation,
+            # but if blockquote is terminated by another tag, they shouldn't
+            state.lineMax = nextLine
+
+            if state.blkIndent != 0:
+                # state.blkIndent was non-zero, we now set it to zero,
+                # so we need to re-calculate all offsets to appear as
+                # if indent wasn't changed
+                oldBMarks.append(state.bMarks[nextLine])
+                oldBSCount.append(state.bsCount[nextLine])
+                oldTShift.append(state.tShift[nextLine])
+                oldSCount.append(state.sCount[nextLine])
+                state.sCount[nextLine] -= state.blkIndent
+
+            break
+
+        oldBMarks.append(state.bMarks[nextLine])
+        oldBSCount.append(state.bsCount[nextLine])
+        oldTShift.append(state.tShift[nextLine])
+        oldSCount.append(state.sCount[nextLine])
+
+        # A negative indentation means that this is a paragraph continuation
+        #
+        state.sCount[nextLine] = -1
+
+        nextLine += 1
+
+    oldIndent = state.blkIndent
+    state.blkIndent = 0
+
+    token = state.push("blockquote_open", "blockquote", 1)
+    token.markup = ">"
+    token.map = lines = [startLine, 0]
+
+    state.md.block.tokenize(state, startLine, nextLine)
+
+    token = state.push("blockquote_close", "blockquote", -1)
+    token.markup = ">"
+
+    state.lineMax = oldLineMax
+    state.parentType = oldParentType
+    lines[1] = state.line
+
+    # Restore original tShift; this might not be necessary since the parser
+    # has already been here, but just to make sure we can do that.
+    for i, item in enumerate(oldTShift):
+        state.bMarks[i + startLine] = oldBMarks[i]
+        state.tShift[i + startLine] = item
+        state.sCount[i + startLine] = oldSCount[i]
+        state.bsCount[i + startLine] = oldBSCount[i]
+
+    state.blkIndent = oldIndent
+
+    return True
diff --git a/markdown_it/rules_block/code.py b/markdown_it/rules_block/code.py
new file mode 100644
index 0000000..c4fdba3
--- /dev/null
+++ b/markdown_it/rules_block/code.py
@@ -0,0 +1,36 @@
+"""Code block (4 spaces padded)."""
+import logging
+
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
+def code(state: StateBlock, startLine: int, endLine: int, silent: bool = False):
+
+    LOGGER.debug("entering code: %s, %s, %s, %s", state, startLine, endLine, silent)
+
+    if state.sCount[startLine] - state.blkIndent < 4:
+        return False
+
+    last = nextLine = startLine + 1
+
+    while nextLine < endLine:
+        if state.isEmpty(nextLine):
+            nextLine += 1
+            continue
+
+        if state.sCount[nextLine] - state.blkIndent >= 4:
+            nextLine += 1
+            last = nextLine
+            continue
+
+        break
+
+    state.line = last
+
+    token = state.push("code_block", "code", 0)
+    token.content = state.getLines(startLine, last, 4 + state.blkIndent, False) + "\n"
+    token.map = [startLine, state.line]
+
+    return True
diff --git a/markdown_it/rules_block/fence.py b/markdown_it/rules_block/fence.py
new file mode 100644
index 0000000..c4f5275
--- /dev/null
+++ b/markdown_it/rules_block/fence.py
@@ -0,0 +1,104 @@
+# fences (``` lang, ~~~ lang)
+import logging
+
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
+def fence(state: StateBlock, startLine: int, endLine: int, silent: bool):
+
+    LOGGER.debug("entering fence: %s, %s, %s, %s", state, startLine, endLine, silent)
+
+    haveEndMarker = False
+    pos = state.bMarks[startLine] + state.tShift[startLine]
+    maximum = state.eMarks[startLine]
+
+    # if it's indented more than 3 spaces, it should be a code block
+    if state.sCount[startLine] - state.blkIndent >= 4:
+        return False
+
+    if pos + 3 > maximum:
+        return False
+
+    marker = state.srcCharCode[pos]
+
+    # /* ~ */  /* ` */
+    if marker != 0x7E and marker != 0x60:
+        return False
+
+    # scan marker length
+    mem = pos
+    pos = state.skipChars(pos, marker)
+
+    length = pos - mem
+
+    if length < 3:
+        return False
+
+    markup = state.src[mem:pos]
+    params = state.src[pos:maximum]
+
+    # /* ` */
+    if marker == 0x60:
+        if chr(marker) in params:
+            return False
+
+    # Since start is found, we can report success here in validation mode
+    if silent:
+        return True
+
+    # search end of block
+    nextLine = startLine
+
+    while True:
+        nextLine += 1
+        if nextLine >= endLine:
+            # unclosed block should be autoclosed by end of document.
+            # also block seems to be autoclosed by end of parent
+            break
+
+        pos = mem = state.bMarks[nextLine] + state.tShift[nextLine]
+        maximum = state.eMarks[nextLine]
+
+        if pos < maximum and state.sCount[nextLine] < state.blkIndent:
+            # non-empty line with negative indent should stop the list:
+            # - ```
+            #  test
+            break
+
+        if state.srcCharCode[pos] != marker:
+            continue
+
+        if state.sCount[nextLine] - state.blkIndent >= 4:
+            # closing fence should be indented less than 4 spaces
+            continue
+
+        pos = state.skipChars(pos, marker)
+
+        # closing code fence must be at least as long as the opening one
+        if pos - mem < length:
+            continue
+
+        # make sure tail has spaces only
+        pos = state.skipSpaces(pos)
+
+        if pos < maximum:
+            continue
+
+        haveEndMarker = True
+        # found!
+        break
+
+    # If a fence has heading spaces, they should be removed from its inner block
+    length = state.sCount[startLine]
+
+    state.line = nextLine + (1 if haveEndMarker else 0)
+
+    token = state.push("fence", "code", 0)
+    token.info = params
+    token.content = state.getLines(startLine + 1, nextLine, length, True)
+    token.markup = markup
+    token.map = [startLine, state.line]
+
+    return True
diff --git a/markdown_it/rules_block/heading.py b/markdown_it/rules_block/heading.py
new file mode 100644
index 0000000..8d4ef3e
--- /dev/null
+++ b/markdown_it/rules_block/heading.py
@@ -0,0 +1,72 @@
+"""ATX heading (#, ##, ...)."""
+from __future__ import annotations
+
+import logging
+
+from ..common.utils import isSpace
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
+def heading(state: StateBlock, startLine: int, endLine: int, silent: bool):
+
+    LOGGER.debug("entering heading: %s, %s, %s, %s", state, startLine, endLine, silent)
+
+    pos = state.bMarks[startLine] + state.tShift[startLine]
+    maximum = state.eMarks[startLine]
+
+    # if it's indented more than 3 spaces, it should be a code block
+    if state.sCount[startLine] - state.blkIndent >= 4:
+        return False
+
+    ch: int | None = state.srcCharCode[pos]
+
+    # /* # */
+    if ch != 0x23 or pos >= maximum:
+        return False
+
+    # count heading level
+    level = 1
+    pos += 1
+    try:
+        ch = state.srcCharCode[pos]
+    except IndexError:
+        ch = None
+    # /* # */
+    while ch == 0x23 and pos < maximum and level <= 6:
+        level += 1
+        pos += 1
+        try:
+            ch = state.srcCharCode[pos]
+        except IndexError:
+            ch = None
+
+    if level > 6 or (pos < maximum and not isSpace(ch)):
+        return False
+
+    if silent:
+        return True
+
+    # Let's cut tails like '    ###  ' from the end of string
+
+    maximum = state.skipSpacesBack(maximum, pos)
+    tmp = state.skipCharsBack(maximum, 0x23, pos)  # #
+    if tmp > pos and isSpace(state.srcCharCode[tmp - 1]):
+        maximum = tmp
+
+    state.line = startLine + 1
+
+    token = state.push("heading_open", "h" + str(level), 1)
+    token.markup = "########"[:level]
+    token.map = [startLine, state.line]
+
+    token = state.push("inline", "", 0)
+    token.content = state.src[pos:maximum].strip()
+    token.map = [startLine, state.line]
+    token.children = []
+
+    token = state.push("heading_close", "h" + str(level), -1)
+    token.markup = "########"[:level]
+
+    return True
diff --git a/markdown_it/rules_block/hr.py b/markdown_it/rules_block/hr.py
new file mode 100644
index 0000000..804cd9d
--- /dev/null
+++ b/markdown_it/rules_block/hr.py
@@ -0,0 +1,54 @@
+"""Horizontal rule
+
+At least 3 of these characters on a line * - _
+"""
+import logging
+
+from ..common.utils import isSpace
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
+def hr(state: StateBlock, startLine: int, endLine: int, silent: bool):
+
+    LOGGER.debug("entering hr: %s, %s, %s, %s", state, startLine, endLine, silent)
+
+    pos = state.bMarks[startLine] + state.tShift[startLine]
+    maximum = state.eMarks[startLine]
+
+    # if it's indented more than 3 spaces, it should be a code block
+    if state.sCount[startLine] - state.blkIndent >= 4:
+        return False
+
+    marker = state.srcCharCode[pos]
+    pos += 1
+
+    # Check hr marker: /* * */ /* - */ /* _ */
+    if marker != 0x2A and marker != 0x2D and marker != 0x5F:
+        return False
+
+    # markers can be mixed with spaces, but there should be at least 3 of them
+
+    cnt = 1
+    while pos < maximum:
+        ch = state.srcCharCode[pos]
+        pos += 1
+        if ch != marker and not isSpace(ch):
+            return False
+        if ch == marker:
+            cnt += 1
+
+    if cnt < 3:
+        return False
+
+    if silent:
+        return True
+
+    state.line = startLine + 1
+
+    token = state.push("hr", "hr", 0)
+    token.map = [startLine, state.line]
+    token.markup = chr(marker) * (cnt + 1)
+
+    return True
diff --git a/markdown_it/rules_block/html_block.py b/markdown_it/rules_block/html_block.py
new file mode 100644
index 0000000..31afab7
--- /dev/null
+++ b/markdown_it/rules_block/html_block.py
@@ -0,0 +1,91 @@
+# HTML block
+from __future__ import annotations
+
+import logging
+import re
+
+from ..common.html_blocks import block_names
+from ..common.html_re import HTML_OPEN_CLOSE_TAG_STR
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+# Opening and corresponding closing sequences for HTML blocks. Each entry is
+# (opening pattern, closing pattern, can-terminate-a-paragraph flag):
+# - the opening pattern is searched in the first line of a candidate block;
+#   the first entry that matches wins, so order matters
+# - the closing pattern is searched in that line and each following line;
+#   `^$` only matches an empty line
+# - the flag is what `html_block` returns in silent (validation) mode
+HTML_SEQUENCES: list[tuple[re.Pattern, re.Pattern, bool]] = [
+    (
+        re.compile(r"^<(script|pre|style|textarea)(?=(\s|>|$))", re.IGNORECASE),
+        re.compile(r"<\/(script|pre|style|textarea)>", re.IGNORECASE),
+        True,
+    ),
+    (re.compile(r"^<!--"), re.compile(r"-->"), True),
+    (re.compile(r"^<\?"), re.compile(r"\?>"), True),
+    (re.compile(r"^<![A-Z]"), re.compile(r">"), True),
+    (re.compile(r"^<!\[CDATA\["), re.compile(r"\]\]>"), True),
+    (
+        re.compile("^</?(" + "|".join(block_names) + ")(?=(\\s|/?>|$))", re.IGNORECASE),
+        re.compile(r"^$"),
+        True,
+    ),
+    (re.compile(HTML_OPEN_CLOSE_TAG_STR + "\\s*$"), re.compile(r"^$"), False),
+]
+
+
+def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool):
+    LOGGER.debug(
+        "entering html_block: %s, %s, %s, %s", state, startLine, endLine, silent
+    )
+    pos = state.bMarks[startLine] + state.tShift[startLine]
+    maximum = state.eMarks[startLine]
+
+    # if it's indented more than 3 spaces, it should be a code block
+    if state.sCount[startLine] - state.blkIndent >= 4:
+        return False
+
+    if not state.md.options.get("html", None):
+        return False
+
+    if state.srcCharCode[pos] != 0x3C:  # /* < */
+        return False
+
+    lineText = state.src[pos:maximum]
+
+    html_seq = None
+    for HTML_SEQUENCE in HTML_SEQUENCES:
+        if HTML_SEQUENCE[0].search(lineText):
+            html_seq = HTML_SEQUENCE
+            break
+
+    if not html_seq:
+        return False
+
+    if silent:
+        # true if this sequence can be a terminator, false otherwise
+        return html_seq[2]
+
+    nextLine = startLine + 1
+
+    # If we are here - we detected HTML block.
+    # Let's roll down till block end.
+    if not html_seq[1].search(lineText):
+        while nextLine < endLine:
+            if state.sCount[nextLine] < state.blkIndent:
+                break
+
+            pos = state.bMarks[nextLine] + state.tShift[nextLine]
+            maximum = state.eMarks[nextLine]
+            lineText = state.src[pos:maximum]
+
+            if html_seq[1].search(lineText):
+                if len(lineText) != 0:
+                    nextLine += 1
+                break
+            nextLine += 1
+
+    state.line = nextLine
+
+    token = state.push("html_block", "", 0)
+    token.map = [startLine, nextLine]
+    token.content = state.getLines(startLine, nextLine, state.blkIndent, True)
+
+    return True
diff --git a/markdown_it/rules_block/lheading.py b/markdown_it/rules_block/lheading.py
new file mode 100644
index 0000000..f26e2af
--- /dev/null
+++ b/markdown_it/rules_block/lheading.py
@@ -0,0 +1,90 @@
+# lheading: setext headings, i.e. text underlined with `===` (h1) or `---` (h2)
+import logging
+
+from ..ruler import Ruler
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
+def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool):
+
+    LOGGER.debug("entering lheading: %s, %s, %s, %s", state, startLine, endLine, silent)
+
+    level = None
+    nextLine = startLine + 1
+    ruler: Ruler = state.md.block.ruler
+    terminatorRules = ruler.getRules("paragraph")
+
+    # if it's indented more than 3 spaces, it should be a code block
+    if state.sCount[startLine] - state.blkIndent >= 4:
+        return False
+
+    oldParentType = state.parentType
+    state.parentType = "paragraph"  # use paragraph to match terminatorRules
+
+    # jump line-by-line until empty one or EOF
+    while nextLine < endLine and not state.isEmpty(nextLine):
+        # this would be a code block normally, but after paragraph
+        # it's considered a lazy continuation regardless of what's there
+        if state.sCount[nextLine] - state.blkIndent > 3:
+            nextLine += 1
+            continue
+
+        # Check for underline in setext header
+        if state.sCount[nextLine] >= state.blkIndent:
+            pos = state.bMarks[nextLine] + state.tShift[nextLine]
+            maximum = state.eMarks[nextLine]
+
+            if pos < maximum:
+                marker = state.srcCharCode[pos]
+
+                # /* - */  /* = */
+                if marker == 0x2D or marker == 0x3D:
+                    pos = state.skipChars(pos, marker)
+                    pos = state.skipSpaces(pos)
+
+                    # /* = */
+                    if pos >= maximum:
+                        level = 1 if marker == 0x3D else 2
+                        break
+
+        # quirk for blockquotes, this line should already be checked by that rule
+        if state.sCount[nextLine] < 0:
+            nextLine += 1
+            continue
+
+        # Some tags can terminate paragraph without empty line.
+        terminate = False
+        for terminatorRule in terminatorRules:
+            if terminatorRule(state, nextLine, endLine, True):
+                terminate = True
+                break
+        if terminate:
+            break
+
+        nextLine += 1
+
+    if not level:
+        # Didn't find valid underline
+        return False
+
+    content = state.getLines(startLine, nextLine, state.blkIndent, False).strip()
+
+    state.line = nextLine + 1
+
+    token = state.push("heading_open", "h" + str(level), 1)
+    token.markup = chr(marker)
+    token.map = [startLine, state.line]
+
+    token = state.push("inline", "", 0)
+    token.content = content
+    token.map = [startLine, state.line - 1]
+    token.children = []
+
+    token = state.push("heading_close", "h" + str(level), -1)
+    token.markup = chr(marker)
+
+    state.parentType = oldParentType
+
+    return True
diff --git a/markdown_it/rules_block/list.py b/markdown_it/rules_block/list.py
new file mode 100644
index 0000000..a7617ad
--- /dev/null
+++ b/markdown_it/rules_block/list.py
@@ -0,0 +1,344 @@
+# Lists
+import logging
+
+from ..common.utils import isSpace
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
+# Search `[-+*][\n ]`, returns next pos after marker on success
+# or -1 on fail.
+def skipBulletListMarker(state: StateBlock, startLine: int):
+
+    pos = state.bMarks[startLine] + state.tShift[startLine]
+    maximum = state.eMarks[startLine]
+
+    marker = state.srcCharCode[pos]
+    pos += 1
+    # Check bullet /* * */ /* - */ /* + */
+    if marker != 0x2A and marker != 0x2D and marker != 0x2B:
+        return -1
+
+    if pos < maximum:
+        ch = state.srcCharCode[pos]
+
+        if not isSpace(ch):
+            # " -test " - is not a list item
+            return -1
+
+    return pos
+
+
+# Search `\d+[.)][\n ]`, returns next pos after marker on success
+# or -1 on fail.
+def skipOrderedListMarker(state: StateBlock, startLine: int):
+
+    start = state.bMarks[startLine] + state.tShift[startLine]
+    pos = start
+    maximum = state.eMarks[startLine]
+
+    # List marker should have at least 2 chars (digit + dot)
+    if pos + 1 >= maximum:
+        return -1
+
+    ch = state.srcCharCode[pos]
+    pos += 1
+
+    # /* 0 */  /* 9 */
+    if ch < 0x30 or ch > 0x39:
+        return -1
+
+    while True:
+        # EOL -> fail
+        if pos >= maximum:
+            return -1
+
+        ch = state.srcCharCode[pos]
+        pos += 1
+
+        # /* 0 */  /* 9 */
+        if ch >= 0x30 and ch <= 0x39:
+
+            # List marker should have no more than 9 digits
+            # (prevents integer overflow in browsers)
+            if pos - start >= 10:
+                return -1
+
+            continue
+
+        # found valid marker: /* ) */ /* . */
+        if ch == 0x29 or ch == 0x2E:
+            break
+
+        return -1
+
+    if pos < maximum:
+        ch = state.srcCharCode[pos]
+
+        if not isSpace(ch):
+            # " 1.test " - is not a list item
+            return -1
+
+    return pos
+
+
+def markTightParagraphs(state: StateBlock, idx: int):
+    level = state.level + 2
+
+    i = idx + 2
+    length = len(state.tokens) - 2
+    while i < length:
+        if state.tokens[i].level == level and state.tokens[i].type == "paragraph_open":
+            state.tokens[i + 2].hidden = True
+            state.tokens[i].hidden = True
+            i += 2
+        i += 1
+
+
+def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool):
+    """Block rule for bullet and ordered lists.
+
+    :param state: block parser state; indentation fields are temporarily
+        changed while each item's content is tokenized and restored afterwards
+    :param startLine: index of the line to test for a list marker
+    :param endLine: index one past the last line that may be consumed
+    :param silent: validation mode; only report whether a list starts here
+    :return: ``False`` if no list starts on ``startLine``, ``True`` otherwise
+        (list tokens are only pushed when ``silent`` is false)
+    """
+
+    LOGGER.debug("entering list: %s, %s, %s, %s", state, startLine, endLine, silent)
+
+    isTerminatingParagraph = False
+    tight = True
+
+    # if it's indented more than 3 spaces, it should be a code block
+    if state.sCount[startLine] - state.blkIndent >= 4:
+        return False
+
+    # Special case:
+    #  - item 1
+    #   - item 2
+    #    - item 3
+    #     - item 4
+    #      - this one is a paragraph continuation
+    if (
+        state.listIndent >= 0
+        and state.sCount[startLine] - state.listIndent >= 4
+        and state.sCount[startLine] < state.blkIndent
+    ):
+        return False
+
+    # limit conditions when list can interrupt
+    # a paragraph (validation mode only)
+    if silent and state.parentType == "paragraph":
+        # Next list item should still terminate previous list item
+        #
+        # This code can fail if plugins use blkIndent as well as lists,
+        # but I hope the spec gets fixed long before that happens.
+        #
+        if state.tShift[startLine] >= state.blkIndent:
+            isTerminatingParagraph = True
+
+    # Detect list type and position after marker
+    posAfterMarker = skipOrderedListMarker(state, startLine)
+    if posAfterMarker >= 0:
+        isOrdered = True
+        start = state.bMarks[startLine] + state.tShift[startLine]
+        # the digits in front of the '.' / ')' delimiter
+        markerValue = int(state.src[start : posAfterMarker - 1])
+
+        # If we're starting a new ordered list right after
+        # a paragraph, it should start with 1.
+        if isTerminatingParagraph and markerValue != 1:
+            return False
+    else:
+        posAfterMarker = skipBulletListMarker(state, startLine)
+        if posAfterMarker >= 0:
+            isOrdered = False
+        else:
+            return False
+
+    # If we're starting a new unordered list right after
+    # a paragraph, first line should not be empty.
+    if isTerminatingParagraph:
+        if state.skipSpaces(posAfterMarker) >= state.eMarks[startLine]:
+            return False
+
+    # We should terminate list on style change. Remember first one to compare.
+    markerCharCode = state.srcCharCode[posAfterMarker - 1]
+
+    # For validation mode we can terminate immediately
+    if silent:
+        return True
+
+    # Start list
+    listTokIdx = len(state.tokens)
+
+    if isOrdered:
+        token = state.push("ordered_list_open", "ol", 1)
+        if markerValue != 1:
+            token.attrs = {"start": markerValue}
+
+    else:
+        token = state.push("bullet_list_open", "ul", 1)
+
+    # the end line is filled in once the whole list has been consumed
+    token.map = listLines = [startLine, 0]
+    token.markup = chr(markerCharCode)
+
+    #
+    # Iterate list items
+    #
+
+    nextLine = startLine
+    prevEmptyEnd = False
+    terminatorRules = state.md.block.ruler.getRules("list")
+
+    oldParentType = state.parentType
+    state.parentType = "list"
+
+    while nextLine < endLine:
+        pos = posAfterMarker
+        maximum = state.eMarks[nextLine]
+
+        # column just after the marker
+        initial = offset = (
+            state.sCount[nextLine]
+            + posAfterMarker
+            - (state.bMarks[startLine] + state.tShift[startLine])
+        )
+
+        # measure the whitespace between the marker and the item content
+        # (tabs expand to 4-column tab stops)
+        while pos < maximum:
+            ch = state.srcCharCode[pos]
+
+            if ch == 0x09:  # \t
+                offset += 4 - (offset + state.bsCount[nextLine]) % 4
+            elif ch == 0x20:  # \s
+                offset += 1
+            else:
+                break
+
+            pos += 1
+
+        contentStart = pos
+
+        if contentStart >= maximum:
+            # trimming space in "-    \n  3" case, indent is 1 here
+            indentAfterMarker = 1
+        else:
+            indentAfterMarker = offset - initial
+
+        # If we have more than 4 spaces, the indent is 1
+        # (the rest is just indented code block)
+        if indentAfterMarker > 4:
+            indentAfterMarker = 1
+
+        # "  -  test"
+        #  ^^^^^ - calculating total length of this thing
+        indent = initial + indentAfterMarker
+
+        # Run subparser & write tokens
+        token = state.push("list_item_open", "li", 1)
+        token.markup = chr(markerCharCode)
+        # the end line is filled in after the item has been tokenized
+        token.map = itemLines = [startLine, 0]
+        if isOrdered:
+            token.info = state.src[start : posAfterMarker - 1]
+
+        # change current state, then restore it after parser subcall
+        oldTight = state.tight
+        oldTShift = state.tShift[startLine]
+        oldSCount = state.sCount[startLine]
+
+        #  - example list
+        # ^ listIndent position will be here
+        #   ^ blkIndent position will be here
+        #
+        oldListIndent = state.listIndent
+        state.listIndent = state.blkIndent
+        state.blkIndent = indent
+
+        state.tight = True
+        state.tShift[startLine] = contentStart - state.bMarks[startLine]
+        state.sCount[startLine] = offset
+
+        if contentStart >= maximum and state.isEmpty(startLine + 1):
+            # workaround for this case
+            # (list item is empty, list terminates before "foo"):
+            # ~~~~~~~~
+            #   -
+            #
+            #     foo
+            # ~~~~~~~~
+            state.line = min(state.line + 2, endLine)
+        else:
+            # NOTE in list.js this was:
+            # state.md.block.tokenize(state, startLine, endLine, True)
+            # but tokenize does not take the final parameter
+            state.md.block.tokenize(state, startLine, endLine)
+
+        # If any of list item is tight, mark list as tight
+        if (not state.tight) or prevEmptyEnd:
+            tight = False
+
+        # Item become loose if finish with empty line,
+        # but we should filter last element, because it means list finish
+        prevEmptyEnd = (state.line - startLine) > 1 and state.isEmpty(state.line - 1)
+
+        # restore the state changed before the subparser call
+        state.blkIndent = state.listIndent
+        state.listIndent = oldListIndent
+        state.tShift[startLine] = oldTShift
+        state.sCount[startLine] = oldSCount
+        state.tight = oldTight
+
+        token = state.push("list_item_close", "li", -1)
+        token.markup = chr(markerCharCode)
+
+        nextLine = startLine = state.line
+        itemLines[1] = nextLine
+
+        if nextLine >= endLine:
+            break
+
+        contentStart = state.bMarks[startLine]
+
+        #
+        # Try to check if list is terminated or continued.
+        #
+        if state.sCount[nextLine] < state.blkIndent:
+            break
+
+        # if it's indented more than 3 spaces, it should be a code block
+        if state.sCount[startLine] - state.blkIndent >= 4:
+            break
+
+        # fail if terminating block found
+        terminate = False
+        for terminatorRule in terminatorRules:
+            if terminatorRule(state, nextLine, endLine, True):
+                terminate = True
+                break
+
+        if terminate:
+            break
+
+        # fail if list has another type
+        if isOrdered:
+            posAfterMarker = skipOrderedListMarker(state, nextLine)
+            if posAfterMarker < 0:
+                break
+            start = state.bMarks[nextLine] + state.tShift[nextLine]
+        else:
+            posAfterMarker = skipBulletListMarker(state, nextLine)
+            if posAfterMarker < 0:
+                break
+
+        # a different marker character starts a new list
+        if markerCharCode != state.srcCharCode[posAfterMarker - 1]:
+            break
+
+    # Finalize list
+    if isOrdered:
+        token = state.push("ordered_list_close", "ol", -1)
+    else:
+        token = state.push("bullet_list_close", "ul", -1)
+
+    token.markup = chr(markerCharCode)
+
+    listLines[1] = nextLine
+    state.line = nextLine
+
+    state.parentType = oldParentType
+
+    # mark paragraphs tight if needed
+    if tight:
+        markTightParagraphs(state, listTokIdx)
+
+    return True
diff --git a/markdown_it/rules_block/paragraph.py b/markdown_it/rules_block/paragraph.py
new file mode 100644
index 0000000..4fee83e
--- /dev/null
+++ b/markdown_it/rules_block/paragraph.py
@@ -0,0 +1,67 @@
+"""Paragraph."""
+import logging
+
+from ..ruler import Ruler
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
+def paragraph(state: StateBlock, startLine: int, endLine: int, silent: bool = False):
+    """Push paragraph tokens for lines from startLine; returns True, silent is only logged."""
+    LOGGER.debug(
+        "entering paragraph: %s, %s, %s, %s", state, startLine, endLine, silent
+    )
+
+    nextLine = startLine + 1
+    ruler: Ruler = state.md.block.ruler
+    terminatorRules = ruler.getRules("paragraph")
+    endLine = state.lineMax  # replaces the endLine argument: scan up to the last line
+
+    oldParentType = state.parentType
+    state.parentType = "paragraph"  # in effect while terminator rules are probed
+
+    # jump line-by-line until empty one or EOF
+    while nextLine < endLine:
+        if state.isEmpty(nextLine):
+            break
+        # this would be a code block normally, but after paragraph
+        # it's considered a lazy continuation regardless of what's there
+        if state.sCount[nextLine] - state.blkIndent > 3:
+            nextLine += 1
+            continue
+
+        # quirk for blockquotes, this line should already be checked by that rule
+        if state.sCount[nextLine] < 0:
+            nextLine += 1
+            continue
+
+        # Some rules can end a paragraph without an empty line (called with silent=True).
+        terminate = False
+        for terminatorRule in terminatorRules:
+            if terminatorRule(state, nextLine, endLine, True):
+                terminate = True
+                break
+
+        if terminate:
+            break
+
+        nextLine += 1
+
+    content = state.getLines(startLine, nextLine, state.blkIndent, False).strip()
+
+    state.line = nextLine
+
+    token = state.push("paragraph_open", "p", 1)
+    token.map = [startLine, state.line]
+
+    token = state.push("inline", "", 0)
+    token.content = content
+    token.map = [startLine, state.line]
+    token.children = []
+
+    token = state.push("paragraph_close", "p", -1)
+
+    state.parentType = oldParentType
+
+    return True
diff --git a/markdown_it/rules_block/reference.py b/markdown_it/rules_block/reference.py
new file mode 100644
index 0000000..35adde2
--- /dev/null
+++ b/markdown_it/rules_block/reference.py
@@ -0,0 +1,218 @@
+import logging
+
+from ..common.utils import charCodeAt, isSpace, normalizeReference
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
+def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> bool:
+    """Parse a link reference definition and record it in state.env["references"]."""
+    LOGGER.debug(
+        "entering reference: %s, %s, %s, %s", state, startLine, _endLine, silent
+    )
+
+    lines = 0  # line breaks consumed by the definition after startLine
+    pos = state.bMarks[startLine] + state.tShift[startLine]
+    maximum = state.eMarks[startLine]
+    nextLine = startLine + 1
+
+    # if it's indented more than 3 spaces, it should be a code block
+    if state.sCount[startLine] - state.blkIndent >= 4:
+        return False
+
+    if state.srcCharCode[pos] != 0x5B:  # /* [ */
+        return False
+
+    # Simple check to quickly interrupt scan on [link](url) at the start of line.
+    # Can be useful on practice: https://github.com/markdown-it/markdown-it/issues/54
+    while pos < maximum:
+        # /* ] */  /* \ */  /* : */
+        if state.srcCharCode[pos] == 0x5D and state.srcCharCode[pos - 1] != 0x5C:
+            if pos + 1 == maximum:
+                return False
+            if state.srcCharCode[pos + 1] != 0x3A:
+                return False
+            break
+        pos += 1
+
+    endLine = state.lineMax
+
+    # jump line-by-line until empty one or EOF
+    terminatorRules = state.md.block.ruler.getRules("reference")
+
+    oldParentType = state.parentType
+    state.parentType = "reference"
+
+    while nextLine < endLine and not state.isEmpty(nextLine):
+        # this would be a code block normally, but after paragraph
+        # it's considered a lazy continuation regardless of what's there
+        if state.sCount[nextLine] - state.blkIndent > 3:
+            nextLine += 1
+            continue
+
+        # quirk for blockquotes, this line should already be checked by that rule
+        if state.sCount[nextLine] < 0:
+            nextLine += 1
+            continue
+
+        # Some tags can terminate paragraph without empty line.
+        terminate = False
+        for terminatorRule in terminatorRules:
+            if terminatorRule(state, nextLine, endLine, True):
+                terminate = True
+                break
+
+        if terminate:
+            break
+
+        nextLine += 1
+
+    state.parentType = oldParentType  # restore before any of the returns below
+
+    string = state.getLines(startLine, nextLine, state.blkIndent, False).strip()
+    maximum = len(string)
+
+    labelEnd = None
+    pos = 1
+    while pos < maximum:
+        ch = charCodeAt(string, pos)
+        if ch == 0x5B:  # /* [ */
+            return False
+        elif ch == 0x5D:  # /* ] */
+            labelEnd = pos
+            break
+        elif ch == 0x0A:  # /* \n */
+            lines += 1
+        elif ch == 0x5C:  # /* \ */
+            pos += 1
+            if pos < maximum and charCodeAt(string, pos) == 0x0A:
+                lines += 1
+        pos += 1
+
+    if (
+        labelEnd is None or labelEnd < 0 or charCodeAt(string, labelEnd + 1) != 0x3A
+    ):  # /* : */
+        return False
+
+    # [label]:   destination   'title'
+    #         ^^^ skip optional whitespace here
+    pos = labelEnd + 2
+    while pos < maximum:
+        ch = charCodeAt(string, pos)
+        if ch == 0x0A:
+            lines += 1
+        elif isSpace(ch):
+            pass
+        else:
+            break
+        pos += 1
+
+    # [label]:   destination   'title'
+    #            ^^^^^^^^^^^ parse this
+    res = state.md.helpers.parseLinkDestination(string, pos, maximum)
+    if not res.ok:
+        return False
+
+    href = state.md.normalizeLink(res.str)
+    if not state.md.validateLink(href):
+        return False
+
+    pos = res.pos
+    lines += res.lines
+
+    # save cursor state, we could require to rollback later
+    destEndPos = pos
+    destEndLineNo = lines
+
+    # [label]:   destination   'title'
+    #                       ^^^ skipping those spaces
+    start = pos
+    while pos < maximum:
+        ch = charCodeAt(string, pos)
+        if ch == 0x0A:
+            lines += 1
+        elif isSpace(ch):
+            pass
+        else:
+            break
+        pos += 1
+
+    # [label]:   destination   'title'
+    #                          ^^^^^^^ parse this
+    res = state.md.helpers.parseLinkTitle(string, pos, maximum)
+    if pos < maximum and start != pos and res.ok:
+        title = res.str
+        pos = res.pos
+        lines += res.lines
+    else:
+        title = ""
+        pos = destEndPos
+        lines = destEndLineNo
+
+    # skip trailing spaces until the rest of the line
+    while pos < maximum:
+        ch = charCodeAt(string, pos)
+        if not isSpace(ch):
+            break
+        pos += 1
+
+    if pos < maximum and charCodeAt(string, pos) != 0x0A:
+        if title:
+            # garbage at the end of the line after title,
+            # but it could still be a valid reference if we roll back
+            title = ""
+            pos = destEndPos
+            lines = destEndLineNo
+            while pos < maximum:
+                ch = charCodeAt(string, pos)
+                if not isSpace(ch):
+                    break
+                pos += 1
+
+    if pos < maximum and charCodeAt(string, pos) != 0x0A:
+        # garbage at the end of the line
+        return False
+
+    label = normalizeReference(string[1:labelEnd])
+    if not label:
+        # CommonMark 0.20 disallows empty labels
+        return False
+
+    # Reference can not terminate anything. This check is for safety only.
+    if silent:
+        return True
+
+    if "references" not in state.env:
+        state.env["references"] = {}
+
+    state.line = startLine + lines + 1
+
+    # note, this is not part of markdown-it JS, but is useful for renderers
+    if state.md.options.get("inline_definitions", False):
+        token = state.push("definition", "", 0)
+        token.meta = {
+            "id": label,
+            "title": title,
+            "url": href,
+            "label": string[1:labelEnd],
+        }
+        token.map = [startLine, state.line]
+
+    if label not in state.env["references"]:
+        state.env["references"][label] = {
+            "title": title,
+            "href": href,
+            "map": [startLine, state.line],
+        }
+    else:  # first definition wins; the duplicate is recorded separately
+        state.env.setdefault("duplicate_refs", []).append(
+            {
+                "title": title,
+                "href": href,
+                "label": label,
+                "map": [startLine, state.line],
+            }
+        )
+
+    return True
diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py
new file mode 100644
index 0000000..42b8fce
--- /dev/null
+++ b/markdown_it/rules_block/state_block.py
@@ -0,0 +1,230 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from ..common.utils import isSpace
+from ..ruler import StateBase
+from ..token import Token
+
+if TYPE_CHECKING:
+    from markdown_it.main import MarkdownIt
+
+
+class StateBlock(StateBase):
+    """Block-level parser state: source text, per-line offset caches, cursor and tokens."""
+
+    def __init__(
+        self,
+        src: str,
+        md: MarkdownIt,
+        env,
+        tokens: list[Token],
+        srcCharCode: tuple[int, ...] | None = None,
+    ):
+        """Store the parser context and index ``src`` into the per-line caches below."""
+        if srcCharCode is not None:
+            self._src = src  # NOTE(review): presumably bypasses a src setter — confirm
+            self.srcCharCode = srcCharCode
+        else:
+            self.src = src
+
+        # link to parser instance
+        self.md = md
+
+        self.env = env
+
+        #
+        # Internal state variables
+        #
+
+        self.tokens = tokens
+
+        self.bMarks = []  # line begin offsets for fast jumps
+        self.eMarks = []  # line end offsets for fast jumps
+        # offsets of the first non-space characters (tabs not expanded)
+        self.tShift = []
+        self.sCount = []  # indents for each line (tabs expanded)
+
+        # An amount of virtual spaces (tabs expanded) between beginning
+        # of each line (bMarks) and real beginning of that line.
+        #
+        # It exists only as a hack because blockquotes override bMarks
+        # losing information in the process.
+        # It's used only when expanding tabs, you can think about it as
+        # an initial tab length, e.g. bsCount=21 applied to string `\t123`
+        # means first tab should be expanded to 4-21%4 == 3 spaces.
+        self.bsCount = []
+
+        # block parser variables
+        self.blkIndent = 0  # required block content indent (for example, if we are
+        # inside a list, it would be positioned after list marker)
+        self.line = 0  # line index in src
+        self.lineMax = 0  # lines count
+        self.tight = False  # loose/tight mode for lists
+        self.ddIndent = -1  # indent of the current dd block (-1 if there isn't any)
+        self.listIndent = -1  # indent of the current list block (-1 if there isn't any)
+
+        # can be 'blockquote', 'list', 'root', 'paragraph' or 'reference'
+        # used in lists to determine if they interrupt a paragraph
+        self.parentType = "root"
+
+        self.level = 0
+
+        # renderer
+        self.result = ""
+
+        # Create caches
+        # Generate markers.
+        indent_found = False
+
+        start = pos = indent = offset = 0
+        length = len(self.src)
+
+        for pos, character in enumerate(self.srcCharCode):
+            if not indent_found:
+                if isSpace(character):
+                    indent += 1
+
+                    if character == 0x09:
+                        offset += 4 - offset % 4
+                    else:
+                        offset += 1
+                    continue
+                else:
+                    indent_found = True
+
+            if character == 0x0A or pos == length - 1:  # end of line or end of input
+                if character != 0x0A:
+                    pos += 1  # no final newline: end mark is one past the last char
+                self.bMarks.append(start)
+                self.eMarks.append(pos)
+                self.tShift.append(indent)
+                self.sCount.append(offset)
+                self.bsCount.append(0)
+
+                indent_found = False
+                indent = 0
+                offset = 0
+                start = pos + 1
+
+        # Push fake entry to simplify cache bounds checks
+        self.bMarks.append(length)
+        self.eMarks.append(length)
+        self.tShift.append(0)
+        self.sCount.append(0)
+        self.bsCount.append(0)
+
+        self.lineMax = len(self.bMarks) - 1  # don't count last fake line
+
+    def __repr__(self):
+        return (
+            f"{self.__class__.__name__}"
+            f"(line={self.line},level={self.level},tokens={len(self.tokens)})"
+        )
+
+    def push(self, ttype: str, tag: str, nesting: int) -> Token:
+        """Create a block Token, append it to self.tokens and update self.level."""
+        token = Token(ttype, tag, nesting)
+        token.block = True
+        if nesting < 0:
+            self.level -= 1  # closing tag
+        token.level = self.level
+        if nesting > 0:
+            self.level += 1  # opening tag
+        self.tokens.append(token)
+        return token
+
+    def isEmpty(self, line: int) -> bool:
+        """Return True when nothing follows the line's leading whitespace."""
+        return (self.bMarks[line] + self.tShift[line]) >= self.eMarks[line]
+
+    def skipEmptyLines(self, from_pos: int) -> int:
+        """Advance from_pos past empty lines; stop at the first non-empty line or lineMax."""
+        while from_pos < self.lineMax:
+            try:
+                if (self.bMarks[from_pos] + self.tShift[from_pos]) < self.eMarks[
+                    from_pos
+                ]:
+                    break
+            except IndexError:  # NOTE(review): looks unreachable for from_pos >= 0 — confirm
+                pass
+            from_pos += 1
+        return from_pos
+
+    def skipSpaces(self, pos: int) -> int:
+        """Skip spaces from given position."""
+        while pos < len(self.src):
+            if not isSpace(self.srcCharCode[pos]):
+                break
+            pos += 1
+        return pos
+
+    def skipSpacesBack(self, pos: int, minimum: int) -> int:
+        """Skip spaces from given position in reverse."""
+        if pos <= minimum:
+            return pos
+        while pos > minimum:
+            pos -= 1
+            if not isSpace(self.srcCharCode[pos]):
+                return pos + 1
+        return pos
+
+    def skipChars(self, pos: int, code: int) -> int:
+        """Skip char codes from given position."""
+        while pos < len(self.src):
+            if self.srcCharCode[pos] != code:
+                break
+            pos += 1
+        return pos
+
+    def skipCharsBack(self, pos: int, code: int, minimum: int) -> int:
+        """Skip char codes reverse from given position - 1."""
+        if pos <= minimum:
+            return pos
+        while pos > minimum:
+            pos -= 1
+            if code != self.srcCharCode[pos]:
+                return pos + 1
+        return pos
+
+    def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str:
+        """Return lines [begin, end) joined, each with up to ``indent`` columns removed."""
+        line = begin
+        if begin >= end:
+            return ""
+
+        queue = [""] * (end - begin)
+
+        i = 1
+        while line < end:
+            lineIndent = 0
+            lineStart = first = self.bMarks[line]
+            if line + 1 < end or keepLastLF:
+                last = self.eMarks[line] + 1  # also take the character at the end mark
+            else:
+                last = self.eMarks[line]
+
+            while (first < last) and (lineIndent < indent):
+                ch = self.srcCharCode[first]
+                if isSpace(ch):
+                    if ch == 0x09:
+                        lineIndent += 4 - (lineIndent + self.bsCount[line]) % 4
+                    else:
+                        lineIndent += 1
+                elif first - lineStart < self.tShift[line]:
+                    lineIndent += 1
+                else:
+                    break
+                first += 1
+
+            if lineIndent > indent:
+                # partially expanding tabs in code blocks, e.g '\t\tfoobar'
+                # with indent=2 becomes '  \tfoobar'
+                queue[i - 1] = (" " * (lineIndent - indent)) + self.src[first:last]
+            else:
+                queue[i - 1] = self.src[first:last]
+
+            line += 1
+            i += 1
+
+        return "".join(queue)
diff --git a/markdown_it/rules_block/table.py b/markdown_it/rules_block/table.py
new file mode 100644
index 0000000..e3db858
--- /dev/null
+++ b/markdown_it/rules_block/table.py
@@ -0,0 +1,238 @@
+# GFM table, https://github.github.com/gfm/#tables-extension-
+import re
+
+from ..common.utils import charCodeAt, isSpace
+from .state_block import StateBlock
+
+headerLineRe = re.compile(r"^:?-+:?$")
+enclosingPipesRe = re.compile(r"^\||\|$")
+
+
+def getLine(state: StateBlock, line: int):
+    """Return the source text of ``line`` after its first tShift[line] characters."""
+    pos = state.bMarks[line] + state.tShift[line]
+    maximum = state.eMarks[line]
+    # slice [pos, maximum): stops before the character at the line's end mark
+    return state.src[pos:maximum]
+
+
+def escapedSplit(string):
+    """Split a table row into cells on ``|``; a backslash-escaped pipe stays in its cell."""
+    result = []
+    pos = lastPos = 0
+    maximum = len(string)  # named like getLine's bound; avoids shadowing builtin max
+    isEscaped = False
+    current = ""
+    ch = charCodeAt(string, pos)
+
+    while pos < maximum:
+        if ch == 0x7C:  # /* | */
+            if not isEscaped:
+                # pipe separating cells, '|'
+                result.append(current + string[lastPos:pos])
+                current = ""
+                lastPos = pos + 1
+            else:
+                # escaped pipe, '\|'
+                current += string[lastPos : pos - 1]
+                lastPos = pos
+
+        isEscaped = ch == 0x5C  # /* \ */
+        pos += 1
+
+        ch = charCodeAt(string, pos)  # at pos == maximum this is read but never used
+
+    result.append(current + string[lastPos:])
+
+    return result
+
+
+def table(state: StateBlock, startLine: int, endLine: int, silent: bool):
+    """Parse a table (header row, delimiter row, body rows) starting at startLine."""
+    tbodyLines = None
+
+    # should have at least two lines
+    if startLine + 2 > endLine:
+        return False
+
+    nextLine = startLine + 1
+
+    if state.sCount[nextLine] < state.blkIndent:
+        return False
+
+    # if it's indented more than 3 spaces, it should be a code block
+    if state.sCount[nextLine] - state.blkIndent >= 4:
+        return False
+
+    # first character of the second line should be '|', '-', ':',
+    # and no other characters are allowed but spaces;
+    # basically, this is the equivalent of /^[-:|][-:|\s]*$/ regexp
+    pos = state.bMarks[nextLine] + state.tShift[nextLine]
+    if pos >= state.eMarks[nextLine]:
+        return False
+    first_ch = state.srcCharCode[pos]
+    pos += 1
+    if first_ch not in {0x7C, 0x2D, 0x3A}:  # not in {"|", "-", ":"}
+        return False
+
+    if pos >= state.eMarks[nextLine]:
+        return False
+    second_ch = state.srcCharCode[pos]
+    pos += 1
+    # not in {"|", "-", ":"} and not space
+    if second_ch not in {0x7C, 0x2D, 0x3A} and not isSpace(second_ch):
+        return False
+
+    # if first character is '-', then second character must not be a space
+    # (due to parsing ambiguity with list)
+    if first_ch == 0x2D and isSpace(second_ch):
+        return False
+
+    while pos < state.eMarks[nextLine]:
+        ch = state.srcCharCode[pos]
+
+        # /* | */  /* - */ /* : */
+        if ch not in {0x7C, 0x2D, 0x3A} and not isSpace(ch):
+            return False
+
+        pos += 1
+
+    lineText = getLine(state, startLine + 1)
+
+    columns = lineText.split("|")
+    aligns = []
+    for i in range(len(columns)):
+        t = columns[i].strip()
+        if not t:
+            # allow empty columns before and after table, but not in between columns;
+            # e.g. allow ` |---| `, disallow ` ---||--- `
+            if i == 0 or i == len(columns) - 1:
+                continue
+            else:
+                return False
+
+        if not headerLineRe.search(t):
+            return False
+        if charCodeAt(t, len(t) - 1) == 0x3A:  # /* : */
+            # /* : */
+            aligns.append("center" if charCodeAt(t, 0) == 0x3A else "right")
+        elif charCodeAt(t, 0) == 0x3A:  # /* : */
+            aligns.append("left")
+        else:
+            aligns.append("")
+
+    lineText = getLine(state, startLine).strip()
+    if "|" not in lineText:
+        return False
+    if state.sCount[startLine] - state.blkIndent >= 4:
+        return False
+    columns = escapedSplit(lineText)
+    if columns and columns[0] == "":
+        columns.pop(0)
+    if columns and columns[-1] == "":
+        columns.pop()
+
+    # header row will define an amount of columns in the entire table,
+    # and align row should be exactly the same (the rest of the rows can differ)
+    columnCount = len(columns)
+    if columnCount == 0 or columnCount != len(aligns):
+        return False
+
+    if silent:
+        return True
+
+    oldParentType = state.parentType
+    state.parentType = "table"
+
+    # use 'blockquote' lists for termination because it's
+    # the most similar to tables
+    terminatorRules = state.md.block.ruler.getRules("blockquote")
+
+    token = state.push("table_open", "table", 1)
+    token.map = tableLines = [startLine, 0]  # end line is set after the body scan
+
+    token = state.push("thead_open", "thead", 1)
+    token.map = [startLine, startLine + 1]
+
+    token = state.push("tr_open", "tr", 1)
+    token.map = [startLine, startLine + 1]
+
+    for i in range(len(columns)):
+        token = state.push("th_open", "th", 1)
+        if aligns[i]:
+            token.attrs = {"style": "text-align:" + aligns[i]}
+
+        token = state.push("inline", "", 0)
+        # note in markdown-it this map was removed in v12.0.0 however, we keep it,
+        # since it is helpful to propagate to children tokens
+        token.map = [startLine, startLine + 1]
+        token.content = columns[i].strip()
+        token.children = []
+
+        token = state.push("th_close", "th", -1)
+
+    token = state.push("tr_close", "tr", -1)
+    token = state.push("thead_close", "thead", -1)
+
+    nextLine = startLine + 2
+    while nextLine < endLine:
+        if state.sCount[nextLine] < state.blkIndent:
+            break
+
+        terminate = False
+        for i in range(len(terminatorRules)):
+            if terminatorRules[i](state, nextLine, endLine, True):
+                terminate = True
+                break
+
+        if terminate:
+            break
+        lineText = getLine(state, nextLine).strip()
+        if not lineText:
+            break
+        if state.sCount[nextLine] - state.blkIndent >= 4:
+            break
+        columns = escapedSplit(lineText)
+        if columns and columns[0] == "":
+            columns.pop(0)
+        if columns and columns[-1] == "":
+            columns.pop()
+
+        if nextLine == startLine + 2:
+            token = state.push("tbody_open", "tbody", 1)
+            token.map = tbodyLines = [startLine + 2, 0]  # end line is set below
+
+        token = state.push("tr_open", "tr", 1)
+        token.map = [nextLine, nextLine + 1]
+
+        for i in range(columnCount):  # cells beyond the header width are ignored
+            token = state.push("td_open", "td", 1)
+            if aligns[i]:
+                token.attrs = {"style": "text-align:" + aligns[i]}
+
+            token = state.push("inline", "", 0)
+            # note in markdown-it this map was removed in v12.0.0 however, we keep it,
+            # since it is helpful to propagate to children tokens
+            token.map = [nextLine, nextLine + 1]
+            try:
+                token.content = columns[i].strip() if columns[i] else ""
+            except IndexError:  # short row: pad with empty cells
+                token.content = ""
+            token.children = []
+
+            token = state.push("td_close", "td", -1)
+
+        token = state.push("tr_close", "tr", -1)
+
+        nextLine += 1
+
+    if tbodyLines:
+        token = state.push("tbody_close", "tbody", -1)
+        tbodyLines[1] = nextLine
+
+    token = state.push("table_close", "table", -1)
+
+    tableLines[1] = nextLine
+    state.parentType = oldParentType
+    state.line = nextLine
+    return True
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-29 04:24:24 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-29 04:24:24 +0000
commit	12e8343068b906f8b2afddc5569968a8a91fa5b0 (patch)
tree	75cc5e05a4392ea0292251898f992a15a16b172b /markdown_it/rules_block
parent	Initial commit. (diff)
download	markdown-it-py-ef6b3991640e41f44752cdb6502719ca58a762c8.tar.xz markdown-it-py-ef6b3991640e41f44752cdb6502719ca58a762c8.zip