summaryrefslogtreecommitdiffstats
path: root/markdown_it/rules_block
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-29 04:24:24 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-29 04:24:24 +0000
commit12e8343068b906f8b2afddc5569968a8a91fa5b0 (patch)
tree75cc5e05a4392ea0292251898f992a15a16b172b /markdown_it/rules_block
parentInitial commit. (diff)
downloadmarkdown-it-py-ef6b3991640e41f44752cdb6502719ca58a762c8.tar.xz
markdown-it-py-ef6b3991640e41f44752cdb6502719ca58a762c8.zip
Adding upstream version 2.1.0.upstream/2.1.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'markdown_it/rules_block')
-rw-r--r--markdown_it/rules_block/__init__.py27
-rw-r--r--markdown_it/rules_block/blockquote.py299
-rw-r--r--markdown_it/rules_block/code.py36
-rw-r--r--markdown_it/rules_block/fence.py104
-rw-r--r--markdown_it/rules_block/heading.py72
-rw-r--r--markdown_it/rules_block/hr.py54
-rw-r--r--markdown_it/rules_block/html_block.py91
-rw-r--r--markdown_it/rules_block/lheading.py90
-rw-r--r--markdown_it/rules_block/list.py344
-rw-r--r--markdown_it/rules_block/paragraph.py67
-rw-r--r--markdown_it/rules_block/reference.py218
-rw-r--r--markdown_it/rules_block/state_block.py230
-rw-r--r--markdown_it/rules_block/table.py238
13 files changed, 1870 insertions, 0 deletions
diff --git a/markdown_it/rules_block/__init__.py b/markdown_it/rules_block/__init__.py
new file mode 100644
index 0000000..bcf138d
--- /dev/null
+++ b/markdown_it/rules_block/__init__.py
@@ -0,0 +1,27 @@
+__all__ = (
+ "StateBlock",
+ "paragraph",
+ "heading",
+ "lheading",
+ "code",
+ "fence",
+ "hr",
+ "list_block",
+ "reference",
+ "blockquote",
+ "html_block",
+ "table",
+)
+
+from .blockquote import blockquote
+from .code import code
+from .fence import fence
+from .heading import heading
+from .hr import hr
+from .html_block import html_block
+from .lheading import lheading
+from .list import list_block
+from .paragraph import paragraph
+from .reference import reference
+from .state_block import StateBlock
+from .table import table
diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py
new file mode 100644
index 0000000..6575731
--- /dev/null
+++ b/markdown_it/rules_block/blockquote.py
@@ -0,0 +1,299 @@
+# Block quotes
+from __future__ import annotations
+
+import logging
+
+from ..common.utils import isSpace
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: parse a block quote whose first line is ``startLine``.

    A line belongs to the quote when its first non-space character is ``>``.
    Following lines without a marker are kept as lazy paragraph continuations
    (their ``sCount`` is set to ``-1``) until an empty line or one of the
    "blockquote" terminator rules ends the quote.

    While the quoted content is tokenized, the per-line ``bMarks``, ``tShift``,
    ``sCount`` and ``bsCount`` entries are rewritten so the nested parser sees
    the text after the marker; the saved values are written back before
    returning, together with ``lineMax``, ``parentType`` and ``blkIndent``.

    :param state: block parser state; tokens are pushed onto it.
    :param startLine: index of the line to test.
    :param endLine: index one past the last line that may be consumed.
    :param silent: validation mode - only report whether a quote starts here.
    :return: ``True`` if a block quote starts at ``startLine``, else ``False``.
    """
    LOGGER.debug(
        "entering blockquote: %s, %s, %s, %s", state, startLine, endLine, silent
    )

    oldLineMax = state.lineMax
    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    # if it's indented more than 3 spaces, it should be a code block
    if (state.sCount[startLine] - state.blkIndent) >= 4:
        return False

    # check the block quote marker
    # The read is guarded like the second-character read below: a position
    # past the end of the source simply means "no marker here".
    try:
        if state.srcCharCode[pos] != 0x3E:  # /* > */
            return False
    except IndexError:
        return False
    pos += 1

    # we know that it's going to be a valid blockquote,
    # so no point trying to find the end of it in silent mode
    if silent:
        return True

    # set offset past spaces and ">"
    initial = offset = state.sCount[startLine] + 1

    # Only consulted when a tab is expanded below; give it a defined value so
    # the branch without a space/tab after ">" never leaves it unbound.
    adjustTab = False

    try:
        second_char_code: int | None = state.srcCharCode[pos]
    except IndexError:
        second_char_code = None

    # skip one optional space after '>'
    if second_char_code == 0x20:  # /* space */
        # ' >   test '
        #     ^ -- position start of line here:
        pos += 1
        initial += 1
        offset += 1
        adjustTab = False
        spaceAfterMarker = True
    elif second_char_code == 0x09:  # /* tab */
        spaceAfterMarker = True

        if (state.bsCount[startLine] + offset) % 4 == 3:
            # '  >\t  test '
            #       ^ -- position start of line here (tab has width==1)
            pos += 1
            initial += 1
            offset += 1
            adjustTab = False
        else:
            # ' >\t  test '
            #    ^ -- position start of line here + shift bsCount slightly
            #         to make extra space appear
            adjustTab = True

    else:
        spaceAfterMarker = False

    oldBMarks = [state.bMarks[startLine]]
    state.bMarks[startLine] = pos

    # measure the indentation of the content that follows the marker
    while pos < maximum:
        ch = state.srcCharCode[pos]

        if isSpace(ch):
            if ch == 0x09:  # / tab /
                offset += (
                    4
                    - (offset + state.bsCount[startLine] + (1 if adjustTab else 0)) % 4
                )
            else:
                offset += 1

        else:
            break

        pos += 1

    oldBSCount = [state.bsCount[startLine]]
    state.bsCount[startLine] = (
        state.sCount[startLine] + 1 + (1 if spaceAfterMarker else 0)
    )

    lastLineEmpty = pos >= maximum

    oldSCount = [state.sCount[startLine]]
    state.sCount[startLine] = offset - initial

    oldTShift = [state.tShift[startLine]]
    state.tShift[startLine] = pos - state.bMarks[startLine]

    terminatorRules = state.md.block.ruler.getRules("blockquote")

    oldParentType = state.parentType
    state.parentType = "blockquote"

    # Search the end of the block
    #
    # Block ends with either:
    #  1. an empty line outside:
    #     ```
    #     > test
    #
    #     ```
    #  2. an empty line inside:
    #     ```
    #     >
    #     test
    #     ```
    #  3. another tag:
    #     ```
    #     > test
    #      - - -
    #     ```

    nextLine = startLine + 1
    while nextLine < endLine:

        # check if it's outdented, i.e. it's inside list item and indented
        # less than said list item:
        #
        # ```
        # 1. anything
        #    > current blockquote
        # 2. checking this line
        # ```
        isOutdented = state.sCount[nextLine] < state.blkIndent

        pos = state.bMarks[nextLine] + state.tShift[nextLine]
        maximum = state.eMarks[nextLine]

        if pos >= maximum:
            # Case 1: line is not inside the blockquote, and this line is empty.
            break

        evaluatesTrue = state.srcCharCode[pos] == 0x3E and not isOutdented  # /* > */
        pos += 1
        if evaluatesTrue:
            # This line is inside the blockquote.

            # set offset past spaces and ">"
            initial = offset = state.sCount[nextLine] + 1

            try:
                next_char: int | None = state.srcCharCode[pos]
            except IndexError:
                next_char = None

            # skip one optional space after '>'
            if next_char == 0x20:  # /* space */
                # ' >   test '
                #     ^ -- position start of line here:
                pos += 1
                initial += 1
                offset += 1
                adjustTab = False
                spaceAfterMarker = True
            elif next_char == 0x09:  # /* tab */
                spaceAfterMarker = True

                if (state.bsCount[nextLine] + offset) % 4 == 3:
                    # '  >\t  test '
                    #       ^ -- position start of line here (tab has width==1)
                    pos += 1
                    initial += 1
                    offset += 1
                    adjustTab = False
                else:
                    # ' >\t  test '
                    #    ^ -- position start of line here + shift bsCount slightly
                    #         to make extra space appear
                    adjustTab = True

            else:
                spaceAfterMarker = False

            oldBMarks.append(state.bMarks[nextLine])
            state.bMarks[nextLine] = pos

            while pos < maximum:
                ch = state.srcCharCode[pos]

                if isSpace(ch):
                    if ch == 0x09:
                        offset += (
                            4
                            - (
                                offset
                                + state.bsCount[nextLine]
                                + (1 if adjustTab else 0)
                            )
                            % 4
                        )
                    else:
                        offset += 1
                else:
                    break

                pos += 1

            lastLineEmpty = pos >= maximum

            oldBSCount.append(state.bsCount[nextLine])
            state.bsCount[nextLine] = (
                state.sCount[nextLine] + 1 + (1 if spaceAfterMarker else 0)
            )

            oldSCount.append(state.sCount[nextLine])
            state.sCount[nextLine] = offset - initial

            oldTShift.append(state.tShift[nextLine])
            state.tShift[nextLine] = pos - state.bMarks[nextLine]

            nextLine += 1
            continue

        # Case 2: line is not inside the blockquote, and the last line was empty.
        if lastLineEmpty:
            break

        # Case 3: another tag found.
        terminate = False

        for terminatorRule in terminatorRules:
            if terminatorRule(state, nextLine, endLine, True):
                terminate = True
                break

        if terminate:
            # Quirk to enforce "hard termination mode" for paragraphs;
            # normally if you call `tokenize(state, startLine, nextLine)`,
            # paragraphs will look below nextLine for paragraph continuation,
            # but if blockquote is terminated by another tag, they shouldn't
            state.lineMax = nextLine

            if state.blkIndent != 0:
                # state.blkIndent was non-zero, we now set it to zero,
                # so we need to re-calculate all offsets to appear as
                # if indent wasn't changed
                oldBMarks.append(state.bMarks[nextLine])
                oldBSCount.append(state.bsCount[nextLine])
                oldTShift.append(state.tShift[nextLine])
                oldSCount.append(state.sCount[nextLine])
                state.sCount[nextLine] -= state.blkIndent

            break

        oldBMarks.append(state.bMarks[nextLine])
        oldBSCount.append(state.bsCount[nextLine])
        oldTShift.append(state.tShift[nextLine])
        oldSCount.append(state.sCount[nextLine])

        # A negative indentation means that this is a paragraph continuation
        #
        state.sCount[nextLine] = -1

        nextLine += 1

    oldIndent = state.blkIndent
    state.blkIndent = 0

    token = state.push("blockquote_open", "blockquote", 1)
    token.markup = ">"
    # the end line is filled in once the nested content has been tokenized
    token.map = lines = [startLine, 0]

    state.md.block.tokenize(state, startLine, nextLine)

    token = state.push("blockquote_close", "blockquote", -1)
    token.markup = ">"

    state.lineMax = oldLineMax
    state.parentType = oldParentType
    lines[1] = state.line

    # Restore original tShift; this might not be necessary since the parser
    # has already been here, but just to make sure we can do that.
    for i, item in enumerate(oldTShift):
        state.bMarks[i + startLine] = oldBMarks[i]
        state.tShift[i + startLine] = item
        state.sCount[i + startLine] = oldSCount[i]
        state.bsCount[i + startLine] = oldBSCount[i]

    state.blkIndent = oldIndent

    return True
diff --git a/markdown_it/rules_block/code.py b/markdown_it/rules_block/code.py
new file mode 100644
index 0000000..c4fdba3
--- /dev/null
+++ b/markdown_it/rules_block/code.py
@@ -0,0 +1,36 @@
+"""Code block (4 spaces padded)."""
+import logging
+
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
def code(state: StateBlock, startLine: int, endLine: int, silent: bool = False):
    """Block rule: indented code block (content indented by 4+ columns).

    The block starts at ``startLine`` when that line is indented at least four
    columns beyond ``state.blkIndent``. It then extends over following lines
    that are either empty or indented just as deeply; trailing empty lines
    are not included in the block.

    Pushes one ``code_block`` token and returns ``True``; returns ``False``
    when ``startLine`` is not indented enough. ``silent`` is only logged.
    """
    LOGGER.debug("entering code: %s, %s, %s, %s", state, startLine, endLine, silent)

    if state.sCount[startLine] - state.blkIndent < 4:
        return False

    # `cursor` walks forward; `blockEnd` remembers the line after the last
    # sufficiently indented one, so trailing blank lines are left out.
    cursor = startLine + 1
    blockEnd = cursor

    while cursor < endLine:
        if state.isEmpty(cursor):
            cursor += 1
        elif state.sCount[cursor] - state.blkIndent >= 4:
            cursor += 1
            blockEnd = cursor
        else:
            break

    state.line = blockEnd

    token = state.push("code_block", "code", 0)
    rawLines = state.getLines(startLine, blockEnd, 4 + state.blkIndent, False)
    token.content = rawLines + "\n"
    token.map = [startLine, state.line]

    return True
diff --git a/markdown_it/rules_block/fence.py b/markdown_it/rules_block/fence.py
new file mode 100644
index 0000000..c4f5275
--- /dev/null
+++ b/markdown_it/rules_block/fence.py
@@ -0,0 +1,104 @@
+# fences (``` lang, ~~~ lang)
+import logging
+
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: fenced code block (````` ``` lang````` or ``~~~ lang``).

    The opening fence is a run of at least three backticks or tildes; the text
    after it on the same line becomes ``token.info`` (a backtick fence is
    rejected if that text contains a backtick). The block runs until a closing
    fence of the same character that is at least as long, is indented less than
    four columns and is followed only by spaces - or until ``endLine`` / a
    non-empty line indented less than ``state.blkIndent``.

    :param state: block parser state; one ``fence`` token is pushed onto it.
    :param startLine: index of the line to test.
    :param endLine: index one past the last line that may be consumed.
    :param silent: validation mode - only report whether a fence starts here.
    :return: ``True`` if a fence starts at ``startLine``, else ``False``.
    """
    LOGGER.debug("entering fence: %s, %s, %s, %s", state, startLine, endLine, silent)

    haveEndMarker = False
    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    # a fence needs at least three marker characters on the line
    if pos + 3 > maximum:
        return False

    marker = state.srcCharCode[pos]

    # /* ~ */  /* ` */
    if marker != 0x7E and marker != 0x60:
        return False

    # scan marker length
    mem = pos
    pos = state.skipChars(pos, marker)

    markerLength = pos - mem

    if markerLength < 3:
        return False

    markup = state.src[mem:pos]
    params = state.src[pos:maximum]

    # /* ` */
    if marker == 0x60:
        if chr(marker) in params:
            return False

    # Since start is found, we can report success here in validation mode
    if silent:
        return True

    # search end of block
    nextLine = startLine

    while True:
        nextLine += 1
        if nextLine >= endLine:
            # unclosed block should be autoclosed by end of document.
            # also block seems to be autoclosed by end of parent
            break

        pos = mem = state.bMarks[nextLine] + state.tShift[nextLine]
        maximum = state.eMarks[nextLine]

        if pos < maximum and state.sCount[nextLine] < state.blkIndent:
            # non-empty line with negative indent should stop the list:
            # - ```
            #  test
            break

        # This read happens for empty lines too, so `pos` may sit at the very
        # end of the source. Such a line cannot hold a closing fence; treat it
        # like any other non-matching line instead of raising IndexError.
        try:
            if state.srcCharCode[pos] != marker:
                continue
        except IndexError:
            continue

        if state.sCount[nextLine] - state.blkIndent >= 4:
            # closing fence should be indented less than 4 spaces
            continue

        pos = state.skipChars(pos, marker)

        # closing code fence must be at least as long as the opening one
        if pos - mem < markerLength:
            continue

        # make sure tail has spaces only
        pos = state.skipSpaces(pos)

        if pos < maximum:
            continue

        haveEndMarker = True
        # found!
        break

    # If a fence has heading spaces, they should be removed from its inner block
    fenceIndent = state.sCount[startLine]

    # the closing fence line (when present) is consumed as part of the block
    state.line = nextLine + (1 if haveEndMarker else 0)

    token = state.push("fence", "code", 0)
    token.info = params
    token.content = state.getLines(startLine + 1, nextLine, fenceIndent, True)
    token.markup = markup
    token.map = [startLine, state.line]

    return True
diff --git a/markdown_it/rules_block/heading.py b/markdown_it/rules_block/heading.py
new file mode 100644
index 0000000..8d4ef3e
--- /dev/null
+++ b/markdown_it/rules_block/heading.py
@@ -0,0 +1,72 @@
+""" ATX heading (#, ##, ...) """
+from __future__ import annotations
+
+import logging
+
+from ..common.utils import isSpace
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
def heading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: ATX heading (``#`` through ``######``).

    The line must start (after at most three columns of indentation) with one
    to six ``#`` characters followed by a space or the end of the line. An
    optional closing run of ``#`` preceded by a space is cut from the content.

    Pushes ``heading_open`` / ``inline`` / ``heading_close`` tokens and sets
    ``state.line`` to the line after the heading.

    :param state: block parser state; tokens are pushed onto it.
    :param startLine: index of the line to test.
    :param endLine: index one past the last available line (only logged here).
    :param silent: validation mode - only report whether a heading starts here.
    :return: ``True`` if the line is an ATX heading, else ``False``.
    """
    LOGGER.debug("entering heading: %s, %s, %s, %s", state, startLine, endLine, silent)

    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    # Check the bounds before reading: on a line without content `pos` may
    # point past the last character, and indexing would raise IndexError.
    if pos >= maximum:
        return False

    ch: int | None = state.srcCharCode[pos]

    # /* # */
    if ch != 0x23:
        return False

    # count heading level
    level = 1
    pos += 1
    try:
        ch = state.srcCharCode[pos]
    except IndexError:
        ch = None
    # /* # */
    while ch == 0x23 and pos < maximum and level <= 6:
        level += 1
        pos += 1
        try:
            ch = state.srcCharCode[pos]
        except IndexError:
            ch = None

    # more than six '#', or the marker run is not followed by a space / EOL
    if level > 6 or (pos < maximum and not isSpace(ch)):
        return False

    if silent:
        return True

    # Let's cut tails like '    ###  ' from the end of string

    maximum = state.skipSpacesBack(maximum, pos)
    tmp = state.skipCharsBack(maximum, 0x23, pos)  # #
    # the closing '#' run only counts when it is separated by a space
    if tmp > pos and isSpace(state.srcCharCode[tmp - 1]):
        maximum = tmp

    state.line = startLine + 1

    token = state.push("heading_open", "h" + str(level), 1)
    token.markup = "########"[:level]
    token.map = [startLine, state.line]

    token = state.push("inline", "", 0)
    token.content = state.src[pos:maximum].strip()
    token.map = [startLine, state.line]
    token.children = []

    token = state.push("heading_close", "h" + str(level), -1)
    token.markup = "########"[:level]

    return True
diff --git a/markdown_it/rules_block/hr.py b/markdown_it/rules_block/hr.py
new file mode 100644
index 0000000..804cd9d
--- /dev/null
+++ b/markdown_it/rules_block/hr.py
@@ -0,0 +1,54 @@
+"""Horizontal rule
+
+At least 3 of these characters on a line * - _
+"""
+import logging
+
+from ..common.utils import isSpace
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
def hr(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: horizontal rule (thematic break).

    The line must consist of at least three identical ``*``, ``-`` or ``_``
    characters, optionally mixed with spaces, and nothing else.

    Pushes one ``hr`` token whose ``markup`` is the marker character repeated
    once per marker found on the line (spaces are not included).

    :param state: block parser state; the token is pushed onto it.
    :param startLine: index of the line to test.
    :param endLine: index one past the last available line (only logged here).
    :param silent: validation mode - only report whether a rule is found.
    :return: ``True`` if the line is a horizontal rule, else ``False``.
    """
    LOGGER.debug("entering hr: %s, %s, %s, %s", state, startLine, endLine, silent)

    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    # A line without content can leave `pos` past the end of the source;
    # that is "not a rule", not an error.
    try:
        marker = state.srcCharCode[pos]
    except IndexError:
        return False
    pos += 1

    # Check hr marker: /* * */ /* - */ /* _ */
    if marker != 0x2A and marker != 0x2D and marker != 0x5F:
        return False

    # markers can be mixed with spaces, but there should be at least 3 of them

    cnt = 1  # the marker just consumed
    while pos < maximum:
        ch = state.srcCharCode[pos]
        pos += 1
        if ch != marker and not isSpace(ch):
            return False
        if ch == marker:
            cnt += 1

    if cnt < 3:
        return False

    if silent:
        return True

    state.line = startLine + 1

    token = state.push("hr", "hr", 0)
    token.map = [startLine, state.line]
    # One character per counted marker. The JavaScript original builds this as
    # `Array(cnt + 1).join(ch)`, which yields `cnt` copies, not `cnt + 1`.
    token.markup = chr(marker) * cnt

    return True
diff --git a/markdown_it/rules_block/html_block.py b/markdown_it/rules_block/html_block.py
new file mode 100644
index 0000000..31afab7
--- /dev/null
+++ b/markdown_it/rules_block/html_block.py
@@ -0,0 +1,91 @@
+# HTML block
+from __future__ import annotations
+
+import logging
+import re
+
+from ..common.html_blocks import block_names
+from ..common.html_re import HTML_OPEN_CLOSE_TAG_STR
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+# An array of opening and corresponding closing sequences for html tags,
+# last argument defines whether it can terminate a paragraph or not
# Each entry is (opening pattern, closing pattern, may-interrupt-paragraph).
# The opening pattern is searched in the first line of a candidate block, the
# closing pattern in every line until it matches. The order matters: the first
# opening pattern that matches wins (cf. the seven HTML block start conditions
# of the CommonMark spec).
HTML_SEQUENCES: list[tuple[re.Pattern, re.Pattern, bool]] = [
    # <script>, <pre>, <style>, <textarea> ... up to the matching end tag
    (
        re.compile(r"^<(script|pre|style|textarea)(?=(\s|>|$))", re.IGNORECASE),
        re.compile(r"<\/(script|pre|style|textarea)>", re.IGNORECASE),
        True,
    ),
    # comment: <!-- ... -->
    (re.compile(r"^<!--"), re.compile(r"-->"), True),
    # processing instruction: <? ... ?>
    (re.compile(r"^<\?"), re.compile(r"\?>"), True),
    # declaration: <!X ... >
    (re.compile(r"^<![A-Z]"), re.compile(r">"), True),
    # CDATA section: <![CDATA[ ... ]]>
    (re.compile(r"^<!\[CDATA\["), re.compile(r"\]\]>"), True),
    # known block-level tag name; ends at the first empty line
    (
        re.compile("^</?(" + "|".join(block_names) + ")(?=(\\s|/?>|$))", re.IGNORECASE),
        re.compile(r"^$"),
        True,
    ),
    # any other complete open/close tag alone on its line; ends at the first
    # empty line and is the only kind that cannot interrupt a paragraph
    (re.compile(HTML_OPEN_CLOSE_TAG_STR + "\\s*$"), re.compile(r"^$"), False),
]
+
+
def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: raw HTML block.

    Active only when the ``html`` option is truthy. The first line must start
    with ``<`` and match the opening pattern of one ``HTML_SEQUENCES`` entry;
    the block then extends until a line matches that entry's closing pattern,
    a line is indented less than ``state.blkIndent``, or ``endLine`` is hit.

    :param state: block parser state; one ``html_block`` token is pushed.
    :param startLine: index of the line to test.
    :param endLine: index one past the last line that may be consumed.
    :param silent: validation mode - report whether the matched sequence is
        allowed to terminate a paragraph (third field of the entry).
    :return: ``True`` if an HTML block was recognised (in silent mode: if it
        may act as a terminator), else ``False``.
    """
    LOGGER.debug(
        "entering html_block: %s, %s, %s, %s", state, startLine, endLine, silent
    )
    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    if not state.md.options.get("html", None):
        return False

    # Guarded like the other first-character reads in this package: a
    # position past the end of the source means there is no "<" here.
    try:
        if state.srcCharCode[pos] != 0x3C:  # /* < */
            return False
    except IndexError:
        return False

    lineText = state.src[pos:maximum]

    # first sequence whose opening pattern matches the line wins
    html_seq = None
    for HTML_SEQUENCE in HTML_SEQUENCES:
        if HTML_SEQUENCE[0].search(lineText):
            html_seq = HTML_SEQUENCE
            break

    if not html_seq:
        return False

    if silent:
        # true if this sequence can be a terminator, false otherwise
        return html_seq[2]

    nextLine = startLine + 1

    # If we are here - we detected HTML block.
    # Let's roll down till block end.
    if not html_seq[1].search(lineText):
        while nextLine < endLine:
            if state.sCount[nextLine] < state.blkIndent:
                break

            pos = state.bMarks[nextLine] + state.tShift[nextLine]
            maximum = state.eMarks[nextLine]
            lineText = state.src[pos:maximum]

            if html_seq[1].search(lineText):
                # a non-empty closing line belongs to the block; an empty
                # line that ends the block does not
                if len(lineText) != 0:
                    nextLine += 1
                break
            nextLine += 1

    state.line = nextLine

    token = state.push("html_block", "", 0)
    token.map = [startLine, nextLine]
    token.content = state.getLines(startLine, nextLine, state.blkIndent, True)

    return True
diff --git a/markdown_it/rules_block/lheading.py b/markdown_it/rules_block/lheading.py
new file mode 100644
index 0000000..f26e2af
--- /dev/null
+++ b/markdown_it/rules_block/lheading.py
@@ -0,0 +1,90 @@
+# lheading (---, ===)
+import logging
+
+from ..ruler import Ruler
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: setext heading (text underlined with ``===`` or ``---``).

    Starting after ``startLine``, lines are scanned until an empty line, a
    "paragraph" terminator rule, or an underline: a line made only of ``=``
    (level 1) or ``-`` (level 2) followed by optional spaces. Without an
    underline the rule fails and the text is left for other rules.

    Pushes ``heading_open`` / ``inline`` / ``heading_close`` tokens and sets
    ``state.line`` to the line after the underline.

    :param state: block parser state; tokens are pushed onto it.
    :param startLine: index of the first line of the heading text.
    :param endLine: index one past the last line that may be consumed.
    :param silent: only logged by this rule.
    :return: ``True`` if a setext heading was found, else ``False``.
    """
    LOGGER.debug("entering lheading: %s, %s, %s, %s", state, startLine, endLine, silent)

    level = None
    nextLine = startLine + 1
    ruler: Ruler = state.md.block.ruler
    terminatorRules = ruler.getRules("paragraph")

    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    oldParentType = state.parentType
    state.parentType = "paragraph"  # use paragraph to match terminatorRules

    # jump line-by-line until empty one or EOF
    while nextLine < endLine and not state.isEmpty(nextLine):
        # this would be a code block normally, but after paragraph
        # it's considered a lazy continuation regardless of what's there
        if state.sCount[nextLine] - state.blkIndent > 3:
            nextLine += 1
            continue

        # Check for underline in setext header
        if state.sCount[nextLine] >= state.blkIndent:
            pos = state.bMarks[nextLine] + state.tShift[nextLine]
            maximum = state.eMarks[nextLine]

            if pos < maximum:
                marker = state.srcCharCode[pos]

                # /* - */  /* = */
                if marker == 0x2D or marker == 0x3D:
                    pos = state.skipChars(pos, marker)
                    pos = state.skipSpaces(pos)

                    # /* = */
                    if pos >= maximum:
                        level = 1 if marker == 0x3D else 2
                        break

        # quirk for blockquotes, this line should already be checked by that rule
        if state.sCount[nextLine] < 0:
            nextLine += 1
            continue

        # Some tags can terminate paragraph without empty line.
        terminate = False
        for terminatorRule in terminatorRules:
            if terminatorRule(state, nextLine, endLine, True):
                terminate = True
                break
        if terminate:
            break

        nextLine += 1

    if not level:
        # Didn't find valid underline.
        # Undo the temporary parentType change so a failed attempt does not
        # leave "paragraph" behind for the rules tried afterwards.
        state.parentType = oldParentType
        return False

    content = state.getLines(startLine, nextLine, state.blkIndent, False).strip()

    # the underline line is consumed together with the heading text
    state.line = nextLine + 1

    token = state.push("heading_open", "h" + str(level), 1)
    token.markup = chr(marker)
    token.map = [startLine, state.line]

    token = state.push("inline", "", 0)
    token.content = content
    token.map = [startLine, state.line - 1]
    token.children = []

    token = state.push("heading_close", "h" + str(level), -1)
    token.markup = chr(marker)

    state.parentType = oldParentType

    return True
diff --git a/markdown_it/rules_block/list.py b/markdown_it/rules_block/list.py
new file mode 100644
index 0000000..a7617ad
--- /dev/null
+++ b/markdown_it/rules_block/list.py
@@ -0,0 +1,344 @@
+# Lists
+import logging
+
+from ..common.utils import isSpace
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
+# Search `[-+*][\n ]`, returns next pos after marker on success
+# or -1 on fail.
def skipBulletListMarker(state: StateBlock, startLine: int):
    """Try to match a bullet list marker at the start of ``startLine``.

    A marker is one of ``*``, ``-`` or ``+`` that is followed by a space
    character or by the end of the line.

    :param state: block parser state (reads ``bMarks``, ``tShift``,
        ``eMarks`` and ``srcCharCode``).
    :param startLine: index of the line to inspect.
    :return: the position right after the marker, or ``-1`` if there is none.
    """
    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    # A line without content can leave `pos` past the end of the source;
    # report "no marker" instead of raising IndexError.
    try:
        marker = state.srcCharCode[pos]
    except IndexError:
        return -1
    pos += 1
    # Check bullet /* * */ /* - */ /* + */
    if marker != 0x2A and marker != 0x2D and marker != 0x2B:
        return -1

    if pos < maximum:
        ch = state.srcCharCode[pos]

        if not isSpace(ch):
            # " -test " - is not a list item
            return -1

    return pos
+
+
+# Search `\d+[.)][\n ]`, returns next pos after marker on success
+# or -1 on fail.
def skipOrderedListMarker(state: StateBlock, startLine: int):
    """Try to match an ordered list marker at the start of ``startLine``.

    A marker is one to nine decimal digits, then ``.`` or ``)``, followed by a
    space character or by the end of the line.

    Returns the position right after the ``.`` / ``)``, or ``-1`` when the
    line does not start with such a marker.
    """
    first = state.bMarks[startLine] + state.tShift[startLine]
    lineEnd = state.eMarks[startLine]

    # List marker should have at least 2 chars (digit + dot)
    if first + 1 >= lineEnd:
        return -1

    # the marker must open with a digit: /* 0 */ .. /* 9 */
    if not 0x30 <= state.srcCharCode[first] <= 0x39:
        return -1

    cursor = first + 1
    while True:
        # EOL before the delimiter -> fail
        if cursor >= lineEnd:
            return -1

        code = state.srcCharCode[cursor]
        cursor += 1

        if 0x30 <= code <= 0x39:
            # List marker should have no more than 9 digits
            # (prevents integer overflow in browsers)
            if cursor - first >= 10:
                return -1
            continue

        # found valid marker: /* ) */ /* . */
        if code in (0x29, 0x2E):
            break

        return -1

    # " 1.test " - is not a list item
    if cursor < lineEnd and not isSpace(state.srcCharCode[cursor]):
        return -1

    return cursor
+
+
def markTightParagraphs(state: StateBlock, idx: int):
    """Hide the paragraph wrappers of the items of a tight list.

    Scans ``state.tokens`` from ``idx + 2`` up to (not including) the last two
    tokens. Every ``paragraph_open`` token found at nesting level
    ``state.level + 2`` gets ``hidden = True``, as does the token two places
    after it; the scan then resumes behind that pair.
    """
    wantedLevel = state.level + 2
    tokens = state.tokens
    stop = len(tokens) - 2

    cursor = idx + 2
    while cursor < stop:
        candidate = tokens[cursor]
        if candidate.level == wantedLevel and candidate.type == "paragraph_open":
            tokens[cursor + 2].hidden = True
            candidate.hidden = True
            # jump over the open / inline / close triple
            cursor += 3
        else:
            cursor += 1
+
+
def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Block rule: bullet or ordered list starting at ``startLine``.

    Detects the marker with ``skipOrderedListMarker`` / ``skipBulletListMarker``
    and then, item by item, tokenizes the item content with a temporarily
    raised ``state.blkIndent``. The list ends at ``endLine``, on an outdented
    or over-indented line, when a "list" terminator rule matches, or when the
    marker type or marker character changes.

    In silent mode the rule only validates; when the parent is a paragraph it
    additionally requires an ordered list to start with 1 and the first item
    not to be empty. Returns ``True`` if a list starts here, else ``False``.
    """
    LOGGER.debug("entering list: %s, %s, %s, %s", state, startLine, endLine, silent)

    tight = True

    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    # Special case:
    #  - item 1
    #   - item 2
    #    - item 3
    #     - item 4
    #      - this one is a paragraph continuation
    if (
        state.listIndent >= 0
        and state.sCount[startLine] - state.listIndent >= 4
        and state.sCount[startLine] < state.blkIndent
    ):
        return False

    # limit conditions when list can interrupt
    # a paragraph (validation mode only)
    #
    # Next list item should still terminate previous list item
    #
    # This code can fail if plugins use blkIndent as well as lists,
    # but I hope the spec gets fixed long before that happens.
    isTerminatingParagraph = bool(
        silent
        and state.parentType == "paragraph"
        and state.tShift[startLine] >= state.blkIndent
    )

    # Detect list type and position after marker
    posAfterMarker = skipOrderedListMarker(state, startLine)
    isOrdered = posAfterMarker >= 0
    if isOrdered:
        start = state.bMarks[startLine] + state.tShift[startLine]
        markerValue = int(state.src[start : posAfterMarker - 1])

        # If we're starting a new ordered list right after
        # a paragraph, it should start with 1.
        if isTerminatingParagraph and markerValue != 1:
            return False
    else:
        posAfterMarker = skipBulletListMarker(state, startLine)
        if posAfterMarker < 0:
            return False

    # If we're starting a new unordered list right after
    # a paragraph, first line should not be empty.
    if (
        isTerminatingParagraph
        and state.skipSpaces(posAfterMarker) >= state.eMarks[startLine]
    ):
        return False

    # We should terminate list on style change. Remember first one to compare.
    markerCharCode = state.srcCharCode[posAfterMarker - 1]
    markerChar = chr(markerCharCode)

    # For validation mode we can terminate immediately
    if silent:
        return True

    # Start list
    listTokIdx = len(state.tokens)

    if isOrdered:
        token = state.push("ordered_list_open", "ol", 1)
        if markerValue != 1:
            token.attrs = {"start": markerValue}
    else:
        token = state.push("bullet_list_open", "ul", 1)

    token.map = listLines = [startLine, 0]
    token.markup = markerChar

    #
    # Iterate list items
    #

    nextLine = startLine
    prevEmptyEnd = False
    terminatorRules = state.md.block.ruler.getRules("list")

    oldParentType = state.parentType
    state.parentType = "list"

    while nextLine < endLine:
        pos = posAfterMarker
        maximum = state.eMarks[nextLine]

        # column reached right after the marker
        initial = offset = (
            state.sCount[nextLine]
            + posAfterMarker
            - (state.bMarks[startLine] + state.tShift[startLine])
        )

        # expand the whitespace between the marker and the item content
        while pos < maximum:
            ch = state.srcCharCode[pos]

            if ch == 0x09:  # \t
                offset += 4 - (offset + state.bsCount[nextLine]) % 4
            elif ch == 0x20:  # \s
                offset += 1
            else:
                break

            pos += 1

        contentStart = pos

        if contentStart >= maximum:
            # trimming space in "-    \n  3" case, indent is 1 here
            indentAfterMarker = 1
        else:
            indentAfterMarker = offset - initial

        # If we have more than 4 spaces, the indent is 1
        # (the rest is just indented code block)
        if indentAfterMarker > 4:
            indentAfterMarker = 1

        # "  -  test"
        #  ^^^^^ - calculating total length of this thing
        indent = initial + indentAfterMarker

        # Run subparser & write tokens
        token = state.push("list_item_open", "li", 1)
        token.markup = markerChar
        token.map = itemLines = [startLine, 0]
        if isOrdered:
            token.info = state.src[start : posAfterMarker - 1]

        # change current state, then restore it after parser subcall
        oldTight = state.tight
        oldTShift = state.tShift[startLine]
        oldSCount = state.sCount[startLine]

        #  - example list
        # ^ listIndent position will be here
        #   ^ blkIndent position will be here
        #
        oldListIndent = state.listIndent
        state.listIndent = state.blkIndent
        state.blkIndent = indent

        state.tight = True
        state.tShift[startLine] = contentStart - state.bMarks[startLine]
        state.sCount[startLine] = offset

        if contentStart >= maximum and state.isEmpty(startLine + 1):
            # workaround for this case
            # (list item is empty, list terminates before "foo"):
            # ~~~~~~~~
            #   -
            #
            #     foo
            # ~~~~~~~~
            state.line = min(state.line + 2, endLine)
        else:
            # NOTE in list.js this was:
            # state.md.block.tokenize(state, startLine, endLine, True)
            # but tokenize does not take the final parameter
            state.md.block.tokenize(state, startLine, endLine)

        # If any of list item is tight, mark list as tight
        if (not state.tight) or prevEmptyEnd:
            tight = False

        # Item become loose if finish with empty line,
        # but we should filter last element, because it means list finish
        prevEmptyEnd = (state.line - startLine) > 1 and state.isEmpty(state.line - 1)

        state.blkIndent = state.listIndent
        state.listIndent = oldListIndent
        state.tShift[startLine] = oldTShift
        state.sCount[startLine] = oldSCount
        state.tight = oldTight

        token = state.push("list_item_close", "li", -1)
        token.markup = markerChar

        nextLine = startLine = state.line
        itemLines[1] = nextLine

        if nextLine >= endLine:
            break

        #
        # Try to check if list is terminated or continued.
        #
        if state.sCount[nextLine] < state.blkIndent:
            break

        # if it's indented more than 3 spaces, it should be a code block
        if state.sCount[startLine] - state.blkIndent >= 4:
            break

        # fail if terminating block found
        if any(rule(state, nextLine, endLine, True) for rule in terminatorRules):
            break

        # fail if list has another type
        if isOrdered:
            posAfterMarker = skipOrderedListMarker(state, nextLine)
            if posAfterMarker < 0:
                break
            start = state.bMarks[nextLine] + state.tShift[nextLine]
        else:
            posAfterMarker = skipBulletListMarker(state, nextLine)
            if posAfterMarker < 0:
                break

        # a different marker character starts a new list
        if markerCharCode != state.srcCharCode[posAfterMarker - 1]:
            break

    # Finalize list
    closing = ("ordered_list_close", "ol") if isOrdered else ("bullet_list_close", "ul")
    token = state.push(closing[0], closing[1], -1)
    token.markup = markerChar

    listLines[1] = nextLine
    state.line = nextLine

    state.parentType = oldParentType

    # mark paragraphs tight if needed
    if tight:
        markTightParagraphs(state, listTokIdx)

    return True
diff --git a/markdown_it/rules_block/paragraph.py b/markdown_it/rules_block/paragraph.py
new file mode 100644
index 0000000..4fee83e
--- /dev/null
+++ b/markdown_it/rules_block/paragraph.py
@@ -0,0 +1,67 @@
+"""Paragraph."""
+import logging
+
+from ..ruler import Ruler
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
def paragraph(state: StateBlock, startLine: int, endLine: int, silent: bool = False):
    """Block rule: plain paragraph (always succeeds).

    Collects ``startLine`` and the following lines until an empty line, the
    end of input, or a line on which one of the "paragraph" terminator rules
    matches. Lines indented four or more columns beyond ``state.blkIndent``,
    and lines with a negative ``sCount``, are taken as continuations without
    consulting the terminators.

    Pushes ``paragraph_open`` / ``inline`` / ``paragraph_close`` tokens, sets
    ``state.line`` to the first line after the paragraph and returns ``True``.
    """
    LOGGER.debug(
        "entering paragraph: %s, %s, %s, %s", state, startLine, endLine, silent
    )

    ruler: Ruler = state.md.block.ruler
    terminatorRules = ruler.getRules("paragraph")
    # the scan is bounded by state.lineMax, not by the endLine argument
    endLine = state.lineMax

    savedParentType = state.parentType
    state.parentType = "paragraph"

    # jump line-by-line until empty one or EOF
    lineNo = startLine + 1
    while lineNo < endLine and not state.isEmpty(lineNo):
        # Deeply indented text would be a code block normally, but after a
        # paragraph it's a lazy continuation regardless of what's there.
        # A negative sCount is the blockquote quirk: that rule has already
        # checked the line.
        if state.sCount[lineNo] - state.blkIndent > 3 or state.sCount[lineNo] < 0:
            lineNo += 1
            continue

        # Some tags can terminate paragraph without empty line.
        if any(rule(state, lineNo, endLine, True) for rule in terminatorRules):
            break

        lineNo += 1

    content = state.getLines(startLine, lineNo, state.blkIndent, False).strip()

    state.line = lineNo

    opening = state.push("paragraph_open", "p", 1)
    opening.map = [startLine, state.line]

    inline = state.push("inline", "", 0)
    inline.content = content
    inline.map = [startLine, state.line]
    inline.children = []

    state.push("paragraph_close", "p", -1)

    state.parentType = savedParentType

    return True
diff --git a/markdown_it/rules_block/reference.py b/markdown_it/rules_block/reference.py
new file mode 100644
index 0000000..35adde2
--- /dev/null
+++ b/markdown_it/rules_block/reference.py
@@ -0,0 +1,218 @@
+import logging
+
+from ..common.utils import charCodeAt, isSpace, normalizeReference
+from .state_block import StateBlock
+
+# Module-level logger; the reference rule emits a debug record on entry.
+LOGGER = logging.getLogger(__name__)
+
+
+def reference(state: StateBlock, startLine, _endLine, silent):
+    """Block rule: parse a link reference definition ``[label]: destination 'title'``.
+
+    On success, and when not ``silent``, the definition is stored in
+    ``state.env["references"]`` under its normalized label (the first
+    definition of a label wins; later ones are appended to
+    ``state.env["duplicate_refs"]``), ``state.line`` is moved past the
+    definition and, if the ``inline_definitions`` option is set, a
+    ``definition`` token is pushed.
+
+    ``state.parentType`` is switched to ``"reference"`` only while the
+    terminator rules are consulted and is restored on every exit path.
+
+    :param state: block parser state
+    :param startLine: index of the line where the candidate definition starts
+    :param _endLine: only logged; the scan is bounded by ``state.lineMax``
+    :param silent: validation mode; report a match without storing anything
+    :return: ``True`` if a reference definition was recognised, else ``False``
+    """
+
+    LOGGER.debug(
+        "entering reference: %s, %s, %s, %s", state, startLine, _endLine, silent
+    )
+
+    # number of source lines, beyond startLine, consumed by the definition
+    lines = 0
+    pos = state.bMarks[startLine] + state.tShift[startLine]
+    maximum = state.eMarks[startLine]
+    nextLine = startLine + 1
+
+    # if it's indented more than 3 spaces, it should be a code block
+    if state.sCount[startLine] - state.blkIndent >= 4:
+        return False
+
+    if state.srcCharCode[pos] != 0x5B:  # /* [ */
+        return False
+
+    # Simple check to quickly interrupt scan on [link](url) at the start of line.
+    # Can be useful in practice: https://github.com/markdown-it/markdown-it/issues/54
+    while pos < maximum:
+        # /* ] */  /* \ */  /* : */
+        if state.srcCharCode[pos] == 0x5D and state.srcCharCode[pos - 1] != 0x5C:
+            if pos + 1 == maximum:
+                return False
+            if state.srcCharCode[pos + 1] != 0x3A:
+                return False
+            break
+        pos += 1
+
+    endLine = state.lineMax
+
+    # jump line-by-line until empty one or EOF
+    terminatorRules = state.md.block.ruler.getRules("reference")
+
+    # parentType is only consulted by the terminator rules called below, so it
+    # is restored as soon as the scan ends.  Restoring it at the very end of
+    # the function would leak "reference" into the shared state through every
+    # early ``return`` that follows.
+    oldParentType = state.parentType
+    state.parentType = "reference"
+    try:
+        while nextLine < endLine and not state.isEmpty(nextLine):
+            # this would be a code block normally, but after paragraph
+            # it's considered a lazy continuation regardless of what's there
+            if state.sCount[nextLine] - state.blkIndent > 3:
+                nextLine += 1
+                continue
+
+            # quirk for blockquotes, this line should already be checked by that rule
+            if state.sCount[nextLine] < 0:
+                nextLine += 1
+                continue
+
+            # Some tags can terminate paragraph without empty line.
+            if any(rule(state, nextLine, endLine, True) for rule in terminatorRules):
+                break
+
+            nextLine += 1
+    finally:
+        state.parentType = oldParentType
+
+    string = state.getLines(startLine, nextLine, state.blkIndent, False).strip()
+    maximum = len(string)
+
+    # locate the "]" that closes the label, counting line breaks on the way
+    labelEnd = None
+    pos = 1
+    while pos < maximum:
+        ch = charCodeAt(string, pos)
+        if ch == 0x5B:  # /* [ */
+            return False
+        elif ch == 0x5D:  # /* ] */
+            labelEnd = pos
+            break
+        elif ch == 0x0A:  # /* \n */
+            lines += 1
+        elif ch == 0x5C:  # /* \ */
+            # skip the escaped character, but still count an escaped newline
+            pos += 1
+            if pos < maximum and charCodeAt(string, pos) == 0x0A:
+                lines += 1
+        pos += 1
+
+    if labelEnd is None or charCodeAt(string, labelEnd + 1) != 0x3A:  # /* : */
+        return False
+
+    # [label]:   destination   'title'
+    #         ^^^ skip optional whitespace here
+    pos = labelEnd + 2
+    while pos < maximum:
+        ch = charCodeAt(string, pos)
+        if ch == 0x0A:
+            lines += 1
+        elif isSpace(ch):
+            pass
+        else:
+            break
+        pos += 1
+
+    # [label]:   destination   'title'
+    #            ^^^^^^^^^^^ parse this
+    res = state.md.helpers.parseLinkDestination(string, pos, maximum)
+    if not res.ok:
+        return False
+
+    href = state.md.normalizeLink(res.str)
+    if not state.md.validateLink(href):
+        return False
+
+    pos = res.pos
+    lines += res.lines
+
+    # save cursor state, we could require to rollback later
+    destEndPos = pos
+    destEndLineNo = lines
+
+    # [label]:   destination   'title'
+    #                       ^^^ skipping those spaces
+    start = pos
+    while pos < maximum:
+        ch = charCodeAt(string, pos)
+        if ch == 0x0A:
+            lines += 1
+        elif isSpace(ch):
+            pass
+        else:
+            break
+        pos += 1
+
+    # [label]:   destination   'title'
+    #                          ^^^^^^^ parse this
+    res = state.md.helpers.parseLinkTitle(string, pos, maximum)
+    if pos < maximum and start != pos and res.ok:
+        title = res.str
+        pos = res.pos
+        lines += res.lines
+    else:
+        # no title (or no whitespace before it): rewind to the destination end
+        title = ""
+        pos = destEndPos
+        lines = destEndLineNo
+
+    # skip trailing spaces until the rest of the line
+    while pos < maximum:
+        ch = charCodeAt(string, pos)
+        if not isSpace(ch):
+            break
+        pos += 1
+
+    if pos < maximum and charCodeAt(string, pos) != 0x0A:
+        if title:
+            # garbage at the end of the line after title,
+            # but it could still be a valid reference if we roll back
+            title = ""
+            pos = destEndPos
+            lines = destEndLineNo
+            while pos < maximum:
+                ch = charCodeAt(string, pos)
+                if not isSpace(ch):
+                    break
+                pos += 1
+
+    if pos < maximum and charCodeAt(string, pos) != 0x0A:
+        # garbage at the end of the line
+        return False
+
+    label = normalizeReference(string[1:labelEnd])
+    if not label:
+        # CommonMark 0.20 disallows empty labels
+        return False
+
+    # Reference can not terminate anything. This check is for safety only.
+    if silent:
+        return True
+
+    if "references" not in state.env:
+        state.env["references"] = {}
+
+    state.line = startLine + lines + 1
+
+    # note, this is not part of markdown-it JS, but is useful for renderers
+    if state.md.options.get("inline_definitions", False):
+        token = state.push("definition", "", 0)
+        token.meta = {
+            "id": label,
+            "title": title,
+            "url": href,
+            "label": string[1:labelEnd],
+        }
+        token.map = [startLine, state.line]
+
+    if label not in state.env["references"]:
+        state.env["references"][label] = {
+            "title": title,
+            "href": href,
+            "map": [startLine, state.line],
+        }
+    else:
+        # keep the first definition, but record the duplicate
+        state.env.setdefault("duplicate_refs", []).append(
+            {
+                "title": title,
+                "href": href,
+                "label": label,
+                "map": [startLine, state.line],
+            }
+        )
+
+    return True
diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py
new file mode 100644
index 0000000..42b8fce
--- /dev/null
+++ b/markdown_it/rules_block/state_block.py
@@ -0,0 +1,230 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from ..common.utils import isSpace
+from ..ruler import StateBase
+from ..token import Token
+
+if TYPE_CHECKING:
+ from markdown_it.main import MarkdownIt
+
+
+class StateBlock(StateBase):
+    """Parser state shared by the block rules.
+
+    Holds the source text, the output token list and five per-line caches
+    (``bMarks``, ``eMarks``, ``tShift``, ``sCount``, ``bsCount``) that are
+    filled once in ``__init__`` by a single pass over the source.
+    """
+
+    def __init__(
+        self,
+        src: str,
+        md: MarkdownIt,
+        env,
+        tokens: list[Token],
+        srcCharCode: tuple[int, ...] | None = None,
+    ):
+        """Store the inputs and build the per-line caches.
+
+        :param src: source text
+        :param md: parser instance this state belongs to
+        :param env: environment object, stored as ``self.env``
+        :param tokens: list that ``push`` appends new tokens to
+        :param srcCharCode: optional pre-computed character codes of ``src``
+        """
+        if srcCharCode is None:
+            # NOTE(review): ``srcCharCode`` is read below, so assigning
+            # ``self.src`` is presumably what populates it (handled outside
+            # this class) - confirm against StateBase.
+            self.src = src
+        else:
+            self._src = src
+            self.srcCharCode = srcCharCode
+
+        # link to parser instance
+        self.md = md
+
+        self.env = env
+
+        #
+        # Internal state variables
+        #
+
+        self.tokens = tokens
+
+        self.bMarks: list[int] = []  # line begin offsets for fast jumps
+        self.eMarks: list[int] = []  # line end offsets for fast jumps
+        # offsets of the first non-space characters (tabs not expanded)
+        self.tShift: list[int] = []
+        self.sCount: list[int] = []  # indents for each line (tabs expanded)
+
+        # An amount of virtual spaces (tabs expanded) between beginning
+        # of each line (bMarks) and real beginning of that line.
+        #
+        # It exists only as a hack because blockquotes override bMarks
+        # losing information in the process.
+        #
+        # It's used only when expanding tabs, you can think about it as
+        # an initial tab length, e.g. bsCount=21 applied to string `\t123`
+        # means first tab should be expanded to 4-21%4 === 3 spaces.
+        #
+        self.bsCount: list[int] = []
+
+        # block parser variables
+        self.blkIndent = 0  # required block content indent (for example, if we are
+        # inside a list, it would be positioned after list marker)
+        self.line = 0  # line index in src
+        self.lineMax = 0  # lines count
+        self.tight = False  # loose/tight mode for lists
+        self.ddIndent = -1  # indent of the current dd block (-1 if there isn't any)
+        self.listIndent = -1  # indent of the current list block (-1 if there isn't any)
+
+        # can be 'blockquote', 'list', 'root', 'paragraph' or 'reference'
+        # used in lists to determine if they interrupt a paragraph
+        self.parentType = "root"
+
+        self.level = 0
+
+        # renderer
+        self.result = ""
+
+        # Build the per-line caches in one pass over the character codes.
+        srcLength = len(self.src)
+        lastIndex = srcLength - 1
+        lineStart = 0  # offset where the current line begins
+        leading = 0  # leading whitespace characters seen on the current line
+        columns = 0  # the same indent measured in columns (tab stops every 4)
+        sawContent = False  # True once a non-space character was met on the line
+
+        for index, code in enumerate(self.srcCharCode):
+            if not sawContent:
+                if isSpace(code):
+                    leading += 1
+                    columns += (4 - columns % 4) if code == 0x09 else 1
+                    continue
+                sawContent = True
+
+            isLineFeed = code == 0x0A
+            if not isLineFeed and index != lastIndex:
+                continue
+
+            # a line ends on its line feed, or one past the final character
+            lineEnd = index if isLineFeed else index + 1
+            self.bMarks.append(lineStart)
+            self.eMarks.append(lineEnd)
+            self.tShift.append(leading)
+            self.sCount.append(columns)
+            self.bsCount.append(0)
+
+            sawContent = False
+            leading = columns = 0
+            lineStart = lineEnd + 1
+
+        # Push fake entry to simplify cache bounds checks
+        for cache, filler in (
+            (self.bMarks, srcLength),
+            (self.eMarks, srcLength),
+            (self.tShift, 0),
+            (self.sCount, 0),
+            (self.bsCount, 0),
+        ):
+            cache.append(filler)
+
+        self.lineMax = len(self.bMarks) - 1  # don't count last fake line
+
+    def __repr__(self):
+        """Return a short summary: current line, nesting level, token count."""
+        details = f"line={self.line},level={self.level},tokens={len(self.tokens)}"
+        return f"{self.__class__.__name__}({details})"
+
+    def push(self, ttype: str, tag: str, nesting: int) -> Token:
+        """Create a block token, append it to ``self.tokens`` and return it.
+
+        A closing token (``nesting < 0``) lowers ``self.level`` before its own
+        level is recorded; an opening token (``nesting > 0``) raises it after.
+        """
+        token = Token(ttype, tag, nesting)
+        token.block = True
+
+        if nesting < 0:
+            self.level -= 1  # closing tag
+        token.level = self.level
+        if nesting > 0:
+            self.level += 1  # opening tag
+
+        self.tokens.append(token)
+        return token
+
+    def isEmpty(self, line: int) -> bool:
+        """Return True if ``line`` has nothing between its first non-space offset and its end."""
+        contentStart = self.bMarks[line] + self.tShift[line]
+        return contentStart >= self.eMarks[line]
+
+    def skipEmptyLines(self, from_pos: int) -> int:
+        """Return the first line index >= ``from_pos`` that is not empty (at most ``lineMax``)."""
+        lineNo = from_pos
+        while lineNo < self.lineMax:
+            try:
+                contentStart = self.bMarks[lineNo] + self.tShift[lineNo]
+                hasContent = contentStart < self.eMarks[lineNo]
+            except IndexError:
+                # an index outside the caches is stepped over like an empty line
+                hasContent = False
+            if hasContent:
+                break
+            lineNo += 1
+        return lineNo
+
+    def skipSpaces(self, pos: int) -> int:
+        """Skip spaces from given position."""
+        while pos < len(self.src) and isSpace(self.srcCharCode[pos]):
+            pos += 1
+        return pos
+
+    def skipSpacesBack(self, pos: int, minimum: int) -> int:
+        """Skip spaces from given position in reverse."""
+        # step left while the character just before ``pos`` is a space
+        while pos > minimum and isSpace(self.srcCharCode[pos - 1]):
+            pos -= 1
+        return pos
+
+    def skipChars(self, pos: int, code: int) -> int:
+        """Skip char codes from given position."""
+        while pos < len(self.src) and self.srcCharCode[pos] == code:
+            pos += 1
+        return pos
+
+    def skipCharsBack(self, pos: int, code: int, minimum: int) -> int:
+        """Skip char codes reverse from given position - 1."""
+        # step left while the character just before ``pos`` equals ``code``
+        while pos > minimum and self.srcCharCode[pos - 1] == code:
+            pos -= 1
+        return pos
+
+    def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str:
+        """Cut lines range from source.
+
+        :param begin: index of the first line to include
+        :param end: index one past the last line to include
+        :param indent: number of indent columns to strip from each line
+        :param keepLastLF: whether the final line keeps its line feed
+        :return: the joined text, or ``""`` when ``begin >= end``
+        """
+        if begin >= end:
+            return ""
+
+        pieces = []
+        for lineNo in range(begin, end):
+            lineStart = cursor = self.bMarks[lineNo]
+            stop = self.eMarks[lineNo]
+            if keepLastLF or lineNo + 1 < end:
+                stop += 1  # include the line feed
+
+            # consume up to ``indent`` columns from the start of the line
+            consumed = 0
+            while cursor < stop and consumed < indent:
+                code = self.srcCharCode[cursor]
+                if isSpace(code):
+                    if code == 0x09:
+                        consumed += 4 - (consumed + self.bsCount[lineNo]) % 4
+                    else:
+                        consumed += 1
+                elif cursor - lineStart < self.tShift[lineNo]:
+                    # non-space character that still lies inside the tShift range
+                    consumed += 1
+                else:
+                    break
+                cursor += 1
+
+            text = self.src[cursor:stop]
+            if consumed > indent:
+                # partially expanding tabs in code blocks, e.g '\t\tfoobar'
+                # with indent=2 becomes '  \tfoobar'
+                text = " " * (consumed - indent) + text
+            pieces.append(text)
+
+        return "".join(pieces)
diff --git a/markdown_it/rules_block/table.py b/markdown_it/rules_block/table.py
new file mode 100644
index 0000000..e3db858
--- /dev/null
+++ b/markdown_it/rules_block/table.py
@@ -0,0 +1,238 @@
+# GFM table, https://github.github.com/gfm/#tables-extension-
+import re
+
+from ..common.utils import charCodeAt, isSpace
+from .state_block import StateBlock
+
+# Matches one delimiter-row cell: one or more dashes, optionally with a
+# colon at either end (the colons select the column alignment).
+headerLineRe = re.compile(r"^:?-+:?$")
+# Matches a pipe at the very start or at the very end of a string.
+enclosingPipesRe = re.compile(r"^\||\|$")
+
+
+def getLine(state: StateBlock, line: int):
+    """Return the text of ``line``, from its first non-space offset to its end mark."""
+    begin = state.bMarks[line] + state.tShift[line]
+    return state.src[begin : state.eMarks[line]]
+
+
+def escapedSplit(string):
+    """Split a table row into cells on every pipe not preceded by a backslash.
+
+    A pipe directly preceded by a backslash is kept in the cell as a plain
+    ``|`` and the backslash is dropped.  Leading and trailing pipes yield
+    empty first/last cells; trimming them is left to the caller.
+
+    :param string: text of one table row
+    :return: list of cell strings (``[""]`` for an empty input)
+    """
+    result = []
+    current = ""  # cell text collected so far, across escaped pipes
+    lastPos = 0  # start of the part of ``string`` not yet copied
+    isEscaped = False  # True when the previous character was a backslash
+
+    for pos, char in enumerate(string):
+        if char == "|":
+            if not isEscaped:
+                # pipe separating cells, '|'
+                result.append(current + string[lastPos:pos])
+                current = ""
+                lastPos = pos + 1
+            else:
+                # escaped pipe, '\|': copy up to (not including) the backslash
+                current += string[lastPos : pos - 1]
+                lastPos = pos
+
+        isEscaped = char == "\\"
+
+    result.append(current + string[lastPos:])
+
+    return result
+
+
+def table(state: StateBlock, startLine: int, endLine: int, silent: bool):
+    """Block rule for GFM tables: a header row, a delimiter row, then body rows.
+
+    The line after ``startLine`` must be a delimiter row whose cell count
+    equals that of the header row.  In silent mode the rule only reports
+    whether a table starts here.  Otherwise it pushes the ``table``/``thead``/
+    ``tbody``/``tr``/``th``/``td`` tokens with an ``inline`` token per cell
+    and moves ``state.line`` to the first line after the table.
+
+    :param state: block parser state
+    :param startLine: index of the candidate header row
+    :param endLine: index one past the last line that may belong to the table
+    :param silent: validation mode; no tokens are pushed
+    :return: ``True`` if a table was recognised, else ``False``
+    """
+    # should have at least two lines
+    if startLine + 2 > endLine:
+        return False
+
+    delimLine = startLine + 1
+    delimIndent = state.sCount[delimLine] - state.blkIndent
+    # negative: the line left the enclosing block;
+    # 4 or more: indented more than 3 spaces, it should be a code block
+    if delimIndent < 0 or delimIndent >= 4:
+        return False
+
+    # first character of the second line should be '|', '-', ':',
+    # and no other characters are allowed but spaces;
+    # basically, this is the equivalent of /^[-:|][-:|\s]*$/ regexp
+    delimCodes = (0x7C, 0x2D, 0x3A)  # "|", "-", ":"
+    cursor = state.bMarks[delimLine] + state.tShift[delimLine]
+    lineEnd = state.eMarks[delimLine]
+
+    if cursor >= lineEnd:
+        return False
+    firstCode = state.srcCharCode[cursor]
+    if firstCode not in delimCodes or cursor + 1 >= lineEnd:
+        return False
+
+    secondCode = state.srcCharCode[cursor + 1]
+    secondIsSpace = isSpace(secondCode)
+    if secondCode not in delimCodes and not secondIsSpace:
+        return False
+
+    # if first character is '-', then second character must not be a space
+    # (due to parsing ambiguity with list)
+    if firstCode == 0x2D and secondIsSpace:
+        return False
+
+    if any(
+        code not in delimCodes and not isSpace(code)
+        for code in state.srcCharCode[cursor + 2 : lineEnd]
+    ):
+        return False
+
+    # derive one alignment per delimiter cell
+    delimCells = getLine(state, delimLine).split("|")
+    lastIndex = len(delimCells) - 1
+    aligns = []
+    for index, rawCell in enumerate(delimCells):
+        cell = rawCell.strip()
+        if not cell:
+            # allow empty columns before and after table, but not in between columns;
+            # e.g. allow ` |---| `, disallow ` ---||--- `
+            if index in (0, lastIndex):
+                continue
+            return False
+
+        if not headerLineRe.search(cell):
+            return False
+
+        startsWithColon = charCodeAt(cell, 0) == 0x3A  # /* : */
+        endsWithColon = charCodeAt(cell, len(cell) - 1) == 0x3A  # /* : */
+        if endsWithColon:
+            aligns.append("center" if startsWithColon else "right")
+        else:
+            aligns.append("left" if startsWithColon else "")
+
+    headerText = getLine(state, startLine).strip()
+    if "|" not in headerText:
+        return False
+    if state.sCount[startLine] - state.blkIndent >= 4:
+        return False
+
+    headerCells = escapedSplit(headerText)
+    # drop the empty cells produced by a leading / trailing pipe
+    if headerCells and headerCells[0] == "":
+        del headerCells[0]
+    if headerCells and headerCells[-1] == "":
+        del headerCells[-1]
+
+    # header row will define an amount of columns in the entire table,
+    # and align row should be exactly the same (the rest of the rows can differ)
+    columnCount = len(headerCells)
+    if not columnCount or columnCount != len(aligns):
+        return False
+
+    if silent:
+        return True
+
+    savedParentType = state.parentType
+    state.parentType = "table"
+
+    # use 'blockquote' lists for termination because it's
+    # the most similar to tables
+    terminatorRules = state.md.block.ruler.getRules("blockquote")
+
+    # the end line of this span is filled in once the last row is known
+    tableSpan = [startLine, 0]
+    state.push("table_open", "table", 1).map = tableSpan
+    state.push("thead_open", "thead", 1).map = [startLine, startLine + 1]
+    state.push("tr_open", "tr", 1).map = [startLine, startLine + 1]
+
+    for align, cellText in zip(aligns, headerCells):
+        opener = state.push("th_open", "th", 1)
+        if align:
+            opener.attrs = {"style": "text-align:" + align}
+
+        inline = state.push("inline", "", 0)
+        # note in markdown-it this map was removed in v12.0.0 however, we keep it,
+        # since it is helpful to propagate to children tokens
+        inline.map = [startLine, startLine + 1]
+        inline.content = cellText.strip()
+        inline.children = []
+
+        state.push("th_close", "th", -1)
+
+    state.push("tr_close", "tr", -1)
+    state.push("thead_close", "thead", -1)
+
+    tbodySpan = None
+    firstBodyLine = startLine + 2
+    rowLine = firstBodyLine
+    while rowLine < endLine:
+        if state.sCount[rowLine] < state.blkIndent:
+            break
+
+        if any(rule(state, rowLine, endLine, True) for rule in terminatorRules):
+            break
+
+        rowText = getLine(state, rowLine).strip()
+        if not rowText:
+            break
+        if state.sCount[rowLine] - state.blkIndent >= 4:
+            break
+
+        rowCells = escapedSplit(rowText)
+        if rowCells and rowCells[0] == "":
+            del rowCells[0]
+        if rowCells and rowCells[-1] == "":
+            del rowCells[-1]
+
+        if rowLine == firstBodyLine:
+            # first body row: open <tbody>, end line is filled in later
+            tbodySpan = [firstBodyLine, 0]
+            state.push("tbody_open", "tbody", 1).map = tbodySpan
+
+        state.push("tr_open", "tr", 1).map = [rowLine, rowLine + 1]
+
+        # always emit exactly columnCount cells: extra cells in the row are
+        # ignored, missing ones become empty
+        for column in range(columnCount):
+            opener = state.push("td_open", "td", 1)
+            if aligns[column]:
+                opener.attrs = {"style": "text-align:" + aligns[column]}
+
+            inline = state.push("inline", "", 0)
+            # note in markdown-it this map was removed in v12.0.0 however, we keep it,
+            # since it is helpful to propagate to children tokens
+            inline.map = [rowLine, rowLine + 1]
+            cellText = rowCells[column] if column < len(rowCells) else ""
+            inline.content = cellText.strip() if cellText else ""
+            inline.children = []
+
+            state.push("td_close", "td", -1)
+
+        state.push("tr_close", "tr", -1)
+
+        rowLine += 1
+
+    if tbodySpan:
+        state.push("tbody_close", "tbody", -1)
+        tbodySpan[1] = rowLine
+
+    state.push("table_close", "table", -1)
+
+    tableSpan[1] = rowLine
+    state.parentType = savedParentType
+    state.line = rowLine
+    return True