diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-29 04:24:24 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-29 04:24:24 +0000 |
commit | 12e8343068b906f8b2afddc5569968a8a91fa5b0 (patch) | |
tree | 75cc5e05a4392ea0292251898f992a15a16b172b /markdown_it/rules_block | |
parent | Initial commit. (diff) | |
download | markdown-it-py-ef6b3991640e41f44752cdb6502719ca58a762c8.tar.xz markdown-it-py-ef6b3991640e41f44752cdb6502719ca58a762c8.zip |
Adding upstream version 2.1.0.upstream/2.1.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'markdown_it/rules_block')
-rw-r--r-- | markdown_it/rules_block/__init__.py | 27 | ||||
-rw-r--r-- | markdown_it/rules_block/blockquote.py | 299 | ||||
-rw-r--r-- | markdown_it/rules_block/code.py | 36 | ||||
-rw-r--r-- | markdown_it/rules_block/fence.py | 104 | ||||
-rw-r--r-- | markdown_it/rules_block/heading.py | 72 | ||||
-rw-r--r-- | markdown_it/rules_block/hr.py | 54 | ||||
-rw-r--r-- | markdown_it/rules_block/html_block.py | 91 | ||||
-rw-r--r-- | markdown_it/rules_block/lheading.py | 90 | ||||
-rw-r--r-- | markdown_it/rules_block/list.py | 344 | ||||
-rw-r--r-- | markdown_it/rules_block/paragraph.py | 67 | ||||
-rw-r--r-- | markdown_it/rules_block/reference.py | 218 | ||||
-rw-r--r-- | markdown_it/rules_block/state_block.py | 230 | ||||
-rw-r--r-- | markdown_it/rules_block/table.py | 238 |
13 files changed, 1870 insertions, 0 deletions
diff --git a/markdown_it/rules_block/__init__.py b/markdown_it/rules_block/__init__.py new file mode 100644 index 0000000..bcf138d --- /dev/null +++ b/markdown_it/rules_block/__init__.py @@ -0,0 +1,27 @@ +__all__ = ( + "StateBlock", + "paragraph", + "heading", + "lheading", + "code", + "fence", + "hr", + "list_block", + "reference", + "blockquote", + "html_block", + "table", +) + +from .blockquote import blockquote +from .code import code +from .fence import fence +from .heading import heading +from .hr import hr +from .html_block import html_block +from .lheading import lheading +from .list import list_block +from .paragraph import paragraph +from .reference import reference +from .state_block import StateBlock +from .table import table diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py new file mode 100644 index 0000000..6575731 --- /dev/null +++ b/markdown_it/rules_block/blockquote.py @@ -0,0 +1,299 @@ +# Block quotes +from __future__ import annotations + +import logging + +from ..common.utils import isSpace +from .state_block import StateBlock + +LOGGER = logging.getLogger(__name__) + + +def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool): + + LOGGER.debug( + "entering blockquote: %s, %s, %s, %s", state, startLine, endLine, silent + ) + + oldLineMax = state.lineMax + pos = state.bMarks[startLine] + state.tShift[startLine] + max = state.eMarks[startLine] + + # if it's indented more than 3 spaces, it should be a code block + if (state.sCount[startLine] - state.blkIndent) >= 4: + return False + + # check the block quote marker + if state.srcCharCode[pos] != 0x3E: # /* > */ + return False + pos += 1 + + # we know that it's going to be a valid blockquote, + # so no point trying to find the end of it in silent mode + if silent: + return True + + # set offset past spaces and ">" + initial = offset = state.sCount[startLine] + 1 + + try: + second_char_code: int | None = state.srcCharCode[pos] + except IndexError: + second_char_code = None + + # skip one optional space after '>' + if second_char_code == 0x20: # /* space */ + # ' > test ' + # ^ -- position start of line here: + pos += 1 + initial += 1 + offset += 1 + adjustTab = False + spaceAfterMarker = True + elif second_char_code == 0x09: # /* tab */ + spaceAfterMarker = True + + if (state.bsCount[startLine] + offset) % 4 == 3: + # ' >\t test ' + # ^ -- position start of line here (tab has width==1) + pos += 1 + initial += 1 + offset += 1 + adjustTab = False + else: + # ' >\t test ' + # ^ -- position start of line here + shift bsCount slightly + # to make extra space appear + adjustTab = True + + else: + spaceAfterMarker = False + + oldBMarks = [state.bMarks[startLine]] + state.bMarks[startLine] = pos + + while pos < max: + ch = state.srcCharCode[pos] + + if isSpace(ch): + if ch == 0x09: # / tab / + offset += ( + 4 + - (offset + state.bsCount[startLine] + (1 if adjustTab else 0)) % 4 + ) + else: + offset += 1 + + else: + break + + pos += 1 + + oldBSCount = [state.bsCount[startLine]] + state.bsCount[startLine] = ( + state.sCount[startLine] + 1 + (1 if spaceAfterMarker else 0) + ) + + lastLineEmpty = pos >= max + + oldSCount = [state.sCount[startLine]] + state.sCount[startLine] = offset - initial + + oldTShift = [state.tShift[startLine]] + state.tShift[startLine] = pos - state.bMarks[startLine] + + terminatorRules = state.md.block.ruler.getRules("blockquote") + + oldParentType = state.parentType + state.parentType = "blockquote" + + # Search the end of the block + # + # Block ends with either: + # 1. an empty line outside: + # ``` + # > test + # + # ``` + # 2. an empty line inside: + # ``` + # > + # test + # ``` + # 3. another tag: + # ``` + # > test + # - - - + # ``` + + # for (nextLine = startLine + 1; nextLine < endLine; nextLine++) { + nextLine = startLine + 1 + while nextLine < endLine: + + # check if it's outdented, i.e. it's inside list item and indented + # less than said list item: + # + # ``` + # 1. anything + # > current blockquote + # 2. checking this line + # ``` + isOutdented = state.sCount[nextLine] < state.blkIndent + + pos = state.bMarks[nextLine] + state.tShift[nextLine] + max = state.eMarks[nextLine] + + if pos >= max: + # Case 1: line is not inside the blockquote, and this line is empty. + break + + evaluatesTrue = state.srcCharCode[pos] == 0x3E and not isOutdented # /* > */ + pos += 1 + if evaluatesTrue: + # This line is inside the blockquote. + + # set offset past spaces and ">" + initial = offset = state.sCount[nextLine] + 1 + + try: + next_char: int | None = state.srcCharCode[pos] + except IndexError: + next_char = None + + # skip one optional space after '>' + if next_char == 0x20: # /* space */ + # ' > test ' + # ^ -- position start of line here: + pos += 1 + initial += 1 + offset += 1 + adjustTab = False + spaceAfterMarker = True + elif next_char == 0x09: # /* tab */ + spaceAfterMarker = True + + if (state.bsCount[nextLine] + offset) % 4 == 3: + # ' >\t test ' + # ^ -- position start of line here (tab has width==1) + pos += 1 + initial += 1 + offset += 1 + adjustTab = False + else: + # ' >\t test ' + # ^ -- position start of line here + shift bsCount slightly + # to make extra space appear + adjustTab = True + + else: + spaceAfterMarker = False + + oldBMarks.append(state.bMarks[nextLine]) + state.bMarks[nextLine] = pos + + while pos < max: + ch = state.srcCharCode[pos] + + if isSpace(ch): + if ch == 0x09: + offset += ( + 4 + - ( + offset + + state.bsCount[nextLine] + + (1 if adjustTab else 0) + ) + % 4 + ) + else: + offset += 1 + else: + break + + pos += 1 + + lastLineEmpty = pos >= max + + oldBSCount.append(state.bsCount[nextLine]) + state.bsCount[nextLine] = ( + state.sCount[nextLine] + 1 + (1 if spaceAfterMarker else 0) + ) + + oldSCount.append(state.sCount[nextLine]) + state.sCount[nextLine] = offset - initial + + oldTShift.append(state.tShift[nextLine]) + state.tShift[nextLine] = pos - state.bMarks[nextLine] + + nextLine += 1 + continue + + # Case 2: line is not inside the blockquote, and the last line was empty. + if lastLineEmpty: + break + + # Case 3: another tag found. + terminate = False + + for terminatorRule in terminatorRules: + if terminatorRule(state, nextLine, endLine, True): + terminate = True + break + + if terminate: + # Quirk to enforce "hard termination mode" for paragraphs; + # normally if you call `tokenize(state, startLine, nextLine)`, + # paragraphs will look below nextLine for paragraph continuation, + # but if blockquote is terminated by another tag, they shouldn't + state.lineMax = nextLine + + if state.blkIndent != 0: + # state.blkIndent was non-zero, we now set it to zero, + # so we need to re-calculate all offsets to appear as + # if indent wasn't changed + oldBMarks.append(state.bMarks[nextLine]) + oldBSCount.append(state.bsCount[nextLine]) + oldTShift.append(state.tShift[nextLine]) + oldSCount.append(state.sCount[nextLine]) + state.sCount[nextLine] -= state.blkIndent + + break + + oldBMarks.append(state.bMarks[nextLine]) + oldBSCount.append(state.bsCount[nextLine]) + oldTShift.append(state.tShift[nextLine]) + oldSCount.append(state.sCount[nextLine]) + + # A negative indentation means that this is a paragraph continuation + # + state.sCount[nextLine] = -1 + + nextLine += 1 + + oldIndent = state.blkIndent + state.blkIndent = 0 + + token = state.push("blockquote_open", "blockquote", 1) + token.markup = ">" + token.map = lines = [startLine, 0] + + state.md.block.tokenize(state, startLine, nextLine) + + token = state.push("blockquote_close", "blockquote", -1) + token.markup = ">" + + state.lineMax = oldLineMax + state.parentType = oldParentType + lines[1] = state.line + + # Restore original tShift; this might not be necessary since the parser + # has already been here, but just to make sure we can do that. + for i, item in enumerate(oldTShift): + state.bMarks[i + startLine] = oldBMarks[i] + state.tShift[i + startLine] = item + state.sCount[i + startLine] = oldSCount[i] + state.bsCount[i + startLine] = oldBSCount[i] + + state.blkIndent = oldIndent + + return True diff --git a/markdown_it/rules_block/code.py b/markdown_it/rules_block/code.py new file mode 100644 index 0000000..c4fdba3 --- /dev/null +++ b/markdown_it/rules_block/code.py @@ -0,0 +1,36 @@ +"""Code block (4 spaces padded).""" +import logging + +from .state_block import StateBlock + +LOGGER = logging.getLogger(__name__) + + +def code(state: StateBlock, startLine: int, endLine: int, silent: bool = False): + + LOGGER.debug("entering code: %s, %s, %s, %s", state, startLine, endLine, silent) + + if state.sCount[startLine] - state.blkIndent < 4: + return False + + last = nextLine = startLine + 1 + + while nextLine < endLine: + if state.isEmpty(nextLine): + nextLine += 1 + continue + + if state.sCount[nextLine] - state.blkIndent >= 4: + nextLine += 1 + last = nextLine + continue + + break + + state.line = last + + token = state.push("code_block", "code", 0) + token.content = state.getLines(startLine, last, 4 + state.blkIndent, False) + "\n" + token.map = [startLine, state.line] + + return True diff --git a/markdown_it/rules_block/fence.py b/markdown_it/rules_block/fence.py new file mode 100644 index 0000000..c4f5275 --- /dev/null +++ b/markdown_it/rules_block/fence.py @@ -0,0 +1,104 @@ +# fences (``` lang, ~~~ lang) +import logging + +from .state_block import StateBlock + +LOGGER = logging.getLogger(__name__) + + +def fence(state: StateBlock, startLine: int, endLine: int, silent: bool): + + LOGGER.debug("entering fence: %s, %s, %s, %s", state, startLine, endLine, silent) + + haveEndMarker = False + pos = state.bMarks[startLine] + state.tShift[startLine] + maximum = state.eMarks[startLine] + + # if it's indented more than 3 spaces, it should be a code block + if state.sCount[startLine] - state.blkIndent >= 4: + return False + + if pos + 3 > maximum: + return False + + marker = state.srcCharCode[pos] + + # /* ~ */ /* ` */ + if marker != 0x7E and marker != 0x60: + return False + + # scan marker length + mem = pos + pos = state.skipChars(pos, marker) + + length = pos - mem + + if length < 3: + return False + + markup = state.src[mem:pos] + params = state.src[pos:maximum] + + # /* ` */ + if marker == 0x60: + if chr(marker) in params: + return False + + # Since start is found, we can report success here in validation mode + if silent: + return True + + # search end of block + nextLine = startLine + + while True: + nextLine += 1 + if nextLine >= endLine: + # unclosed block should be autoclosed by end of document. + # also block seems to be autoclosed by end of parent + break + + pos = mem = state.bMarks[nextLine] + state.tShift[nextLine] + maximum = state.eMarks[nextLine] + + if pos < maximum and state.sCount[nextLine] < state.blkIndent: + # non-empty line with negative indent should stop the list: + # - ``` + # test + break + + if state.srcCharCode[pos] != marker: + continue + + if state.sCount[nextLine] - state.blkIndent >= 4: + # closing fence should be indented less than 4 spaces + continue + + pos = state.skipChars(pos, marker) + + # closing code fence must be at least as long as the opening one + if pos - mem < length: + continue + + # make sure tail has spaces only + pos = state.skipSpaces(pos) + + if pos < maximum: + continue + + haveEndMarker = True + # found! + break + + # If a fence has heading spaces, they should be removed from its inner block + length = state.sCount[startLine] + + state.line = nextLine + (1 if haveEndMarker else 0) + + token = state.push("fence", "code", 0) + token.info = params + token.content = state.getLines(startLine + 1, nextLine, length, True) + token.markup = markup + token.map = [startLine, state.line] + + return True diff --git a/markdown_it/rules_block/heading.py b/markdown_it/rules_block/heading.py new file mode 100644 index 0000000..8d4ef3e --- /dev/null +++ b/markdown_it/rules_block/heading.py @@ -0,0 +1,72 @@ +""" Atex heading (#, ##, ...) """ +from __future__ import annotations + +import logging + +from ..common.utils import isSpace +from .state_block import StateBlock + +LOGGER = logging.getLogger(__name__) + + +def heading(state: StateBlock, startLine: int, endLine: int, silent: bool): + + LOGGER.debug("entering heading: %s, %s, %s, %s", state, startLine, endLine, silent) + + pos = state.bMarks[startLine] + state.tShift[startLine] + maximum = state.eMarks[startLine] + + # if it's indented more than 3 spaces, it should be a code block + if state.sCount[startLine] - state.blkIndent >= 4: + return False + + ch: int | None = state.srcCharCode[pos] + + # /* # */ + if ch != 0x23 or pos >= maximum: + return False + + # count heading level + level = 1 + pos += 1 + try: + ch = state.srcCharCode[pos] + except IndexError: + ch = None + # /* # */ + while ch == 0x23 and pos < maximum and level <= 6: + level += 1 + pos += 1 + try: + ch = state.srcCharCode[pos] + except IndexError: + ch = None + + if level > 6 or (pos < maximum and not isSpace(ch)): + return False + + if silent: + return True + + # Let's cut tails like ' ### ' from the end of string + + maximum = state.skipSpacesBack(maximum, pos) + tmp = state.skipCharsBack(maximum, 0x23, pos) # # + if tmp > pos and isSpace(state.srcCharCode[tmp - 1]): + maximum = tmp + + state.line = startLine + 1 + + token = state.push("heading_open", "h" + str(level), 1) + token.markup = "########"[:level] + token.map = [startLine, state.line] + + token = state.push("inline", "", 0) + token.content = state.src[pos:maximum].strip() + token.map = [startLine, state.line] + token.children = [] + + token = state.push("heading_close", "h" + str(level), -1) + token.markup = "########"[:level] + + return True diff --git a/markdown_it/rules_block/hr.py b/markdown_it/rules_block/hr.py new file mode 100644 index 0000000..804cd9d --- /dev/null +++ b/markdown_it/rules_block/hr.py @@ -0,0 +1,54 @@ +"""Horizontal rule + +At least 3 of these characters on a line * - _ +""" +import logging + +from ..common.utils import isSpace +from .state_block import StateBlock + +LOGGER = logging.getLogger(__name__) + + +def hr(state: StateBlock, startLine: int, endLine: int, silent: bool): + + LOGGER.debug("entering hr: %s, %s, %s, %s", state, startLine, endLine, silent) + + pos = state.bMarks[startLine] + state.tShift[startLine] + maximum = state.eMarks[startLine] + + # if it's indented more than 3 spaces, it should be a code block + if state.sCount[startLine] - state.blkIndent >= 4: + return False + + marker = state.srcCharCode[pos] + pos += 1 + + # Check hr marker: /* * */ /* - */ /* _ */ + if marker != 0x2A and marker != 0x2D and marker != 0x5F: + return False + + # markers can be mixed with spaces, but there should be at least 3 of them + + cnt = 1 + while pos < maximum: + ch = state.srcCharCode[pos] + pos += 1 + if ch != marker and not isSpace(ch): + return False + if ch == marker: + cnt += 1 + + if cnt < 3: + return False + + if silent: + return True + + state.line = startLine + 1 + + token = state.push("hr", "hr", 0) + token.map = [startLine, state.line] + token.markup = chr(marker) * (cnt + 1) + + return True diff --git a/markdown_it/rules_block/html_block.py b/markdown_it/rules_block/html_block.py new file mode 100644 index 0000000..31afab7 --- /dev/null +++ b/markdown_it/rules_block/html_block.py @@ -0,0 +1,91 @@ +# HTML block +from __future__ import annotations + +import logging +import re + +from ..common.html_blocks import block_names +from ..common.html_re import HTML_OPEN_CLOSE_TAG_STR +from .state_block import StateBlock + +LOGGER = logging.getLogger(__name__) + +# An array of opening and corresponding closing sequences for html tags, +# last argument defines whether it can terminate a paragraph or not +HTML_SEQUENCES: list[tuple[re.Pattern, re.Pattern, bool]] = [ + ( + re.compile(r"^<(script|pre|style|textarea)(?=(\s|>|$))", re.IGNORECASE), + re.compile(r"<\/(script|pre|style|textarea)>", re.IGNORECASE), + True, + ), + (re.compile(r"^<!--"), re.compile(r"-->"), True), + (re.compile(r"^<\?"), re.compile(r"\?>"), True), + (re.compile(r"^<![A-Z]"), re.compile(r">"), True), + (re.compile(r"^<!\[CDATA\["), re.compile(r"\]\]>"), True), + ( + re.compile("^</?(" + "|".join(block_names) + ")(?=(\\s|/?>|$))", re.IGNORECASE), + re.compile(r"^$"), + True, + ), + (re.compile(HTML_OPEN_CLOSE_TAG_STR + "\\s*$"), re.compile(r"^$"), False), +] + + +def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool): + LOGGER.debug( + "entering html_block: %s, %s, %s, %s", state, startLine, endLine, silent + ) + pos = state.bMarks[startLine] + state.tShift[startLine] + maximum = state.eMarks[startLine] + + # if it's indented more than 3 spaces, it should be a code block + if state.sCount[startLine] - state.blkIndent >= 4: + return False + + if not state.md.options.get("html", None): + return False + + if state.srcCharCode[pos] != 0x3C: # /* < */ + return False + + lineText = state.src[pos:maximum] + + html_seq = None + for HTML_SEQUENCE in HTML_SEQUENCES: + if HTML_SEQUENCE[0].search(lineText): + html_seq = HTML_SEQUENCE + break + + if not html_seq: + return False + + if silent: + # true if this sequence can be a terminator, false otherwise + return html_seq[2] + + nextLine = startLine + 1 + + # If we are here - we detected HTML block. + # Let's roll down till block end. + if not html_seq[1].search(lineText): + while nextLine < endLine: + if state.sCount[nextLine] < state.blkIndent: + break + + pos = state.bMarks[nextLine] + state.tShift[nextLine] + maximum = state.eMarks[nextLine] + lineText = state.src[pos:maximum] + + if html_seq[1].search(lineText): + if len(lineText) != 0: + nextLine += 1 + break + nextLine += 1 + + state.line = nextLine + + token = state.push("html_block", "", 0) + token.map = [startLine, nextLine] + token.content = state.getLines(startLine, nextLine, state.blkIndent, True) + + return True diff --git a/markdown_it/rules_block/lheading.py b/markdown_it/rules_block/lheading.py new file mode 100644 index 0000000..f26e2af --- /dev/null +++ b/markdown_it/rules_block/lheading.py @@ -0,0 +1,90 @@ +# lheading (---, ==) +import logging + +from ..ruler import Ruler +from .state_block import StateBlock + +LOGGER = logging.getLogger(__name__) + + +def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool): + + LOGGER.debug("entering lheading: %s, %s, %s, %s", state, startLine, endLine, silent) + + level = None + nextLine = startLine + 1 + ruler: Ruler = state.md.block.ruler + terminatorRules = ruler.getRules("paragraph") + + # if it's indented more than 3 spaces, it should be a code block + if state.sCount[startLine] - state.blkIndent >= 4: + return False + + oldParentType = state.parentType + state.parentType = "paragraph" # use paragraph to match terminatorRules + + # jump line-by-line until empty one or EOF + while nextLine < endLine and not state.isEmpty(nextLine): + # this would be a code block normally, but after paragraph + # it's considered a lazy continuation regardless of what's there + if state.sCount[nextLine] - state.blkIndent > 3: + nextLine += 1 + continue + + # Check for underline in setext header + if state.sCount[nextLine] >= state.blkIndent: + pos = state.bMarks[nextLine] + state.tShift[nextLine] + maximum = state.eMarks[nextLine] + + if pos < maximum: + marker = state.srcCharCode[pos] + + # /* - */ /* = */ + if marker == 0x2D or marker == 0x3D: + pos = state.skipChars(pos, marker) + pos = state.skipSpaces(pos) + + # /* = */ + if pos >= maximum: + level = 1 if marker == 0x3D else 2 + break + + # quirk for blockquotes, this line should already be checked by that rule + if state.sCount[nextLine] < 0: + nextLine += 1 + continue + + # Some tags can terminate paragraph without empty line. + terminate = False + for terminatorRule in terminatorRules: + if terminatorRule(state, nextLine, endLine, True): + terminate = True + break + if terminate: + break + + nextLine += 1 + + if not level: + # Didn't find valid underline + return False + + content = state.getLines(startLine, nextLine, state.blkIndent, False).strip() + + state.line = nextLine + 1 + + token = state.push("heading_open", "h" + str(level), 1) + token.markup = chr(marker) + token.map = [startLine, state.line] + + token = state.push("inline", "", 0) + token.content = content + token.map = [startLine, state.line - 1] + token.children = [] + + token = state.push("heading_close", "h" + str(level), -1) + token.markup = chr(marker) + + state.parentType = oldParentType + + return True diff --git a/markdown_it/rules_block/list.py b/markdown_it/rules_block/list.py new file mode 100644 index 0000000..a7617ad --- /dev/null +++ b/markdown_it/rules_block/list.py @@ -0,0 +1,344 @@ +# Lists +import logging + +from ..common.utils import isSpace +from .state_block import StateBlock + +LOGGER = logging.getLogger(__name__) + + +# Search `[-+*][\n ]`, returns next pos after marker on success +# or -1 on fail. +def skipBulletListMarker(state: StateBlock, startLine: int): + + pos = state.bMarks[startLine] + state.tShift[startLine] + maximum = state.eMarks[startLine] + + marker = state.srcCharCode[pos] + pos += 1 + # Check bullet /* * */ /* - */ /* + */ + if marker != 0x2A and marker != 0x2D and marker != 0x2B: + return -1 + + if pos < maximum: + ch = state.srcCharCode[pos] + + if not isSpace(ch): + # " -test " - is not a list item + return -1 + + return pos + + +# Search `\d+[.)][\n ]`, returns next pos after marker on success +# or -1 on fail. +def skipOrderedListMarker(state: StateBlock, startLine: int): + + start = state.bMarks[startLine] + state.tShift[startLine] + pos = start + maximum = state.eMarks[startLine] + + # List marker should have at least 2 chars (digit + dot) + if pos + 1 >= maximum: + return -1 + + ch = state.srcCharCode[pos] + pos += 1 + + # /* 0 */ /* 9 */ + if ch < 0x30 or ch > 0x39: + return -1 + + while True: + # EOL -> fail + if pos >= maximum: + return -1 + + ch = state.srcCharCode[pos] + pos += 1 + + # /* 0 */ /* 9 */ + if ch >= 0x30 and ch <= 0x39: + + # List marker should have no more than 9 digits + # (prevents integer overflow in browsers) + if pos - start >= 10: + return -1 + + continue + + # found valid marker: /* ) */ /* . */ + if ch == 0x29 or ch == 0x2E: + break + + return -1 + + if pos < maximum: + ch = state.srcCharCode[pos] + + if not isSpace(ch): + # " 1.test " - is not a list item + return -1 + + return pos + + +def markTightParagraphs(state: StateBlock, idx: int): + level = state.level + 2 + + i = idx + 2 + length = len(state.tokens) - 2 + while i < length: + if state.tokens[i].level == level and state.tokens[i].type == "paragraph_open": + state.tokens[i + 2].hidden = True + state.tokens[i].hidden = True + i += 2 + i += 1 + + +def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool): + + LOGGER.debug("entering list: %s, %s, %s, %s", state, startLine, endLine, silent) + + isTerminatingParagraph = False + tight = True + + # if it's indented more than 3 spaces, it should be a code block + if state.sCount[startLine] - state.blkIndent >= 4: + return False + + # Special case: + # - item 1 + # - item 2 + # - item 3 + # - item 4 + # - this one is a paragraph continuation + if ( + state.listIndent >= 0 + and state.sCount[startLine] - state.listIndent >= 4 + and state.sCount[startLine] < state.blkIndent + ): + return False + + # limit conditions when list can interrupt + # a paragraph (validation mode only) + if silent and state.parentType == "paragraph": + # Next list item should still terminate previous list item + # + # This code can fail if plugins use blkIndent as well as lists, + # but I hope the spec gets fixed long before that happens. + # + if state.tShift[startLine] >= state.blkIndent: + isTerminatingParagraph = True + + # Detect list type and position after marker + posAfterMarker = skipOrderedListMarker(state, startLine) + if posAfterMarker >= 0: + isOrdered = True + start = state.bMarks[startLine] + state.tShift[startLine] + markerValue = int(state.src[start : posAfterMarker - 1]) + + # If we're starting a new ordered list right after + # a paragraph, it should start with 1. + if isTerminatingParagraph and markerValue != 1: + return False + else: + posAfterMarker = skipBulletListMarker(state, startLine) + if posAfterMarker >= 0: + isOrdered = False + else: + return False + + # If we're starting a new unordered list right after + # a paragraph, first line should not be empty. + if isTerminatingParagraph: + if state.skipSpaces(posAfterMarker) >= state.eMarks[startLine]: + return False + + # We should terminate list on style change. Remember first one to compare. + markerCharCode = state.srcCharCode[posAfterMarker - 1] + + # For validation mode we can terminate immediately + if silent: + return True + + # Start list + listTokIdx = len(state.tokens) + + if isOrdered: + token = state.push("ordered_list_open", "ol", 1) + if markerValue != 1: + token.attrs = {"start": markerValue} + + else: + token = state.push("bullet_list_open", "ul", 1) + + token.map = listLines = [startLine, 0] + token.markup = chr(markerCharCode) + + # + # Iterate list items + # + + nextLine = startLine + prevEmptyEnd = False + terminatorRules = state.md.block.ruler.getRules("list") + + oldParentType = state.parentType + state.parentType = "list" + + while nextLine < endLine: + pos = posAfterMarker + maximum = state.eMarks[nextLine] + + initial = offset = ( + state.sCount[nextLine] + + posAfterMarker + - (state.bMarks[startLine] + state.tShift[startLine]) + ) + + while pos < maximum: + ch = state.srcCharCode[pos] + + if ch == 0x09: # \t + offset += 4 - (offset + state.bsCount[nextLine]) % 4 + elif ch == 0x20: # \s + offset += 1 + else: + break + + pos += 1 + + contentStart = pos + + if contentStart >= maximum: + # trimming space in "- \n 3" case, indent is 1 here + indentAfterMarker = 1 + else: + indentAfterMarker = offset - initial + + # If we have more than 4 spaces, the indent is 1 + # (the rest is just indented code block) + if indentAfterMarker > 4: + indentAfterMarker = 1 + + # " - test" + # ^^^^^ - calculating total length of this thing + indent = initial + indentAfterMarker + + # Run subparser & write tokens + token = state.push("list_item_open", "li", 1) + token.markup = chr(markerCharCode) + token.map = itemLines = [startLine, 0] + if isOrdered: + token.info = state.src[start : posAfterMarker - 1] + + # change current state, then restore it after parser subcall + oldTight = state.tight + oldTShift = state.tShift[startLine] + oldSCount = state.sCount[startLine] + + # - example list + # ^ listIndent position will be here + # ^ blkIndent position will be here + # + oldListIndent = state.listIndent + state.listIndent = state.blkIndent + state.blkIndent = indent + + state.tight = True + state.tShift[startLine] = contentStart - state.bMarks[startLine] + state.sCount[startLine] = offset + + if contentStart >= maximum and state.isEmpty(startLine + 1): + # workaround for this case + # (list item is empty, list terminates before "foo"): + # ~~~~~~~~ + # - + # + # foo + # ~~~~~~~~ + state.line = min(state.line + 2, endLine) + else: + # NOTE in list.js this was: + # state.md.block.tokenize(state, startLine, endLine, True) + # but tokeniz does not take the final parameter + state.md.block.tokenize(state, startLine, endLine) + + # If any of list item is tight, mark list as tight + if (not state.tight) or prevEmptyEnd: + tight = False + + # Item become loose if finish with empty line, + # but we should filter last element, because it means list finish + prevEmptyEnd = (state.line - startLine) > 1 and state.isEmpty(state.line - 1) + + state.blkIndent = state.listIndent + state.listIndent = oldListIndent + state.tShift[startLine] = oldTShift + state.sCount[startLine] = oldSCount + state.tight = oldTight + + token = state.push("list_item_close", "li", -1) + token.markup = chr(markerCharCode) + + nextLine = startLine = state.line + itemLines[1] = nextLine + + if nextLine >= endLine: + break + + contentStart = state.bMarks[startLine] + + # + # Try to check if list is terminated or continued. + # + if state.sCount[nextLine] < state.blkIndent: + break + + # if it's indented more than 3 spaces, it should be a code block + if state.sCount[startLine] - state.blkIndent >= 4: + break + + # fail if terminating block found + terminate = False + for terminatorRule in terminatorRules: + if terminatorRule(state, nextLine, endLine, True): + terminate = True + break + + if terminate: + break + + # fail if list has another type + if isOrdered: + posAfterMarker = skipOrderedListMarker(state, nextLine) + if posAfterMarker < 0: + break + start = state.bMarks[nextLine] + state.tShift[nextLine] + else: + posAfterMarker = skipBulletListMarker(state, nextLine) + if posAfterMarker < 0: + break + + if markerCharCode != state.srcCharCode[posAfterMarker - 1]: + break + + # Finalize list + if isOrdered: + token = state.push("ordered_list_close", "ol", -1) + else: + token = state.push("bullet_list_close", "ul", -1) + + token.markup = chr(markerCharCode) + + listLines[1] = nextLine + state.line = nextLine + + state.parentType = oldParentType + + # mark paragraphs tight if needed + if tight: + markTightParagraphs(state, listTokIdx) + + return True diff --git a/markdown_it/rules_block/paragraph.py b/markdown_it/rules_block/paragraph.py new file mode 100644 index 0000000..4fee83e --- /dev/null +++ b/markdown_it/rules_block/paragraph.py @@ -0,0 +1,67 @@ +"""Paragraph.""" +import logging + +from ..ruler import Ruler +from .state_block import StateBlock + +LOGGER = logging.getLogger(__name__) + + +def paragraph(state: StateBlock, startLine: int, endLine: int, silent: bool = False): + + LOGGER.debug( + "entering paragraph: %s, %s, %s, %s", state, startLine, endLine, silent + ) + + nextLine = startLine + 1 + ruler: Ruler = state.md.block.ruler + terminatorRules = ruler.getRules("paragraph") + endLine = state.lineMax + + oldParentType = state.parentType + state.parentType = "paragraph" + + # jump line-by-line until empty one or EOF + while nextLine < endLine: + if state.isEmpty(nextLine): + break + # this would be a code block normally, but after paragraph + # it's considered a lazy continuation regardless of what's there + if state.sCount[nextLine] - state.blkIndent > 3: + nextLine += 1 + continue + + # quirk for blockquotes, this line should already be checked by that rule + if state.sCount[nextLine] < 0: + nextLine += 1 + continue + + # Some tags can terminate paragraph without empty line. + terminate = False + for terminatorRule in terminatorRules: + if terminatorRule(state, nextLine, endLine, True): + terminate = True + break + + if terminate: + break + + nextLine += 1 + + content = state.getLines(startLine, nextLine, state.blkIndent, False).strip() + + state.line = nextLine + + token = state.push("paragraph_open", "p", 1) + token.map = [startLine, state.line] + + token = state.push("inline", "", 0) + token.content = content + token.map = [startLine, state.line] + token.children = [] + + token = state.push("paragraph_close", "p", -1) + + state.parentType = oldParentType + + return True diff --git a/markdown_it/rules_block/reference.py b/markdown_it/rules_block/reference.py new file mode 100644 index 0000000..35adde2 --- /dev/null +++ b/markdown_it/rules_block/reference.py @@ -0,0 +1,218 @@ +import logging + +from ..common.utils import charCodeAt, isSpace, normalizeReference +from .state_block import StateBlock + +LOGGER = logging.getLogger(__name__) + + +def reference(state: StateBlock, startLine, _endLine, silent): + + LOGGER.debug( + "entering reference: %s, %s, %s, %s", state, startLine, _endLine, silent + ) + + lines = 0 + pos = state.bMarks[startLine] + state.tShift[startLine] + maximum = state.eMarks[startLine] + nextLine = startLine + 1 + + # if it's indented more than 3 spaces, it should be a code block + if state.sCount[startLine] - state.blkIndent >= 4: + return False + + if state.srcCharCode[pos] != 0x5B: # /* [ */ + return False + + # Simple check to quickly interrupt scan on [link](url) at the start of line. + # Can be useful on practice: https:#github.com/markdown-it/markdown-it/issues/54 + while pos < maximum: + # /* ] */ /* \ */ /* : */ + if state.srcCharCode[pos] == 0x5D and state.srcCharCode[pos - 1] != 0x5C: + if pos + 1 == maximum: + return False + if state.srcCharCode[pos + 1] != 0x3A: + return False + break + pos += 1 + + endLine = state.lineMax + + # jump line-by-line until empty one or EOF + terminatorRules = state.md.block.ruler.getRules("reference") + + oldParentType = state.parentType + state.parentType = "reference" + + while nextLine < endLine and not state.isEmpty(nextLine): + # this would be a code block normally, but after paragraph + # it's considered a lazy continuation regardless of what's there + if state.sCount[nextLine] - state.blkIndent > 3: + nextLine += 1 + continue + + # quirk for blockquotes, this line should already be checked by that rule + if state.sCount[nextLine] < 0: + nextLine += 1 + continue + + # Some tags can terminate paragraph without empty line. + terminate = False + for terminatorRule in terminatorRules: + if terminatorRule(state, nextLine, endLine, True): + terminate = True + break + + if terminate: + break + + nextLine += 1 + + string = state.getLines(startLine, nextLine, state.blkIndent, False).strip() + maximum = len(string) + + labelEnd = None + pos = 1 + while pos < maximum: + ch = charCodeAt(string, pos) + if ch == 0x5B: # /* [ */ + return False + elif ch == 0x5D: # /* ] */ + labelEnd = pos + break + elif ch == 0x0A: # /* \n */ + lines += 1 + elif ch == 0x5C: # /* \ */ + pos += 1 + if pos < maximum and charCodeAt(string, pos) == 0x0A: + lines += 1 + pos += 1 + + if ( + labelEnd is None or labelEnd < 0 or charCodeAt(string, labelEnd + 1) != 0x3A + ): # /* : */ + return False + + # [label]: destination 'title' + # ^^^ skip optional whitespace here + pos = labelEnd + 2 + while pos < maximum: + ch = charCodeAt(string, pos) + if ch == 0x0A: + lines += 1 + elif isSpace(ch): + pass + else: + break + pos += 1 + + # [label]: destination 'title' + # ^^^^^^^^^^^ parse this + res = state.md.helpers.parseLinkDestination(string, pos, maximum) + if not res.ok: + return False + + href = state.md.normalizeLink(res.str) + if not state.md.validateLink(href): + return False + + pos = res.pos + lines += res.lines + + # save cursor state, we could require to rollback later + destEndPos = pos + destEndLineNo = lines + + # [label]: destination 'title' + # ^^^ skipping those spaces + start = pos + while pos < maximum: + ch = charCodeAt(string, pos) + if ch == 0x0A: + lines += 1 + elif isSpace(ch): + pass + else: + break + pos += 1 + + # [label]: destination 'title' + # ^^^^^^^ parse this + res = state.md.helpers.parseLinkTitle(string, pos, maximum) + if pos < maximum and start != pos and res.ok: + title = res.str + pos = res.pos + lines += res.lines + else: + title = "" + pos = destEndPos + lines = destEndLineNo + + # skip trailing spaces until the rest of the line + while pos < maximum: + ch = charCodeAt(string, pos) + if not isSpace(ch): + break + pos += 1 + + if pos < maximum and charCodeAt(string, pos) != 0x0A: + if title: + # garbage at the end of the line after title, + # but it could still be a valid reference if we roll back + title = "" + pos = destEndPos + lines = destEndLineNo + while pos < maximum: + ch = charCodeAt(string, pos) + if not isSpace(ch): + break + pos += 1 + + if pos < maximum and charCodeAt(string, pos) != 0x0A: + # garbage at the end of the line + return False + + label = normalizeReference(string[1:labelEnd]) + if not label: + # CommonMark 0.20 disallows empty labels + return False + + # Reference can not terminate anything. This check is for safety only. + if silent: + return True + + if "references" not in state.env: + state.env["references"] = {} + + state.line = startLine + lines + 1 + + # note, this is not part of markdown-it JS, but is useful for renderers + if state.md.options.get("inline_definitions", False): + token = state.push("definition", "", 0) + token.meta = { + "id": label, + "title": title, + "url": href, + "label": string[1:labelEnd], + } + token.map = [startLine, state.line] + + if label not in state.env["references"]: + state.env["references"][label] = { + "title": title, + "href": href, + "map": [startLine, state.line], + } + else: + state.env.setdefault("duplicate_refs", []).append( + { + "title": title, + "href": href, + "label": label, + "map": [startLine, state.line], + } + ) + + state.parentType = oldParentType + + return True diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py new file mode 100644 index 0000000..42b8fce --- /dev/null +++ b/markdown_it/rules_block/state_block.py @@ -0,0 +1,230 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ..common.utils import isSpace +from ..ruler import StateBase +from ..token import Token + +if TYPE_CHECKING: + from markdown_it.main import MarkdownIt + + +class StateBlock(StateBase): + def __init__( + self, + src: str, + md: MarkdownIt, + env, + tokens: list[Token], + srcCharCode: tuple[int, ...] | None = None, + ): + + if srcCharCode is not None: + self._src = src + self.srcCharCode = srcCharCode + else: + self.src = src + + # link to parser instance + self.md = md + + self.env = env + + # + # Internal state variables + # + + self.tokens = tokens + + self.bMarks = [] # line begin offsets for fast jumps + self.eMarks = [] # line end offsets for fast jumps + # offsets of the first non-space characters (tabs not expanded) + self.tShift = [] + self.sCount = [] # indents for each line (tabs expanded) + + # An amount of virtual spaces (tabs expanded) between beginning + # of each line (bMarks) and real beginning of that line. + # + # It exists only as a hack because blockquotes override bMarks + # losing information in the process. + # + # It's used only when expanding tabs, you can think about it as + # an initial tab length, e.g. bsCount=21 applied to string `\t123` + # means first tab should be expanded to 4-21%4 === 3 spaces. + # + self.bsCount = [] + + # block parser variables + self.blkIndent = 0 # required block content indent (for example, if we are + # inside a list, it would be positioned after list marker) + self.line = 0 # line index in src + self.lineMax = 0 # lines count + self.tight = False # loose/tight mode for lists + self.ddIndent = -1 # indent of the current dd block (-1 if there isn't any) + self.listIndent = -1 # indent of the current list block (-1 if there isn't any) + + # can be 'blockquote', 'list', 'root', 'paragraph' or 'reference' + # used in lists to determine if they interrupt a paragraph + self.parentType = "root" + + self.level = 0 + + # renderer + self.result = "" + + # Create caches + # Generate markers. + indent_found = False + + start = pos = indent = offset = 0 + length = len(self.src) + + for pos, character in enumerate(self.srcCharCode): + if not indent_found: + if isSpace(character): + indent += 1 + + if character == 0x09: + offset += 4 - offset % 4 + else: + offset += 1 + continue + else: + indent_found = True + + if character == 0x0A or pos == length - 1: + if character != 0x0A: + pos += 1 + self.bMarks.append(start) + self.eMarks.append(pos) + self.tShift.append(indent) + self.sCount.append(offset) + self.bsCount.append(0) + + indent_found = False + indent = 0 + offset = 0 + start = pos + 1 + + # Push fake entry to simplify cache bounds checks + self.bMarks.append(length) + self.eMarks.append(length) + self.tShift.append(0) + self.sCount.append(0) + self.bsCount.append(0) + + self.lineMax = len(self.bMarks) - 1 # don't count last fake line + + def __repr__(self): + return ( + f"{self.__class__.__name__}" + f"(line={self.line},level={self.level},tokens={len(self.tokens)})" + ) + + def push(self, ttype: str, tag: str, nesting: int) -> Token: + """Push new token to "stream".""" + token = Token(ttype, tag, nesting) + token.block = True + if nesting < 0: + self.level -= 1 # closing tag + token.level = self.level + if nesting > 0: + self.level += 1 # opening tag + self.tokens.append(token) + return token + + def isEmpty(self, line: int) -> bool: + """.""" + return (self.bMarks[line] + self.tShift[line]) >= self.eMarks[line] + + def skipEmptyLines(self, from_pos: int) -> int: + """.""" + while from_pos < self.lineMax: + try: + if (self.bMarks[from_pos] + self.tShift[from_pos]) < self.eMarks[ + from_pos + ]: + break + except IndexError: + pass + from_pos += 1 + return from_pos + + def skipSpaces(self, pos: int) -> int: + """Skip spaces from given position.""" + while pos < len(self.src): + if not isSpace(self.srcCharCode[pos]): + break + pos += 1 + return pos + + def skipSpacesBack(self, pos: int, minimum: int) -> int: + """Skip spaces from given position in reverse.""" + if pos <= minimum: + return pos + while pos > minimum: + pos -= 1 + if not isSpace(self.srcCharCode[pos]): + return pos + 1 + return pos + + def skipChars(self, pos: int, code: int) -> int: + """Skip char codes from given position.""" + while pos < len(self.src): + if self.srcCharCode[pos] != code: + break + pos += 1 + return pos + + def skipCharsBack(self, pos: int, code: int, minimum: int) -> int: + """Skip char codes reverse from given position - 1.""" + if pos <= minimum: + return pos + while pos > minimum: + pos -= 1 + if code != self.srcCharCode[pos]: + return pos + 1 + return pos + + def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str: + """Cut lines range from source.""" + line = begin + if begin >= end: + return "" + + queue = [""] * (end - begin) + + i = 1 + while line < end: + lineIndent = 0 + lineStart = first = self.bMarks[line] + if line + 1 < end or keepLastLF: + last = self.eMarks[line] + 1 + else: + last = self.eMarks[line] + + while (first < last) and (lineIndent < indent): + ch = self.srcCharCode[first] + if isSpace(ch): + if ch == 0x09: + lineIndent += 4 - (lineIndent + self.bsCount[line]) % 4 + else: + lineIndent += 1 + elif first - lineStart < self.tShift[line]: + lineIndent += 1 + else: + break + first += 1 + + if lineIndent > indent: + # partially expanding tabs in code blocks, e.g '\t\tfoobar' + # with indent=2 becomes ' \tfoobar' + queue[i - 1] = (" " * (lineIndent - indent)) + self.src[first:last] + else: + queue[i - 1] = self.src[first:last] + + line += 1 + i += 1 + + return "".join(queue) diff --git a/markdown_it/rules_block/table.py b/markdown_it/rules_block/table.py new file mode 100644 index 0000000..e3db858 --- /dev/null +++ b/markdown_it/rules_block/table.py @@ -0,0 +1,238 @@ +# GFM table, https://github.github.com/gfm/#tables-extension- +import re + +from ..common.utils import charCodeAt, isSpace +from .state_block import StateBlock + +headerLineRe = re.compile(r"^:?-+:?$") +enclosingPipesRe = re.compile(r"^\||\|$") + + +def getLine(state: StateBlock, line: int): + pos = state.bMarks[line] + state.tShift[line] + maximum = state.eMarks[line] + + # return state.src.substr(pos, max - pos) + return state.src[pos:maximum] + + +def escapedSplit(string): + result = [] + pos = 0 + max = len(string) + isEscaped = False + lastPos = 0 + current = "" + ch = charCodeAt(string, pos) + + while pos < max: + if ch == 0x7C: # /* | */ + if not isEscaped: + # pipe separating cells, '|' + result.append(current + string[lastPos:pos]) + current = "" + lastPos = pos + 1 + else: + # escaped pipe, '\|' + current += string[lastPos : pos - 1] + lastPos = pos + + isEscaped = ch == 0x5C # /* \ */ + pos += 1 + + ch = charCodeAt(string, pos) + + result.append(current + string[lastPos:]) + + return result + + +def table(state: StateBlock, startLine: int, endLine: int, silent: bool): + tbodyLines = None + + # should have at least two lines + if startLine + 2 > endLine: + return False + + nextLine = startLine + 1 + + if state.sCount[nextLine] < state.blkIndent: + return False + + # if it's indented more than 3 spaces, it should be a code block + if state.sCount[nextLine] - state.blkIndent >= 4: + return False + + # first character of the second line should be '|', '-', ':', + # and no other characters are allowed but spaces; + # basically, this is the equivalent of /^[-:|][-:|\s]*$/ regexp + + pos = state.bMarks[nextLine] + state.tShift[nextLine] + if pos >= state.eMarks[nextLine]: + return False + first_ch = state.srcCharCode[pos] + pos += 1 + if first_ch not in {0x7C, 0x2D, 0x3A}: # not in {"|", "-", ":"} + return False + + if pos >= state.eMarks[nextLine]: + return False + second_ch = state.srcCharCode[pos] + pos += 1 + # not in {"|", "-", ":"} and not space + if second_ch not in {0x7C, 0x2D, 0x3A} and not isSpace(second_ch): + return False + + # if first character is '-', then second character must not be a space + # (due to parsing ambiguity with list) + if first_ch == 0x2D and isSpace(second_ch): + return False + + while pos < state.eMarks[nextLine]: + ch = state.srcCharCode[pos] + + # /* | */ /* - */ /* : */ + if ch not in {0x7C, 0x2D, 0x3A} and not isSpace(ch): + return False + + pos += 1 + + lineText = getLine(state, startLine + 1) + + columns = lineText.split("|") + aligns = [] + for i in range(len(columns)): + t = columns[i].strip() + if not t: + # allow empty columns before and after table, but not in between columns; + # e.g. allow ` |---| `, disallow ` ---||--- ` + if i == 0 or i == len(columns) - 1: + continue + else: + return False + + if not headerLineRe.search(t): + return False + if charCodeAt(t, len(t) - 1) == 0x3A: # /* : */ + # /* : */ + aligns.append("center" if charCodeAt(t, 0) == 0x3A else "right") + elif charCodeAt(t, 0) == 0x3A: # /* : */ + aligns.append("left") + else: + aligns.append("") + + lineText = getLine(state, startLine).strip() + if "|" not in lineText: + return False + if state.sCount[startLine] - state.blkIndent >= 4: + return False + columns = escapedSplit(lineText) + if columns and columns[0] == "": + columns.pop(0) + if columns and columns[-1] == "": + columns.pop() + + # header row will define an amount of columns in the entire table, + # and align row should be exactly the same (the rest of the rows can differ) + columnCount = len(columns) + if columnCount == 0 or columnCount != len(aligns): + return False + + if silent: + return True + + oldParentType = state.parentType + state.parentType = "table" + + # use 'blockquote' lists for termination because it's + # the most similar to tables + terminatorRules = state.md.block.ruler.getRules("blockquote") + + token = state.push("table_open", "table", 1) + token.map = tableLines = [startLine, 0] + + token = state.push("thead_open", "thead", 1) + token.map = [startLine, startLine + 1] + + token = state.push("tr_open", "tr", 1) + token.map = [startLine, startLine + 1] + + for i in range(len(columns)): + token = state.push("th_open", "th", 1) + if aligns[i]: + token.attrs = {"style": "text-align:" + aligns[i]} + + token = state.push("inline", "", 0) + # note in markdown-it this map was removed in v12.0.0 however, we keep it, + # since it is helpful to propagate to children tokens + token.map = [startLine, startLine + 1] + token.content = columns[i].strip() + token.children = [] + + token = state.push("th_close", "th", -1) + + token = state.push("tr_close", "tr", -1) + token = state.push("thead_close", "thead", -1) + + nextLine = startLine + 2 + while nextLine < endLine: + if state.sCount[nextLine] < state.blkIndent: + break + + terminate = False + for i in range(len(terminatorRules)): + if terminatorRules[i](state, nextLine, endLine, True): + terminate = True + break + + if terminate: + break + lineText = getLine(state, nextLine).strip() + if not lineText: + break + if state.sCount[nextLine] - state.blkIndent >= 4: + break + columns = escapedSplit(lineText) + if columns and columns[0] == "": + columns.pop(0) + if columns and columns[-1] == "": + columns.pop() + + if nextLine == startLine + 2: + token = state.push("tbody_open", "tbody", 1) + token.map = tbodyLines = [startLine + 2, 0] + + token = state.push("tr_open", "tr", 1) + token.map = [nextLine, nextLine + 1] + + for i in range(columnCount): + token = state.push("td_open", "td", 1) + if aligns[i]: + token.attrs = {"style": "text-align:" + aligns[i]} + + token = state.push("inline", "", 0) + # note in markdown-it this map was removed in v12.0.0 however, we keep it, + # since it is helpful to propagate to children tokens + token.map = [nextLine, nextLine + 1] + try: + token.content = columns[i].strip() if columns[i] else "" + except IndexError: + token.content = "" + token.children = [] + + token = state.push("td_close", "td", -1) + + token = state.push("tr_close", "tr", -1) + + nextLine += 1 + + if tbodyLines: + token = state.push("tbody_close", "tbody", -1) + tbodyLines[1] = nextLine + + token = state.push("table_close", "table", -1) + + tableLines[1] = nextLine + state.parentType = oldParentType + state.line = nextLine + return True |