From 12e8343068b906f8b2afddc5569968a8a91fa5b0 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 29 Apr 2024 06:24:24 +0200 Subject: Adding upstream version 2.1.0. Signed-off-by: Daniel Baumann --- markdown_it/rules_block/html_block.py | 91 +++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 markdown_it/rules_block/html_block.py (limited to 'markdown_it/rules_block/html_block.py') diff --git a/markdown_it/rules_block/html_block.py b/markdown_it/rules_block/html_block.py new file mode 100644 index 0000000..31afab7 --- /dev/null +++ b/markdown_it/rules_block/html_block.py @@ -0,0 +1,91 @@ +# HTML block +from __future__ import annotations + +import logging +import re + +from ..common.html_blocks import block_names +from ..common.html_re import HTML_OPEN_CLOSE_TAG_STR +from .state_block import StateBlock + +LOGGER = logging.getLogger(__name__) + +# An array of opening and corresponding closing sequences for html tags, +# last argument defines whether it can terminate a paragraph or not +HTML_SEQUENCES: list[tuple[re.Pattern, re.Pattern, bool]] = [ + ( + re.compile(r"^<(script|pre|style|textarea)(?=(\s|>|$))", re.IGNORECASE), + re.compile(r"<\/(script|pre|style|textarea)>", re.IGNORECASE), + True, + ), + (re.compile(r"^"), True), + (re.compile(r"^<\?"), re.compile(r"\?>"), True), + (re.compile(r"^"), True), + (re.compile(r"^"), True), + ( + re.compile("^|$))", re.IGNORECASE), + re.compile(r"^$"), + True, + ), + (re.compile(HTML_OPEN_CLOSE_TAG_STR + "\\s*$"), re.compile(r"^$"), False), +] + + +def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool): + LOGGER.debug( + "entering html_block: %s, %s, %s, %s", state, startLine, endLine, silent + ) + pos = state.bMarks[startLine] + state.tShift[startLine] + maximum = state.eMarks[startLine] + + # if it's indented more than 3 spaces, it should be a code block + if state.sCount[startLine] - state.blkIndent >= 4: + return False + + if not state.md.options.get("html", None): + return False + + if state.srcCharCode[pos] != 0x3C: # /* < */ + return False + + lineText = state.src[pos:maximum] + + html_seq = None + for HTML_SEQUENCE in HTML_SEQUENCES: + if HTML_SEQUENCE[0].search(lineText): + html_seq = HTML_SEQUENCE + break + + if not html_seq: + return False + + if silent: + # true if this sequence can be a terminator, false otherwise + return html_seq[2] + + nextLine = startLine + 1 + + # If we are here - we detected HTML block. + # Let's roll down till block end. + if not html_seq[1].search(lineText): + while nextLine < endLine: + if state.sCount[nextLine] < state.blkIndent: + break + + pos = state.bMarks[nextLine] + state.tShift[nextLine] + maximum = state.eMarks[nextLine] + lineText = state.src[pos:maximum] + + if html_seq[1].search(lineText): + if len(lineText) != 0: + nextLine += 1 + break + nextLine += 1 + + state.line = nextLine + + token = state.push("html_block", "", 0) + token.map = [startLine, nextLine] + token.content = state.getLines(startLine, nextLine, state.blkIndent, True) + + return True -- cgit v1.2.3