summaryrefslogtreecommitdiffstats
path: root/markdown_it/rules_block/html_block.py
diff options
context:
space:
mode:
Diffstat (limited to 'markdown_it/rules_block/html_block.py')
-rw-r--r--markdown_it/rules_block/html_block.py91
1 files changed, 91 insertions, 0 deletions
diff --git a/markdown_it/rules_block/html_block.py b/markdown_it/rules_block/html_block.py
new file mode 100644
index 0000000..31afab7
--- /dev/null
+++ b/markdown_it/rules_block/html_block.py
@@ -0,0 +1,91 @@
+# HTML block
+from __future__ import annotations
+
+import logging
+import re
+
+from ..common.html_blocks import block_names
+from ..common.html_re import HTML_OPEN_CLOSE_TAG_STR
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+# An array of opening and corresponding closing sequences for html tags,
+# last argument defines whether it can terminate a paragraph or not
+HTML_SEQUENCES: list[tuple[re.Pattern, re.Pattern, bool]] = [
+ (
+ re.compile(r"^<(script|pre|style|textarea)(?=(\s|>|$))", re.IGNORECASE),
+ re.compile(r"<\/(script|pre|style|textarea)>", re.IGNORECASE),
+ True,
+ ),
+ (re.compile(r"^<!--"), re.compile(r"-->"), True),
+ (re.compile(r"^<\?"), re.compile(r"\?>"), True),
+ (re.compile(r"^<![A-Z]"), re.compile(r">"), True),
+ (re.compile(r"^<!\[CDATA\["), re.compile(r"\]\]>"), True),
+ (
+ re.compile("^</?(" + "|".join(block_names) + ")(?=(\\s|/?>|$))", re.IGNORECASE),
+ re.compile(r"^$"),
+ True,
+ ),
+ (re.compile(HTML_OPEN_CLOSE_TAG_STR + "\\s*$"), re.compile(r"^$"), False),
+]
+
+
+def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool):
+ LOGGER.debug(
+ "entering html_block: %s, %s, %s, %s", state, startLine, endLine, silent
+ )
+ pos = state.bMarks[startLine] + state.tShift[startLine]
+ maximum = state.eMarks[startLine]
+
+ # if it's indented more than 3 spaces, it should be a code block
+ if state.sCount[startLine] - state.blkIndent >= 4:
+ return False
+
+ if not state.md.options.get("html", None):
+ return False
+
+ if state.srcCharCode[pos] != 0x3C: # /* < */
+ return False
+
+ lineText = state.src[pos:maximum]
+
+ html_seq = None
+ for HTML_SEQUENCE in HTML_SEQUENCES:
+ if HTML_SEQUENCE[0].search(lineText):
+ html_seq = HTML_SEQUENCE
+ break
+
+ if not html_seq:
+ return False
+
+ if silent:
+ # true if this sequence can be a terminator, false otherwise
+ return html_seq[2]
+
+ nextLine = startLine + 1
+
+ # If we are here - we detected HTML block.
+ # Let's roll down till block end.
+ if not html_seq[1].search(lineText):
+ while nextLine < endLine:
+ if state.sCount[nextLine] < state.blkIndent:
+ break
+
+ pos = state.bMarks[nextLine] + state.tShift[nextLine]
+ maximum = state.eMarks[nextLine]
+ lineText = state.src[pos:maximum]
+
+ if html_seq[1].search(lineText):
+ if len(lineText) != 0:
+ nextLine += 1
+ break
+ nextLine += 1
+
+ state.line = nextLine
+
+ token = state.push("html_block", "", 0)
+ token.map = [startLine, nextLine]
+ token.content = state.getLines(startLine, nextLine, state.blkIndent, True)
+
+ return True