"""Block-level tokenizer."""
from __future__ import annotations

import logging

from . import rules_block
from .ruler import Ruler
from .rules_block.state_block import StateBlock
from .token import Token

LOGGER = logging.getLogger(__name__)


_rules: list[tuple] = [
    # The first two elements are the rule name and the rule function. The
    # optional third element lists the rules that can be terminated
    # (interrupted) by this one.
    ("table", rules_block.table, ["paragraph", "reference"]),
    ("code", rules_block.code),
    ("fence", rules_block.fence, ["paragraph", "reference", "blockquote", "list"]),
    (
        "blockquote",
        rules_block.blockquote,
        ["paragraph", "reference", "blockquote", "list"],
    ),
    ("hr", rules_block.hr, ["paragraph", "reference", "blockquote", "list"]),
    ("list", rules_block.list_block, ["paragraph", "reference", "blockquote"]),
    ("reference", rules_block.reference),
    ("html_block", rules_block.html_block, ["paragraph", "reference", "blockquote"]),
    ("heading", rules_block.heading, ["paragraph", "reference", "blockquote"]),
    ("lheading", rules_block.lheading),
    ("paragraph", rules_block.paragraph),
]
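# With the configuration above, `ruler.getRules("paragraph")` yields every
# rule that names "paragraph" in its alt list, i.e. the constructs that may
# interrupt a paragraph: table, fence, blockquote, hr, list, html_block and
# heading.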


class ParserBlock:
    """
    ParserBlock#ruler -> Ruler

    [[Ruler]] instance. Keeps the configuration of block rules.
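
    Example (a minimal sketch; the usual entry point is `MarkdownIt.parse`,
    which drives this class through `md.block`):

        from markdown_it import MarkdownIt

        md = MarkdownIt()
        tokens = md.parse("# Title\n\nSome text.")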
    """

    def __init__(self) -> None:
        self.ruler = Ruler()
        for data in _rules:
            name, rule = data[0], data[1]
            alt = data[2] if len(data) > 2 else []
            self.ruler.push(name, rule, {"alt": alt})

    def tokenize(
        self, state: StateBlock, startLine: int, endLine: int, silent: bool = False
    ) -> None:
        """Generate tokens for input range."""
        rules = self.ruler.getRules("")
        line = startLine
        maxNesting = state.md.options.maxNesting
        hasEmptyLines = False

        while line < endLine:
            state.line = line = state.skipEmptyLines(line)
            if line >= endLine:
                break
            if state.sCount[line] < state.blkIndent:
                # Termination condition for nested calls.
                # Nested calls currently used for blockquotes & lists
                break
            if state.level >= maxNesting:
                # If the nesting level is exceeded, skip the tail to the end.
                # That's not an ordinary situation, so we do not care about
                # the content.
                state.line = endLine
                break

            # Try all possible rules.
            # On success, rule should:
            # - update `state.line`
            # - update `state.tokens`
            # - return True
            for rule in rules:
                if rule(state, line, endLine, False):
                    break
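            # Under the default setup the "paragraph" rule accepts any
            # non-empty line, so this loop is normally guaranteed to advance
            # `state.line`.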

            # Set state.tight unless there was an empty line before the
            # current block, i.e. the most recent empty line should not count.
            state.tight = not hasEmptyLines

            line = state.line

            # A paragraph might "eat" one newline after it in nested lists,
            # so also check the previous line.
            if (line - 1) < endLine and state.isEmpty(line - 1):
                hasEmptyLines = True

            if line < endLine and state.isEmpty(line):
                hasEmptyLines = True
                line += 1
                state.line = line

    def parse(
        self,
        src: str,
        md,
        env,
        outTokens: list[Token],
        ords: tuple[int, ...] | None = None,
    ) -> list[Token] | None:
        """Process input string and push block tokens into `outTokens`."""
        if not src:
            return None
        state = StateBlock(src, md, env, outTokens, ords)
        self.tokenize(state, state.line, state.lineMax)
        return state.tokens
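

# Direct usage is possible but uncommon (a minimal sketch; `MarkdownIt.parse`
# normally drives this class and then runs the inline parser over the
# resulting `inline` tokens):
#
#     from markdown_it import MarkdownIt
#
#     md = MarkdownIt()
#     tokens: list[Token] = []
#     md.block.parse("- one\n- two\n", md, {}, tokens)
#     # `tokens` now holds bullet_list_open / list_item_open / paragraph_open
#     # / inline / ... / bullet_list_close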