summaryrefslogtreecommitdiffstats
path: root/markdown_it/parser_inline.py
diff options
context:
space:
mode:
Diffstat (limited to 'markdown_it/parser_inline.py')
-rw-r--r--markdown_it/parser_inline.py124
1 files changed, 124 insertions, 0 deletions
diff --git a/markdown_it/parser_inline.py b/markdown_it/parser_inline.py
new file mode 100644
index 0000000..b61c990
--- /dev/null
+++ b/markdown_it/parser_inline.py
@@ -0,0 +1,124 @@
+"""Tokenizes paragraph content.
+"""
+from __future__ import annotations
+
+from . import rules_inline
+from .ruler import RuleFunc, Ruler
+from .rules_inline.state_inline import StateInline
+from .token import Token
+
+# Parser rules
+_rules: list[tuple[str, RuleFunc]] = [
+ ("text", rules_inline.text),
+ ("newline", rules_inline.newline),
+ ("escape", rules_inline.escape),
+ ("backticks", rules_inline.backtick),
+ ("strikethrough", rules_inline.strikethrough.tokenize),
+ ("emphasis", rules_inline.emphasis.tokenize),
+ ("link", rules_inline.link),
+ ("image", rules_inline.image),
+ ("autolink", rules_inline.autolink),
+ ("html_inline", rules_inline.html_inline),
+ ("entity", rules_inline.entity),
+]
+
+_rules2: list[tuple[str, RuleFunc]] = [
+ ("balance_pairs", rules_inline.link_pairs),
+ ("strikethrough", rules_inline.strikethrough.postProcess),
+ ("emphasis", rules_inline.emphasis.postProcess),
+ ("text_collapse", rules_inline.text_collapse),
+]
+
+
+class ParserInline:
+ def __init__(self):
+ self.ruler = Ruler()
+ for name, rule in _rules:
+ self.ruler.push(name, rule)
+ # Second ruler used for post-processing (e.g. in emphasis-like rules)
+ self.ruler2 = Ruler()
+ for name, rule2 in _rules2:
+ self.ruler2.push(name, rule2)
+
+ def skipToken(self, state: StateInline) -> None:
+ """Skip single token by running all rules in validation mode;
+ returns `True` if any rule reported success
+ """
+ ok = False
+ pos = state.pos
+ rules = self.ruler.getRules("")
+ maxNesting = state.md.options["maxNesting"]
+ cache = state.cache
+
+ if pos in cache:
+ state.pos = cache[pos]
+ return
+
+ if state.level < maxNesting:
+ for rule in rules:
+ # Increment state.level and decrement it later to limit recursion.
+ # It's harmless to do here, because no tokens are created.
+ # But ideally, we'd need a separate private state variable for this purpose.
+ state.level += 1
+ ok = rule(state, True)
+ state.level -= 1
+ if ok:
+ break
+ else:
+ # Too much nesting, just skip until the end of the paragraph.
+ #
+ # NOTE: this will cause links to behave incorrectly in the following case,
+ # when an amount of `[` is exactly equal to `maxNesting + 1`:
+ #
+ # [[[[[[[[[[[[[[[[[[[[[foo]()
+ #
+ # TODO: remove this workaround when CM standard will allow nested links
+ # (we can replace it by preventing links from being parsed in
+ # validation mode)
+ #
+ state.pos = state.posMax
+
+ if not ok:
+ state.pos += 1
+ cache[pos] = state.pos
+
+ def tokenize(self, state: StateInline) -> None:
+ """Generate tokens for input range."""
+ ok = False
+ rules = self.ruler.getRules("")
+ end = state.posMax
+ maxNesting = state.md.options["maxNesting"]
+
+ while state.pos < end:
+ # Try all possible rules.
+ # On success, rule should:
+ #
+ # - update `state.pos`
+ # - update `state.tokens`
+ # - return true
+
+ if state.level < maxNesting:
+ for rule in rules:
+ ok = rule(state, False)
+ if ok:
+ break
+
+ if ok:
+ if state.pos >= end:
+ break
+ continue
+
+ state.pending += state.src[state.pos]
+ state.pos += 1
+
+ if state.pending:
+ state.pushPending()
+
+ def parse(self, src: str, md, env, tokens: list[Token]) -> list[Token]:
+ """Process input string and push inline tokens into `tokens`"""
+ state = StateInline(src, md, env, tokens)
+ self.tokenize(state)
+ rules2 = self.ruler2.getRules("")
+ for rule in rules2:
+ rule(state)
+ return state.tokens