summaryrefslogtreecommitdiffstats
path: root/markdown_it
diff options
context:
space:
mode:
Diffstat (limited to 'markdown_it')
-rw-r--r--markdown_it/__init__.py5
-rw-r--r--markdown_it/_compat.py10
-rw-r--r--markdown_it/_punycode.py66
-rw-r--r--markdown_it/cli/__init__.py0
-rw-r--r--markdown_it/cli/parse.py109
-rw-r--r--markdown_it/common/__init__.py0
-rw-r--r--markdown_it/common/entities.py4
-rw-r--r--markdown_it/common/html_blocks.py68
-rw-r--r--markdown_it/common/html_re.py40
-rw-r--r--markdown_it/common/normalize_url.py82
-rw-r--r--markdown_it/common/utils.py334
-rw-r--r--markdown_it/helpers/__init__.py6
-rw-r--r--markdown_it/helpers/parse_link_destination.py86
-rw-r--r--markdown_it/helpers/parse_link_label.py44
-rw-r--r--markdown_it/helpers/parse_link_title.py60
-rw-r--r--markdown_it/main.py331
-rw-r--r--markdown_it/parser_block.py109
-rw-r--r--markdown_it/parser_core.py32
-rw-r--r--markdown_it/parser_inline.py124
-rw-r--r--markdown_it/port.yaml49
-rw-r--r--markdown_it/presets/__init__.py27
-rw-r--r--markdown_it/presets/commonmark.py73
-rw-r--r--markdown_it/presets/default.py34
-rw-r--r--markdown_it/presets/zero.py39
-rw-r--r--markdown_it/py.typed1
-rw-r--r--markdown_it/renderer.py339
-rw-r--r--markdown_it/ruler.py237
-rw-r--r--markdown_it/rules_block/__init__.py27
-rw-r--r--markdown_it/rules_block/blockquote.py299
-rw-r--r--markdown_it/rules_block/code.py36
-rw-r--r--markdown_it/rules_block/fence.py104
-rw-r--r--markdown_it/rules_block/heading.py72
-rw-r--r--markdown_it/rules_block/hr.py54
-rw-r--r--markdown_it/rules_block/html_block.py91
-rw-r--r--markdown_it/rules_block/lheading.py90
-rw-r--r--markdown_it/rules_block/list.py344
-rw-r--r--markdown_it/rules_block/paragraph.py67
-rw-r--r--markdown_it/rules_block/reference.py218
-rw-r--r--markdown_it/rules_block/state_block.py230
-rw-r--r--markdown_it/rules_block/table.py238
-rw-r--r--markdown_it/rules_core/__init__.py17
-rw-r--r--markdown_it/rules_core/block.py16
-rw-r--r--markdown_it/rules_core/inline.py10
-rw-r--r--markdown_it/rules_core/linkify.py141
-rw-r--r--markdown_it/rules_core/normalize.py19
-rw-r--r--markdown_it/rules_core/replacements.py125
-rw-r--r--markdown_it/rules_core/smartquotes.py202
-rw-r--r--markdown_it/rules_core/state_core.py25
-rw-r--r--markdown_it/rules_inline/__init__.py29
-rw-r--r--markdown_it/rules_inline/autolink.py78
-rw-r--r--markdown_it/rules_inline/backticks.py75
-rw-r--r--markdown_it/rules_inline/balance_pairs.py114
-rw-r--r--markdown_it/rules_inline/emphasis.py102
-rw-r--r--markdown_it/rules_inline/entity.py54
-rw-r--r--markdown_it/rules_inline/escape.py49
-rw-r--r--markdown_it/rules_inline/html_inline.py43
-rw-r--r--markdown_it/rules_inline/image.py151
-rw-r--r--markdown_it/rules_inline/link.py150
-rw-r--r--markdown_it/rules_inline/newline.py43
-rw-r--r--markdown_it/rules_inline/state_inline.py175
-rw-r--r--markdown_it/rules_inline/strikethrough.py133
-rw-r--r--markdown_it/rules_inline/text.py57
-rw-r--r--markdown_it/rules_inline/text_collapse.py43
-rw-r--r--markdown_it/token.py181
-rw-r--r--markdown_it/tree.py330
-rw-r--r--markdown_it/utils.py122
66 files changed, 6663 insertions, 0 deletions
diff --git a/markdown_it/__init__.py b/markdown_it/__init__.py
new file mode 100644
index 0000000..5cc232a
--- /dev/null
+++ b/markdown_it/__init__.py
@@ -0,0 +1,5 @@
+"""A Python port of Markdown-It"""
+__all__ = ("MarkdownIt",)
+__version__ = "2.1.0"
+
+from .main import MarkdownIt
diff --git a/markdown_it/_compat.py b/markdown_it/_compat.py
new file mode 100644
index 0000000..12df1aa
--- /dev/null
+++ b/markdown_it/_compat.py
@@ -0,0 +1,10 @@
+from __future__ import annotations
+
+from collections.abc import Mapping
+import sys
+from typing import Any
+
+if sys.version_info >= (3, 10):
+ DATACLASS_KWARGS: Mapping[str, Any] = {"slots": True}
+else:
+ DATACLASS_KWARGS: Mapping[str, Any] = {}
diff --git a/markdown_it/_punycode.py b/markdown_it/_punycode.py
new file mode 100644
index 0000000..9ad2442
--- /dev/null
+++ b/markdown_it/_punycode.py
@@ -0,0 +1,66 @@
+# Copyright 2014 Mathias Bynens <https://mathiasbynens.be/>
+# Copyright 2021 Taneli Hukkinen
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+import codecs
+import re
+
+REGEX_SEPARATORS = re.compile(r"[\x2E\u3002\uFF0E\uFF61]")
+REGEX_NON_ASCII = re.compile(r"[^\0-\x7E]")
+
+
+def encode(uni: str) -> str:
+ return codecs.encode(uni, encoding="punycode").decode()
+
+
+def decode(ascii: str) -> str:
+ return codecs.decode(ascii, encoding="punycode") # type: ignore[call-overload]
+
+
+def map_domain(string, fn):
+ parts = string.split("@")
+ result = ""
+ if len(parts) > 1:
+ # In email addresses, only the domain name should be punycoded. Leave
+ # the local part (i.e. everything up to `@`) intact.
+ result = parts[0] + "@"
+ string = parts[1]
+ labels = REGEX_SEPARATORS.split(string)
+ encoded = ".".join(fn(label) for label in labels)
+ return result + encoded
+
+
+def to_unicode(obj: str) -> str:
+ def mapping(obj: str) -> str:
+ if obj.startswith("xn--"):
+ return decode(obj[4:].lower())
+ return obj
+
+ return map_domain(obj, mapping)
+
+
+def to_ascii(obj: str) -> str:
+ def mapping(obj: str) -> str:
+ if REGEX_NON_ASCII.search(obj):
+ return "xn--" + encode(obj)
+ return obj
+
+ return map_domain(obj, mapping)
diff --git a/markdown_it/cli/__init__.py b/markdown_it/cli/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/markdown_it/cli/__init__.py
diff --git a/markdown_it/cli/parse.py b/markdown_it/cli/parse.py
new file mode 100644
index 0000000..2d74f55
--- /dev/null
+++ b/markdown_it/cli/parse.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+"""
+CLI interface to markdown-it-py
+
+Parse one or more markdown files, convert each to HTML, and print to stdout.
+"""
+from __future__ import annotations
+
+import argparse
+from collections.abc import Iterable, Sequence
+import sys
+
+from markdown_it import __version__
+from markdown_it.main import MarkdownIt
+
+version_str = "markdown-it-py [version {}]".format(__version__)
+
+
+def main(args: Sequence[str] | None = None) -> int:
+ namespace = parse_args(args)
+ if namespace.filenames:
+ convert(namespace.filenames)
+ else:
+ interactive()
+ return 0
+
+
+def convert(filenames: Iterable[str]) -> None:
+ for filename in filenames:
+ convert_file(filename)
+
+
+def convert_file(filename: str) -> None:
+ """
+ Parse a Markdown file and dump the output to stdout.
+ """
+ try:
+ with open(filename, "r") as fin:
+ rendered = MarkdownIt().render(fin.read())
+ print(rendered, end="")
+ except OSError:
+ sys.stderr.write(f'Cannot open file "{filename}".\n')
+ sys.exit(1)
+
+
+def interactive() -> None:
+ """
+ Parse user input, dump to stdout, rinse and repeat.
+ Python REPL style.
+ """
+ print_heading()
+ contents = []
+ more = False
+ while True:
+ try:
+ prompt, more = ("... ", True) if more else (">>> ", True)
+ contents.append(input(prompt) + "\n")
+ except EOFError:
+ print("\n" + MarkdownIt().render("\n".join(contents)), end="")
+ more = False
+ contents = []
+ except KeyboardInterrupt:
+ print("\nExiting.")
+ break
+
+
+def parse_args(args: Sequence[str] | None) -> argparse.Namespace:
+ """Parse input CLI arguments."""
+ parser = argparse.ArgumentParser(
+ description="Parse one or more markdown files, "
+ "convert each to HTML, and print to stdout",
+ # NOTE: Remember to update README.md w/ the output of `markdown-it -h`
+ epilog=(
+ f"""
+Interactive:
+
+ $ markdown-it
+ markdown-it-py [version {__version__}] (interactive)
+ Type Ctrl-D to complete input, or Ctrl-C to exit.
+ >>> # Example
+ ... > markdown *input*
+ ...
+ <h1>Example</h1>
+ <blockquote>
+ <p>markdown <em>input</em></p>
+ </blockquote>
+
+Batch:
+
+ $ markdown-it README.md README.footer.md > index.html
+"""
+ ),
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ )
+ parser.add_argument("-v", "--version", action="version", version=version_str)
+ parser.add_argument(
+ "filenames", nargs="*", help="specify an optional list of files to convert"
+ )
+ return parser.parse_args(args)
+
+
+def print_heading() -> None:
+ print("{} (interactive)".format(version_str))
+ print("Type Ctrl-D to complete input, or Ctrl-C to exit.")
+
+
+if __name__ == "__main__":
+ exit_code = main(sys.argv[1:])
+ sys.exit(exit_code)
diff --git a/markdown_it/common/__init__.py b/markdown_it/common/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/markdown_it/common/__init__.py
diff --git a/markdown_it/common/entities.py b/markdown_it/common/entities.py
new file mode 100644
index 0000000..6bb2d34
--- /dev/null
+++ b/markdown_it/common/entities.py
@@ -0,0 +1,4 @@
+"""HTML5 entities map: { name -> characters }."""
+import html.entities
+
+entities = {name.rstrip(";"): chars for name, chars in html.entities.html5.items()}
diff --git a/markdown_it/common/html_blocks.py b/markdown_it/common/html_blocks.py
new file mode 100644
index 0000000..8b199af
--- /dev/null
+++ b/markdown_it/common/html_blocks.py
@@ -0,0 +1,68 @@
+"""List of valid html blocks names, according to commonmark spec
+http://jgm.github.io/CommonMark/spec.html#html-blocks
+"""
+
+block_names = [
+ "address",
+ "article",
+ "aside",
+ "base",
+ "basefont",
+ "blockquote",
+ "body",
+ "caption",
+ "center",
+ "col",
+ "colgroup",
+ "dd",
+ "details",
+ "dialog",
+ "dir",
+ "div",
+ "dl",
+ "dt",
+ "fieldset",
+ "figcaption",
+ "figure",
+ "footer",
+ "form",
+ "frame",
+ "frameset",
+ "h1",
+ "h2",
+ "h3",
+ "h4",
+ "h5",
+ "h6",
+ "head",
+ "header",
+ "hr",
+ "html",
+ "iframe",
+ "legend",
+ "li",
+ "link",
+ "main",
+ "menu",
+ "menuitem",
+ "nav",
+ "noframes",
+ "ol",
+ "optgroup",
+ "option",
+ "p",
+ "param",
+ "section",
+ "source",
+ "summary",
+ "table",
+ "tbody",
+ "td",
+ "tfoot",
+ "th",
+ "thead",
+ "title",
+ "tr",
+ "track",
+ "ul",
+]
diff --git a/markdown_it/common/html_re.py b/markdown_it/common/html_re.py
new file mode 100644
index 0000000..f0c336d
--- /dev/null
+++ b/markdown_it/common/html_re.py
@@ -0,0 +1,40 @@
+"""Regexps to match html elements
+"""
+
+import re
+
+attr_name = "[a-zA-Z_:][a-zA-Z0-9:._-]*"
+
+unquoted = "[^\"'=<>`\\x00-\\x20]+"
+single_quoted = "'[^']*'"
+double_quoted = '"[^"]*"'
+
+attr_value = "(?:" + unquoted + "|" + single_quoted + "|" + double_quoted + ")"
+
+attribute = "(?:\\s+" + attr_name + "(?:\\s*=\\s*" + attr_value + ")?)"
+
+open_tag = "<[A-Za-z][A-Za-z0-9\\-]*" + attribute + "*\\s*\\/?>"
+
+close_tag = "<\\/[A-Za-z][A-Za-z0-9\\-]*\\s*>"
+comment = "<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->"
+processing = "<[?][\\s\\S]*?[?]>"
+declaration = "<![A-Z]+\\s+[^>]*>"
+cdata = "<!\\[CDATA\\[[\\s\\S]*?\\]\\]>"
+
+HTML_TAG_RE = re.compile(
+ "^(?:"
+ + open_tag
+ + "|"
+ + close_tag
+ + "|"
+ + comment
+ + "|"
+ + processing
+ + "|"
+ + declaration
+ + "|"
+ + cdata
+ + ")"
+)
+HTML_OPEN_CLOSE_TAG_STR = "^(?:" + open_tag + "|" + close_tag + ")"
+HTML_OPEN_CLOSE_TAG_RE = re.compile(HTML_OPEN_CLOSE_TAG_STR)
diff --git a/markdown_it/common/normalize_url.py b/markdown_it/common/normalize_url.py
new file mode 100644
index 0000000..afec928
--- /dev/null
+++ b/markdown_it/common/normalize_url.py
@@ -0,0 +1,82 @@
+from __future__ import annotations
+
+from collections.abc import Callable
+import re
+from urllib.parse import quote, unquote, urlparse, urlunparse # noqa: F401
+
+import mdurl
+
+from .. import _punycode
+
+RECODE_HOSTNAME_FOR = ("http:", "https:", "mailto:")
+
+
+def normalizeLink(url: str) -> str:
+ """Normalize destination URLs in links
+
+ ::
+
+ [label]: destination 'title'
+ ^^^^^^^^^^^
+ """
+ parsed = mdurl.parse(url, slashes_denote_host=True)
+
+ if parsed.hostname:
+ # Encode hostnames in urls like:
+ # `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
+ #
+ # We don't encode unknown schemas, because it's likely that we encode
+ # something we shouldn't (e.g. `skype:name` treated as `skype:host`)
+ #
+ if not parsed.protocol or parsed.protocol in RECODE_HOSTNAME_FOR:
+ try:
+ parsed = parsed._replace(hostname=_punycode.to_ascii(parsed.hostname))
+ except Exception:
+ pass
+
+ return mdurl.encode(mdurl.format(parsed))
+
+
+def normalizeLinkText(url: str) -> str:
+ """Normalize autolink content
+
+ ::
+
+ <destination>
+ ~~~~~~~~~~~
+ """
+ parsed = mdurl.parse(url, slashes_denote_host=True)
+
+ if parsed.hostname:
+ # Encode hostnames in urls like:
+ # `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
+ #
+ # We don't encode unknown schemas, because it's likely that we encode
+ # something we shouldn't (e.g. `skype:name` treated as `skype:host`)
+ #
+ if not parsed.protocol or parsed.protocol in RECODE_HOSTNAME_FOR:
+ try:
+ parsed = parsed._replace(hostname=_punycode.to_unicode(parsed.hostname))
+ except Exception:
+ pass
+
+ # add '%' to exclude list because of https://github.com/markdown-it/markdown-it/issues/720
+ return mdurl.decode(mdurl.format(parsed), mdurl.DECODE_DEFAULT_CHARS + "%")
+
+
+BAD_PROTO_RE = re.compile(r"^(vbscript|javascript|file|data):")
+GOOD_DATA_RE = re.compile(r"^data:image\/(gif|png|jpeg|webp);")
+
+
+def validateLink(url: str, validator: Callable | None = None) -> bool:
+ """Validate URL link is allowed in output.
+
+ This validator can prohibit more than really needed to prevent XSS.
+ It's a tradeoff to keep code simple and to be secure by default.
+
+ Note: url should be normalized at this point, and existing entities decoded.
+ """
+ if validator is not None:
+ return validator(url)
+ url = url.strip().lower()
+ return bool(GOOD_DATA_RE.search(url)) if BAD_PROTO_RE.search(url) else True
diff --git a/markdown_it/common/utils.py b/markdown_it/common/utils.py
new file mode 100644
index 0000000..edc24ca
--- /dev/null
+++ b/markdown_it/common/utils.py
@@ -0,0 +1,334 @@
+"""Utilities for parsing source text
+"""
+import html
+import re
+from typing import Any
+
+from .entities import entities
+
+
+def charCodeAt(src: str, pos: int) -> Any:
+ """
+ Returns the Unicode value of the character at the specified location.
+
+ @param - index The zero-based index of the desired character.
+ If there is no character at the specified index, NaN is returned.
+
+ This was added for compatibility with python
+ """
+ try:
+ return ord(src[pos])
+ except IndexError:
+ return None
+
+
+# Merge objects
+#
+def assign(obj):
+ """Merge objects /*from1, from2, from3, ...*/)"""
+ raise NotImplementedError
+ # sources = Array.prototype.slice.call(arguments, 1)
+
+ # sources.forEach(function (source) {
+ # if (!source) { return; }
+
+ # if (typeof source !== 'object') {
+ # throw new TypeError(source + 'must be object')
+ # }
+
+ # Object.keys(source).forEach(function (key) {
+ # obj[key] = source[key]
+ # })
+ # })
+
+ # return obj
+
+
+def arrayReplaceAt(src: list, pos: int, newElements: list) -> list:
+ """
+ Remove element from array and put another array at those position.
+ Useful for some operations with tokens
+ """
+ return src[:pos] + newElements + src[pos + 1 :]
+
+
+######################################################################
+
+
+def isValidEntityCode(c: int) -> bool:
+
+ # broken sequence
+ if c >= 0xD800 and c <= 0xDFFF:
+ return False
+ # never used
+ if c >= 0xFDD0 and c <= 0xFDEF:
+ return False
+ if ((c & 0xFFFF) == 0xFFFF) or ((c & 0xFFFF) == 0xFFFE):
+ return False
+ # control codes
+ if c >= 0x00 and c <= 0x08:
+ return False
+ if c == 0x0B:
+ return False
+ if c >= 0x0E and c <= 0x1F:
+ return False
+ if c >= 0x7F and c <= 0x9F:
+ return False
+ # out of range
+ if c > 0x10FFFF:
+ return False
+ return True
+
+
+def fromCodePoint(c: int) -> str:
+ """Convert ordinal to unicode.
+
+ Note, in the original Javascript two string characters were required,
+ for codepoints larger than `0xFFFF`.
+ But Python 3 can represent any unicode codepoint in one character.
+ """
+ return chr(c)
+
+
+UNESCAPE_MD_RE = re.compile(r'\\([!"#$%&\'()*+,\-.\/:;<=>?@[\\\]^_`{|}~])')
+# ENTITY_RE_g = re.compile(r'&([a-z#][a-z0-9]{1,31})', re.IGNORECASE)
+UNESCAPE_ALL_RE = re.compile(
+ r'\\([!"#$%&\'()*+,\-.\/:;<=>?@[\\\]^_`{|}~])' + "|" + r"&([a-z#][a-z0-9]{1,31});",
+ re.IGNORECASE,
+)
+DIGITAL_ENTITY_TEST_RE = re.compile(r"^#((?:x[a-f0-9]{1,8}|[0-9]{1,8}))", re.IGNORECASE)
+
+
+def replaceEntityPattern(match: str, name: str) -> str:
+ """Convert HTML entity patterns
+
+ ::
+
+ https://www.google.com -> https%3A//www.google.com
+
+ """
+ code = 0
+
+ if name in entities:
+ return entities[name]
+
+ if ord(name[0]) == 0x23 and DIGITAL_ENTITY_TEST_RE.search(name):
+ code = int(name[2:], 16) if name[1].lower() == "x" else int(name[1:], 10)
+ if isValidEntityCode(code):
+ return fromCodePoint(code)
+
+ return match
+
+
+# def replaceEntities(string):
+# if (string.indexOf('&') < 0):
+# return string
+# return string.replace(ENTITY_RE, replaceEntityPattern)
+
+
+def unescapeMd(string: str) -> str:
+ raise NotImplementedError
+ # if "\\" in string:
+ # return string
+ # return string.replace(UNESCAPE_MD_RE, "$1")
+
+
+def unescapeAll(string: str) -> str:
+ def replacer_func(match):
+ escaped = match.group(1)
+ if escaped:
+ return escaped
+ entity = match.group(2)
+ return replaceEntityPattern(match.group(), entity)
+
+ if "\\" not in string and "&" not in string:
+ return string
+ return UNESCAPE_ALL_RE.sub(replacer_func, string)
+
+
+ESCAPABLE = r"""\\!"#$%&'()*+,./:;<=>?@\[\]^`{}|_~-"""
+ESCAPE_CHAR = re.compile(r"\\([" + ESCAPABLE + r"])")
+
+
+def stripEscape(string: str) -> str:
+ """Strip escape \\ characters"""
+ return ESCAPE_CHAR.sub(r"\1", string)
+
+
+# //////////////////////////////////////////////////////////////////////////////
+
+# TODO This section changed quite a lot, should re-check
+
+# UNESCAPE_HTML_RE = re.compile(r"\\&(?=(amp\;|lt\;|gt\;|quot\;))")
+# ESCAPE_AND_HTML = re.compile(r"&(?!(amp\;|lt\;|gt\;|quot\;))")
+# HTML_ESCAPE_REPLACE_RE = re.compile(r'[&<>"]')
+
+
+# def escapeHtml(string: str):
+
+# if HTML_ESCAPE_REPLACE_RE.search(string):
+
+# string = UNESCAPE_HTML_RE.sub("&", string)
+# string = ESCAPE_AND_HTML.sub("&amp;", string)
+# for k, v in {"<": "&lt;", ">": "&gt;", '"': "&quot;"}.items():
+# string = string.replace(k, v)
+
+# return string
+
+
+def escapeHtml(raw: str) -> str:
+ # return html.escape(html.unescape(raw)).replace("&#x27;", "'")
+ return html.escape(raw).replace("&#x27;", "'")
+
+
+# //////////////////////////////////////////////////////////////////////////////
+
+REGEXP_ESCAPE_RE = re.compile(r"[.?*+^$[\]\\(){}|-]")
+
+
+def escapeRE(string: str) -> str:
+ string = REGEXP_ESCAPE_RE.sub("\\$&", string)
+ return string
+
+
+# //////////////////////////////////////////////////////////////////////////////
+
+
+def isSpace(code: object) -> bool:
+ return code in {0x09, 0x20}
+
+
+MD_WHITESPACE = {
+ 0x09, # \t
+ 0x0A, # \n
+ 0x0B, # \v
+ 0x0C, # \f
+ 0x0D, # \r
+ 0x20,
+ 0xA0,
+ 0x1680,
+ 0x202F,
+ 0x205F,
+ 0x3000,
+}
+
+
+def isWhiteSpace(code: int) -> bool:
+ r"""Zs (unicode class) || [\t\f\v\r\n]"""
+ if code >= 0x2000 and code <= 0x200A:
+ return True
+ return code in MD_WHITESPACE
+
+
+# //////////////////////////////////////////////////////////////////////////////
+
+UNICODE_PUNCT_RE = re.compile(
+ r"[!-#%-\*,-\/:;\?@\[-\]_\{\}\xA1\xA7\xAB\xB6\xB7\xBB\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u09FD\u0A76\u0AF0\u0C84\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E4E\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]|\uD800[\uDD00-\uDD02\uDF9F\uDFD0]|\uD801\uDD6F|\uD802[\uDC57\uDD1F\uDD3F\uDE50-\uDE58\uDE7F\uDEF0-\uDEF6\uDF39-\uDF3F\uDF99-\uDF9C]|\uD803[\uDF55-\uDF59]|\uD804[\uDC47-\uDC4D\uDCBB\uDCBC\uDCBE-\uDCC1\uDD40-\uDD43\uDD74\uDD75\uDDC5-\uDDC8\uDDCD\uDDDB\uDDDD-\uDDDF\uDE38-\uDE3D\uDEA9]|\uD805[\uDC4B-\uDC4F\uDC5B\uDC5D\uDCC6\uDDC1-\uDDD7\uDE41-\uDE43\uDE60-\uDE6C\uDF3C-\uDF3E]|\uD806[\uDC3B\uDE3F-\uDE46\uDE9A-\uDE9C\uDE9E-\uDEA2]|\uD807[\uDC41-\uDC45\uDC70\uDC71\uDEF7\uDEF8]|\uD809[\uDC70-\uDC74]|\uD81A[\uDE6E\uDE6F\uDEF5\uDF37-\uDF3B\uDF44]|\uD81B[\uDE97-\uDE9A]|\uD82F\uDC9F|\uD836[\uDE87-\uDE8B]|\uD83A[\uDD5E\uDD5F]" # noqa: E501
+)
+
+
+# Currently without astral characters support.
+def isPunctChar(ch: str) -> bool:
+ return UNICODE_PUNCT_RE.search(ch) is not None
+
+
+MD_ASCII_PUNCT = {
+ 0x21, # /* ! */
+ 0x22, # /* " */
+ 0x23, # /* # */
+ 0x24, # /* $ */
+ 0x25, # /* % */
+ 0x26, # /* & */
+ 0x27, # /* ' */
+ 0x28, # /* ( */
+ 0x29, # /* ) */
+ 0x2A, # /* * */
+ 0x2B, # /* + */
+ 0x2C, # /* , */
+ 0x2D, # /* - */
+ 0x2E, # /* . */
+ 0x2F, # /* / */
+ 0x3A, # /* : */
+ 0x3B, # /* ; */
+ 0x3C, # /* < */
+ 0x3D, # /* = */
+ 0x3E, # /* > */
+ 0x3F, # /* ? */
+ 0x40, # /* @ */
+ 0x5B, # /* [ */
+ 0x5C, # /* \ */
+ 0x5D, # /* ] */
+ 0x5E, # /* ^ */
+ 0x5F, # /* _ */
+ 0x60, # /* ` */
+ 0x7B, # /* { */
+ 0x7C, # /* | */
+ 0x7D, # /* } */
+ 0x7E, # /* ~ */
+}
+
+
+def isMdAsciiPunct(ch: int) -> bool:
+ """Markdown ASCII punctuation characters.
+
+ ::
+
+ !, ", #, $, %, &, ', (, ), *, +, ,, -, ., /, :, ;, <, =, >, ?, @, [, \\, ], ^, _, `, {, |, }, or ~
+
+ See http://spec.commonmark.org/0.15/#ascii-punctuation-character
+
+ Don't confuse with unicode punctuation !!! It lacks some chars in ascii range.
+
+ """ # noqa: E501
+ return ch in MD_ASCII_PUNCT
+
+
+def normalizeReference(string: str) -> str:
+ """Helper to unify [reference labels]."""
+ # Trim and collapse whitespace
+ #
+ string = re.sub(r"\s+", " ", string.strip())
+
+ # In node v10 'ẞ'.toLowerCase() === 'Ṿ', which is presumed to be a bug
+ # fixed in v12 (couldn't find any details).
+ #
+ # So treat this one as a special case
+ # (remove this when node v10 is no longer supported).
+ #
+ # if ('ẞ'.toLowerCase() === 'Ṿ') {
+ # str = str.replace(/ẞ/g, 'ß')
+ # }
+
+ # .toLowerCase().toUpperCase() should get rid of all differences
+ # between letter variants.
+ #
+ # Simple .toLowerCase() doesn't normalize 125 code points correctly,
+ # and .toUpperCase doesn't normalize 6 of them (list of exceptions:
+ # İ, ϴ, ẞ, Ω, K, Å - those are already uppercased, but have differently
+ # uppercased versions).
+ #
+ # Here's an example showing how it happens. Lets take greek letter omega:
+ # uppercase U+0398 (Θ), U+03f4 (ϴ) and lowercase U+03b8 (θ), U+03d1 (ϑ)
+ #
+ # Unicode entries:
+ # 0398;GREEK CAPITAL LETTER THETA;Lu;0;L;;;;;N;;;;03B8
+ # 03B8;GREEK SMALL LETTER THETA;Ll;0;L;;;;;N;;;0398;;0398
+ # 03D1;GREEK THETA SYMBOL;Ll;0;L;<compat> 03B8;;;;N;GREEK SMALL LETTER SCRIPT THETA;;0398;;0398
+ # 03F4;GREEK CAPITAL THETA SYMBOL;Lu;0;L;<compat> 0398;;;;N;;;;03B8
+ #
+ # Case-insensitive comparison should treat all of them as equivalent.
+ #
+ # But .toLowerCase() doesn't change ϑ (it's already lowercase),
+ # and .toUpperCase() doesn't change ϴ (already uppercase).
+ #
+ # Applying first lower then upper case normalizes any character:
+ # '\u0398\u03f4\u03b8\u03d1'.toLowerCase().toUpperCase() === '\u0398\u0398\u0398\u0398'
+ #
+ # Note: this is equivalent to unicode case folding; unicode normalization
+ # is a different step that is not required here.
+ #
+ # Final result should be uppercased, because it's later stored in an object
+ # (this avoid a conflict with Object.prototype members,
+ # most notably, `__proto__`)
+ #
+ return string.lower().upper()
diff --git a/markdown_it/helpers/__init__.py b/markdown_it/helpers/__init__.py
new file mode 100644
index 0000000..3dbbdd1
--- /dev/null
+++ b/markdown_it/helpers/__init__.py
@@ -0,0 +1,6 @@
+"""Functions for parsing Links
+"""
+__all__ = ("parseLinkLabel", "parseLinkDestination", "parseLinkTitle")
+from .parse_link_destination import parseLinkDestination
+from .parse_link_label import parseLinkLabel
+from .parse_link_title import parseLinkTitle
diff --git a/markdown_it/helpers/parse_link_destination.py b/markdown_it/helpers/parse_link_destination.py
new file mode 100644
index 0000000..58b76f3
--- /dev/null
+++ b/markdown_it/helpers/parse_link_destination.py
@@ -0,0 +1,86 @@
+"""
+Parse link destination
+"""
+
+from ..common.utils import charCodeAt, unescapeAll
+
+
+class _Result:
+ __slots__ = ("ok", "pos", "lines", "str")
+
+ def __init__(self):
+ self.ok = False
+ self.pos = 0
+ self.lines = 0
+ self.str = ""
+
+
+def parseLinkDestination(string: str, pos: int, maximum: int) -> _Result:
+ lines = 0
+ start = pos
+ result = _Result()
+
+ if charCodeAt(string, pos) == 0x3C: # /* < */
+ pos += 1
+ while pos < maximum:
+ code = charCodeAt(string, pos)
+ if code == 0x0A: # /* \n */)
+ return result
+ if code == 0x3C: # / * < * /
+ return result
+ if code == 0x3E: # /* > */) {
+ result.pos = pos + 1
+ result.str = unescapeAll(string[start + 1 : pos])
+ result.ok = True
+ return result
+
+ if code == 0x5C and pos + 1 < maximum: # \
+ pos += 2
+ continue
+
+ pos += 1
+
+ # no closing '>'
+ return result
+
+ # this should be ... } else { ... branch
+
+ level = 0
+ while pos < maximum:
+ code = charCodeAt(string, pos)
+
+ if code == 0x20:
+ break
+
+ # ascii control characters
+ if code < 0x20 or code == 0x7F:
+ break
+
+ if code == 0x5C and pos + 1 < maximum:
+ if charCodeAt(string, pos + 1) == 0x20:
+ break
+ pos += 2
+ continue
+
+ if code == 0x28: # /* ( */)
+ level += 1
+ if level > 32:
+ return result
+
+ if code == 0x29: # /* ) */)
+ if level == 0:
+ break
+ level -= 1
+
+ pos += 1
+
+ if start == pos:
+ return result
+ if level != 0:
+ return result
+
+ result.str = unescapeAll(string[start:pos])
+ result.lines = lines
+ result.pos = pos
+ result.ok = True
+ return result
diff --git a/markdown_it/helpers/parse_link_label.py b/markdown_it/helpers/parse_link_label.py
new file mode 100644
index 0000000..20e3c14
--- /dev/null
+++ b/markdown_it/helpers/parse_link_label.py
@@ -0,0 +1,44 @@
+"""
+Parse link label
+
+this function assumes that first character ("[") already matches
+returns the end of the label
+
+"""
+from markdown_it.rules_inline import StateInline
+
+
+def parseLinkLabel(state: StateInline, start: int, disableNested: bool = False) -> int:
+
+ labelEnd = -1
+ oldPos = state.pos
+ found = False
+
+ state.pos = start + 1
+ level = 1
+
+ while state.pos < state.posMax:
+ marker = state.srcCharCode[state.pos]
+ if marker == 0x5D: # /* ] */)
+ level -= 1
+ if level == 0:
+ found = True
+ break
+
+ prevPos = state.pos
+ state.md.inline.skipToken(state)
+ if marker == 0x5B: # /* [ */)
+ if prevPos == state.pos - 1:
+ # increase level if we find text `[`,
+ # which is not a part of any token
+ level += 1
+ elif disableNested:
+ state.pos = oldPos
+ return -1
+ if found:
+ labelEnd = state.pos
+
+ # restore old state
+ state.pos = oldPos
+
+ return labelEnd
diff --git a/markdown_it/helpers/parse_link_title.py b/markdown_it/helpers/parse_link_title.py
new file mode 100644
index 0000000..842c83b
--- /dev/null
+++ b/markdown_it/helpers/parse_link_title.py
@@ -0,0 +1,60 @@
+"""Parse link title
+"""
+from ..common.utils import charCodeAt, unescapeAll
+
+
+class _Result:
+ __slots__ = ("ok", "pos", "lines", "str")
+
+ def __init__(self):
+ self.ok = False
+ self.pos = 0
+ self.lines = 0
+ self.str = ""
+
+ def __str__(self):
+ return self.str
+
+
+def parseLinkTitle(string: str, pos: int, maximum: int) -> _Result:
+ lines = 0
+ start = pos
+ result = _Result()
+
+ if pos >= maximum:
+ return result
+
+ marker = charCodeAt(string, pos)
+
+ # /* " */ /* ' */ /* ( */
+ if marker != 0x22 and marker != 0x27 and marker != 0x28:
+ return result
+
+ pos += 1
+
+ # if opening marker is "(", switch it to closing marker ")"
+ if marker == 0x28:
+ marker = 0x29
+
+ while pos < maximum:
+ code = charCodeAt(string, pos)
+ if code == marker:
+ title = string[start + 1 : pos]
+ title = unescapeAll(title)
+ result.pos = pos + 1
+ result.lines = lines
+ result.str = title
+ result.ok = True
+ return result
+ elif code == 0x28 and marker == 0x29: # /* ( */ /* ) */
+ return result
+ elif code == 0x0A:
+ lines += 1
+ elif code == 0x5C and pos + 1 < maximum: # /* \ */
+ pos += 1
+ if charCodeAt(string, pos) == 0x0A:
+ lines += 1
+
+ pos += 1
+
+ return result
diff --git a/markdown_it/main.py b/markdown_it/main.py
new file mode 100644
index 0000000..7faac5a
--- /dev/null
+++ b/markdown_it/main.py
@@ -0,0 +1,331 @@
+from __future__ import annotations
+
+from collections.abc import Callable, Generator, Iterable, Mapping, MutableMapping
+from contextlib import contextmanager
+from typing import Any
+
+from . import helpers, presets # noqa F401
+from .common import normalize_url, utils # noqa F401
+from .parser_block import ParserBlock # noqa F401
+from .parser_core import ParserCore # noqa F401
+from .parser_inline import ParserInline # noqa F401
+from .renderer import RendererHTML, RendererProtocol
+from .rules_core.state_core import StateCore
+from .token import Token
+from .utils import OptionsDict
+
+try:
+ import linkify_it
+except ModuleNotFoundError:
+ linkify_it = None
+
+
+_PRESETS = {
+ "default": presets.default.make(),
+ "js-default": presets.js_default.make(),
+ "zero": presets.zero.make(),
+ "commonmark": presets.commonmark.make(),
+ "gfm-like": presets.gfm_like.make(),
+}
+
+
+class MarkdownIt:
+ def __init__(
+ self,
+ config: str | Mapping = "commonmark",
+ options_update: Mapping | None = None,
+ *,
+ renderer_cls: Callable[[MarkdownIt], RendererProtocol] = RendererHTML,
+ ):
+ """Main parser class
+
+ :param config: name of configuration to load or a pre-defined dictionary
+ :param options_update: dictionary that will be merged into ``config["options"]``
+ :param renderer_cls: the class to load as the renderer:
+ ``self.renderer = renderer_cls(self)``
+ """
+ # add modules
+ self.utils = utils
+ self.helpers: Any = helpers
+
+ # initialise classes
+ self.inline = ParserInline()
+ self.block = ParserBlock()
+ self.core = ParserCore()
+ self.renderer = renderer_cls(self)
+ self.linkify = linkify_it.LinkifyIt() if linkify_it else None
+
+ # set the configuration
+ if options_update and not isinstance(options_update, Mapping):
+ # catch signature change where renderer_cls was not used as a key-word
+ raise TypeError(
+ f"options_update should be a mapping: {options_update}"
+ "\n(Perhaps you intended this to be the renderer_cls?)"
+ )
+ self.configure(config, options_update=options_update)
+
+ def __repr__(self) -> str:
+ return f"{self.__class__.__module__}.{self.__class__.__name__}()"
+
+ def __getitem__(self, name: str) -> Any:
+ return {
+ "inline": self.inline,
+ "block": self.block,
+ "core": self.core,
+ "renderer": self.renderer,
+ }[name]
+
+ def set(self, options: MutableMapping) -> None:
+ """Set parser options (in the same format as in constructor).
+ Probably, you will never need it, but you can change options after constructor call.
+
+ __Note:__ To achieve the best possible performance, don't modify a
+ `markdown-it` instance's options on the fly. If you need multiple configurations
+ it's best to create multiple instances and initialize each with separate config.
+ """
+ self.options = OptionsDict(options)
+
+ def configure(
+ self, presets: str | Mapping, options_update: Mapping | None = None
+ ) -> MarkdownIt:
+ """Batch load of all options and component settings.
+ This is an internal method, and you probably will not need it.
+ But if you will - see available presets and data structure
+ [here](https://github.com/markdown-it/markdown-it/tree/master/lib/presets)
+
+ We strongly recommend to use presets instead of direct config loads.
+ That will give better compatibility with next versions.
+ """
+ if isinstance(presets, str):
+ if presets not in _PRESETS:
+ raise KeyError(f"Wrong `markdown-it` preset '{presets}', check name")
+ config = _PRESETS[presets]
+ else:
+ config = presets
+
+ if not config:
+ raise ValueError("Wrong `markdown-it` config, can't be empty")
+
+ options = config.get("options", {}) or {}
+ if options_update:
+ options = {**options, **options_update}
+
+ self.set(options)
+
+ if "components" in config:
+ for name, component in config["components"].items():
+ rules = component.get("rules", None)
+ if rules:
+ self[name].ruler.enableOnly(rules)
+ rules2 = component.get("rules2", None)
+ if rules2:
+ self[name].ruler2.enableOnly(rules2)
+
+ return self
+
+ def get_all_rules(self) -> dict[str, list[str]]:
+ """Return the names of all active rules."""
+ rules = {
+ chain: self[chain].ruler.get_all_rules()
+ for chain in ["core", "block", "inline"]
+ }
+ rules["inline2"] = self.inline.ruler2.get_all_rules()
+ return rules
+
+ def get_active_rules(self) -> dict[str, list[str]]:
+ """Return the names of all active rules."""
+ rules = {
+ chain: self[chain].ruler.get_active_rules()
+ for chain in ["core", "block", "inline"]
+ }
+ rules["inline2"] = self.inline.ruler2.get_active_rules()
+ return rules
+
+ def enable(
+ self, names: str | Iterable[str], ignoreInvalid: bool = False
+ ) -> MarkdownIt:
+ """Enable a list of rules. (chainable)
+
+ :param names: rule name or list of rule names to enable.
+ :param ignoreInvalid: set `true` to ignore errors when rule not found.
+
+ It will automatically find appropriate components,
+ containing rules with given names. If rule not found, and `ignoreInvalid`
+ not set - throws exception.
+
+ Example::
+
+ md = MarkdownIt().enable(['sub', 'sup']).disable('smartquotes')
+
+ """
+ result = []
+
+ if isinstance(names, str):
+ names = [names]
+
+ for chain in ["core", "block", "inline"]:
+ result.extend(self[chain].ruler.enable(names, True))
+ result.extend(self.inline.ruler2.enable(names, True))
+
+ missed = [name for name in names if name not in result]
+ if missed and not ignoreInvalid:
+ raise ValueError(f"MarkdownIt. Failed to enable unknown rule(s): {missed}")
+
+ return self
+
+ def disable(
+ self, names: str | Iterable[str], ignoreInvalid: bool = False
+ ) -> MarkdownIt:
+ """The same as [[MarkdownIt.enable]], but turn specified rules off. (chainable)
+
+ :param names: rule name or list of rule names to disable.
+ :param ignoreInvalid: set `true` to ignore errors when rule not found.
+
+ """
+ result = []
+
+ if isinstance(names, str):
+ names = [names]
+
+ for chain in ["core", "block", "inline"]:
+ result.extend(self[chain].ruler.disable(names, True))
+ result.extend(self.inline.ruler2.disable(names, True))
+
+ missed = [name for name in names if name not in result]
+ if missed and not ignoreInvalid:
+ raise ValueError(f"MarkdownIt. Failed to disable unknown rule(s): {missed}")
+ return self
+
+ @contextmanager
+ def reset_rules(self) -> Generator[None, None, None]:
+ """A context manager, that will reset the current enabled rules on exit."""
+ chain_rules = self.get_active_rules()
+ yield
+ for chain, rules in chain_rules.items():
+ if chain != "inline2":
+ self[chain].ruler.enableOnly(rules)
+ self.inline.ruler2.enableOnly(chain_rules["inline2"])
+
+ def add_render_rule(self, name: str, function: Callable, fmt: str = "html") -> None:
+ """Add a rule for rendering a particular Token type.
+
+ Only applied when ``renderer.__output__ == fmt``
+ """
+ if self.renderer.__output__ == fmt:
+ self.renderer.rules[name] = function.__get__(self.renderer) # type: ignore
+
+ def use(self, plugin: Callable, *params, **options) -> MarkdownIt:
+ """Load specified plugin with given params into current parser instance. (chainable)
+
+ It's just sugar to call `plugin(md, params)` with currying.
+
+ Example::
+
+ def func(tokens, idx):
+ tokens[idx].content = tokens[idx].content.replace('foo', 'bar')
+ md = MarkdownIt().use(plugin, 'foo_replace', 'text', func)
+
+ """
+ plugin(self, *params, **options)
+ return self
+
+ def parse(self, src: str, env: MutableMapping | None = None) -> list[Token]:
+ """Parse the source string to a token stream
+
+ :param src: source string
+ :param env: environment sandbox
+
+ Parse input string and return list of block tokens (special token type
+ "inline" will contain list of inline tokens).
+
+ `env` is used to pass data between "distributed" rules and return additional
+ metadata like reference info, needed for the renderer. It also can be used to
+ inject data in specific cases. Usually, you will be ok to pass `{}`,
+ and then pass updated object to renderer.
+ """
+ env = {} if env is None else env
+ if not isinstance(env, MutableMapping):
+ raise TypeError(f"Input data should be a MutableMapping, not {type(env)}")
+ if not isinstance(src, str):
+ raise TypeError(f"Input data should be a string, not {type(src)}")
+ state = StateCore(src, self, env)
+ self.core.process(state)
+ return state.tokens
+
+ def render(self, src: str, env: MutableMapping | None = None) -> Any:
+ """Render markdown string into html. It does all magic for you :).
+
+ :param src: source string
+ :param env: environment sandbox
+ :returns: The output of the loaded renderer
+
+ `env` can be used to inject additional metadata (`{}` by default).
+ But you will not need it with high probability. See also comment
+ in [[MarkdownIt.parse]].
+ """
+ env = {} if env is None else env
+ return self.renderer.render(self.parse(src, env), self.options, env)
+
+ def parseInline(self, src: str, env: MutableMapping | None = None) -> list[Token]:
+ """The same as [[MarkdownIt.parse]] but skip all block rules.
+
+ :param src: source string
+ :param env: environment sandbox
+
+ It returns the
+ block tokens list with the single `inline` element, containing parsed inline
+ tokens in `children` property. Also updates `env` object.
+ """
+ env = {} if env is None else env
+ if not isinstance(env, MutableMapping):
+ raise TypeError(f"Input data should be a MutableMapping, not {type(env)}")
+ if not isinstance(src, str):
+ raise TypeError(f"Input data should be a string, not {type(src)}")
+ state = StateCore(src, self, env)
+ state.inlineMode = True
+ self.core.process(state)
+ return state.tokens
+
+ def renderInline(self, src: str, env: MutableMapping | None = None) -> Any:
+ """Similar to [[MarkdownIt.render]] but for single paragraph content.
+
+ :param src: source string
+ :param env: environment sandbox
+
+ Similar to [[MarkdownIt.render]] but for single paragraph content. Result
+ will NOT be wrapped into `<p>` tags.
+ """
+ env = {} if env is None else env
+ return self.renderer.render(self.parseInline(src, env), self.options, env)
+
+ # link methods
+
+ def validateLink(self, url: str) -> bool:
+ """Validate if the URL link is allowed in output.
+
+ This validator can prohibit more than really needed to prevent XSS.
+ It's a tradeoff to keep code simple and to be secure by default.
+
+ Note: the url should be normalized at this point, and existing entities decoded.
+ """
+ return normalize_url.validateLink(url)
+
+ def normalizeLink(self, url: str) -> str:
+ """Normalize destination URLs in links
+
+ ::
+
+ [label]: destination 'title'
+ ^^^^^^^^^^^
+ """
+ return normalize_url.normalizeLink(url)
+
+ def normalizeLinkText(self, link: str) -> str:
+ """Normalize autolink content
+
+ ::
+
+ <destination>
+ ~~~~~~~~~~~
+ """
+ return normalize_url.normalizeLinkText(link)
diff --git a/markdown_it/parser_block.py b/markdown_it/parser_block.py
new file mode 100644
index 0000000..f331ec5
--- /dev/null
+++ b/markdown_it/parser_block.py
@@ -0,0 +1,109 @@
+"""Block-level tokenizer."""
+from __future__ import annotations
+
+import logging
+
+from . import rules_block
+from .ruler import Ruler
+from .rules_block.state_block import StateBlock
+from .token import Token
+
+LOGGER = logging.getLogger(__name__)
+
+
+_rules: list[tuple] = [
+ # First 2 params - rule name & source. Secondary array - list of rules,
+ # which can be terminated by this one.
+ ("table", rules_block.table, ["paragraph", "reference"]),
+ ("code", rules_block.code),
+ ("fence", rules_block.fence, ["paragraph", "reference", "blockquote", "list"]),
+ (
+ "blockquote",
+ rules_block.blockquote,
+ ["paragraph", "reference", "blockquote", "list"],
+ ),
+ ("hr", rules_block.hr, ["paragraph", "reference", "blockquote", "list"]),
+ ("list", rules_block.list_block, ["paragraph", "reference", "blockquote"]),
+ ("reference", rules_block.reference),
+ ("html_block", rules_block.html_block, ["paragraph", "reference", "blockquote"]),
+ ("heading", rules_block.heading, ["paragraph", "reference", "blockquote"]),
+ ("lheading", rules_block.lheading),
+ ("paragraph", rules_block.paragraph),
+]
+
+
+class ParserBlock:
+ """
+ ParserBlock#ruler -> Ruler
+
+ [[Ruler]] instance. Keep configuration of block rules.
+ """
+
+ def __init__(self):
+ self.ruler = Ruler()
+ for data in _rules:
+ name = data[0]
+ rule = data[1]
+ self.ruler.push(name, rule, {"alt": data[2] if len(data) > 2 else []})
+
+ def tokenize(
+ self, state: StateBlock, startLine: int, endLine: int, silent: bool = False
+ ) -> None:
+ """Generate tokens for input range."""
+ rules = self.ruler.getRules("")
+ line = startLine
+ maxNesting = state.md.options.maxNesting
+ hasEmptyLines = False
+
+ while line < endLine:
+ state.line = line = state.skipEmptyLines(line)
+ if line >= endLine:
+ break
+ if state.sCount[line] < state.blkIndent:
+ # Termination condition for nested calls.
+ # Nested calls currently used for blockquotes & lists
+ break
+ if state.level >= maxNesting:
+ # If nesting level exceeded - skip tail to the end.
+ # That's not ordinary situation and we should not care about content.
+ state.line = endLine
+ break
+
+ # Try all possible rules.
+ # On success, rule should:
+ # - update `state.line`
+ # - update `state.tokens`
+ # - return True
+ for rule in rules:
+ if rule(state, line, endLine, False):
+ break
+
+ # set state.tight if we had an empty line before current tag
+ # i.e. latest empty line should not count
+ state.tight = not hasEmptyLines
+
+ line = state.line
+
+ # paragraph might "eat" one newline after it in nested lists
+ if (line - 1) < endLine and state.isEmpty(line - 1):
+ hasEmptyLines = True
+
+ if line < endLine and state.isEmpty(line):
+ hasEmptyLines = True
+ line += 1
+ state.line = line
+
+ def parse(
+ self,
+ src: str,
+ md,
+ env,
+ outTokens: list[Token],
+ ords: tuple[int, ...] | None = None,
+ ) -> list[Token] | None:
+ """Process input string and push block tokens into `outTokens`."""
+ if not src:
+ return None
+ state = StateBlock(src, md, env, outTokens, ords)
+ self.tokenize(state, state.line, state.lineMax)
+ return state.tokens
diff --git a/markdown_it/parser_core.py b/markdown_it/parser_core.py
new file mode 100644
index 0000000..32209b3
--- /dev/null
+++ b/markdown_it/parser_core.py
@@ -0,0 +1,32 @@
+"""
+ * class Core
+ *
+ * Top-level rules executor. Glues block/inline parsers and does intermediate
+ * transformations.
+"""
+from __future__ import annotations
+
+from .ruler import RuleFunc, Ruler
+from .rules_core import block, inline, linkify, normalize, replace, smartquotes
+from .rules_core.state_core import StateCore
+
+_rules: list[tuple[str, RuleFunc]] = [
+ ("normalize", normalize),
+ ("block", block),
+ ("inline", inline),
+ ("linkify", linkify),
+ ("replacements", replace),
+ ("smartquotes", smartquotes),
+]
+
+
+class ParserCore:
+ def __init__(self):
+ self.ruler = Ruler()
+ for name, rule in _rules:
+ self.ruler.push(name, rule)
+
+ def process(self, state: StateCore) -> None:
+ """Executes core chain rules."""
+ for rule in self.ruler.getRules(""):
+ rule(state)
diff --git a/markdown_it/parser_inline.py b/markdown_it/parser_inline.py
new file mode 100644
index 0000000..b61c990
--- /dev/null
+++ b/markdown_it/parser_inline.py
@@ -0,0 +1,124 @@
+"""Tokenizes paragraph content.
+"""
+from __future__ import annotations
+
+from . import rules_inline
+from .ruler import RuleFunc, Ruler
+from .rules_inline.state_inline import StateInline
+from .token import Token
+
+# Parser rules
+_rules: list[tuple[str, RuleFunc]] = [
+ ("text", rules_inline.text),
+ ("newline", rules_inline.newline),
+ ("escape", rules_inline.escape),
+ ("backticks", rules_inline.backtick),
+ ("strikethrough", rules_inline.strikethrough.tokenize),
+ ("emphasis", rules_inline.emphasis.tokenize),
+ ("link", rules_inline.link),
+ ("image", rules_inline.image),
+ ("autolink", rules_inline.autolink),
+ ("html_inline", rules_inline.html_inline),
+ ("entity", rules_inline.entity),
+]
+
+_rules2: list[tuple[str, RuleFunc]] = [
+ ("balance_pairs", rules_inline.link_pairs),
+ ("strikethrough", rules_inline.strikethrough.postProcess),
+ ("emphasis", rules_inline.emphasis.postProcess),
+ ("text_collapse", rules_inline.text_collapse),
+]
+
+
+class ParserInline:
+ def __init__(self):
+ self.ruler = Ruler()
+ for name, rule in _rules:
+ self.ruler.push(name, rule)
+ # Second ruler used for post-processing (e.g. in emphasis-like rules)
+ self.ruler2 = Ruler()
+ for name, rule2 in _rules2:
+ self.ruler2.push(name, rule2)
+
+ def skipToken(self, state: StateInline) -> None:
+ """Skip single token by running all rules in validation mode;
+ returns `True` if any rule reported success
+ """
+ ok = False
+ pos = state.pos
+ rules = self.ruler.getRules("")
+ maxNesting = state.md.options["maxNesting"]
+ cache = state.cache
+
+ if pos in cache:
+ state.pos = cache[pos]
+ return
+
+ if state.level < maxNesting:
+ for rule in rules:
+ # Increment state.level and decrement it later to limit recursion.
+ # It's harmless to do here, because no tokens are created.
+ # But ideally, we'd need a separate private state variable for this purpose.
+ state.level += 1
+ ok = rule(state, True)
+ state.level -= 1
+ if ok:
+ break
+ else:
+ # Too much nesting, just skip until the end of the paragraph.
+ #
+ # NOTE: this will cause links to behave incorrectly in the following case,
+ # when an amount of `[` is exactly equal to `maxNesting + 1`:
+ #
+ # [[[[[[[[[[[[[[[[[[[[[foo]()
+ #
+ # TODO: remove this workaround when CM standard will allow nested links
+ # (we can replace it by preventing links from being parsed in
+ # validation mode)
+ #
+ state.pos = state.posMax
+
+ if not ok:
+ state.pos += 1
+ cache[pos] = state.pos
+
+ def tokenize(self, state: StateInline) -> None:
+ """Generate tokens for input range."""
+ ok = False
+ rules = self.ruler.getRules("")
+ end = state.posMax
+ maxNesting = state.md.options["maxNesting"]
+
+ while state.pos < end:
+ # Try all possible rules.
+ # On success, rule should:
+ #
+ # - update `state.pos`
+ # - update `state.tokens`
+ # - return true
+
+ if state.level < maxNesting:
+ for rule in rules:
+ ok = rule(state, False)
+ if ok:
+ break
+
+ if ok:
+ if state.pos >= end:
+ break
+ continue
+
+ state.pending += state.src[state.pos]
+ state.pos += 1
+
+ if state.pending:
+ state.pushPending()
+
+ def parse(self, src: str, md, env, tokens: list[Token]) -> list[Token]:
+ """Process input string and push inline tokens into `tokens`"""
+ state = StateInline(src, md, env, tokens)
+ self.tokenize(state)
+ rules2 = self.ruler2.getRules("")
+ for rule in rules2:
+ rule(state)
+ return state.tokens
diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml
new file mode 100644
index 0000000..a6718fd
--- /dev/null
+++ b/markdown_it/port.yaml
@@ -0,0 +1,49 @@
+- package: markdown-it/markdown-it
+ version: 12.2.0
+ commit: 6e2de08a0b03d3d0dcc524b89710ce05f83a0283
+ date: Aug 2, 2021
+ notes:
+ - Rename variables that use python built-in names, e.g.
+ - `max` -> `maximum`
+ - `len` -> `length`
+ - `str` -> `string`
+ - |
+ Convert JS `for` loops to `while` loops
+ this is generally the main difference between the codes,
+ because in python you can't do e.g. `for {i=1;i<x;i++} {}`
+ - |
+ `env` is a common Python dictionary, and so does not have attribute access to keys,
+ as with JavaScript dictionaries.
+ `options` have attribute access only to core markdownit configuration options
+ - |
+ `Token.attrs` is a dictionary, instead of a list of lists.
+ Upstream the list format is only used to guarantee order: https://github.com/markdown-it/markdown-it/issues/142,
+ but in Python 3.7+ order of dictionaries is guaranteed.
+ One should anyhow use the `attrGet`, `attrSet`, `attrPush` and `attrJoin` methods
+ to manipulate `Token.attrs`, which have an identical signature to those upstream.
+ - Use python version of `charCodeAt`
+ - |
+ Reduce use of charCodeAt() by storing char codes in a srcCharCodes attribute for state
+ objects and sharing those whenever possible
+ This provides a significant performance boost
+ - |
+ In markdown_it/rules_block/reference.py,
+ record line range in state.env["references"] and add state.env["duplicate_refs"]
+ This is to allow renderers to report on issues regarding references
+ - |
+ The `MarkdownIt.__init__` signature is slightly different for updating options,
+ since you must always specify the config first, e.g.
+ use `MarkdownIt("commonmark", {"html": False})` instead of `MarkdownIt({"html": False})`
+ - The default configuration preset for `MarkdownIt` is "commonmark" not "default"
+ - Allow custom renderer to be passed to `MarkdownIt`
+ - |
+ change render method signatures
+ `func(tokens, idx, options, env, slf)` to
+ `func(self, tokens, idx, options, env)`
+ - |
+ Extensions add render methods by format
+ `MarkdownIt.add_render_rule(name, function, fmt="html")`,
+ rather than `MarkdownIt.renderer.rules[name] = function`
+ and renderers should declare a class property `__output__ = "html"`.
+ This allows for extensibility to more than just HTML renderers
+ - inline tokens in tables are assigned a map (this is helpful for propagation to children)
diff --git a/markdown_it/presets/__init__.py b/markdown_it/presets/__init__.py
new file mode 100644
index 0000000..16f10e5
--- /dev/null
+++ b/markdown_it/presets/__init__.py
@@ -0,0 +1,27 @@
+__all__ = ("commonmark", "default", "zero", "js_default", "gfm_like")
+
+from . import commonmark, default, zero
+
+js_default = default
+
+
+class gfm_like:
+ """GitHub Flavoured Markdown (GFM) like.
+
+ This adds the linkify, table and strikethrough components to CommonMark.
+
+ Note, it lacks task-list items and raw HTML filtering,
+ to meet the full GFM specification
+ (see https://github.github.com/gfm/#autolinks-extension-).
+ """
+
+ @staticmethod
+ def make():
+ config = commonmark.make()
+ config["components"]["core"]["rules"].append("linkify")
+ config["components"]["block"]["rules"].append("table")
+ config["components"]["inline"]["rules"].append("strikethrough")
+ config["components"]["inline"]["rules2"].append("strikethrough")
+ config["options"]["linkify"] = True
+ config["options"]["html"] = True
+ return config
diff --git a/markdown_it/presets/commonmark.py b/markdown_it/presets/commonmark.py
new file mode 100644
index 0000000..e44b66b
--- /dev/null
+++ b/markdown_it/presets/commonmark.py
@@ -0,0 +1,73 @@
+"""Commonmark default options.
+
+This differs to presets.default,
+primarily in that it allows HTML and does not enable components:
+
+- block: table
+- inline: strikethrough
+"""
+
+
+def make():
+ return {
+ "options": {
+ "maxNesting": 20, # Internal protection, recursion limit
+ "html": True, # Enable HTML tags in source,
+ # this is just a shorthand for .enable(["html_inline", "html_block"])
+ # used by the linkify rule:
+ "linkify": False, # autoconvert URL-like texts to links
+ # used by the replacements and smartquotes rules
+ # Enable some language-neutral replacements + quotes beautification
+ "typographer": False,
+ # used by the smartquotes rule:
+ # Double + single quotes replacement pairs, when typographer enabled,
+ # and smartquotes on. Could be either a String or an Array.
+ #
+ # For example, you can use '«»„“' for Russian, '„“‚‘' for German,
+ # and ['«\xA0', '\xA0»', '‹\xA0', '\xA0›'] for French (including nbsp).
+ "quotes": "\u201c\u201d\u2018\u2019", # /* “”‘’ */
+ # Renderer specific; these options are used directly in the HTML renderer
+ "xhtmlOut": True, # Use '/' to close single tags (<br />)
+ "breaks": False, # Convert '\n' in paragraphs into <br>
+ "langPrefix": "language-", # CSS language prefix for fenced blocks
+ # Highlighter function. Should return escaped HTML,
+ # or '' if the source string is not changed and should be escaped externally.
+ # If result starts with <pre... internal wrapper is skipped.
+ #
+ # function (/*str, lang, attrs*/) { return ''; }
+ #
+ "highlight": None,
+ },
+ "components": {
+ "core": {"rules": ["normalize", "block", "inline"]},
+ "block": {
+ "rules": [
+ "blockquote",
+ "code",
+ "fence",
+ "heading",
+ "hr",
+ "html_block",
+ "lheading",
+ "list",
+ "reference",
+ "paragraph",
+ ]
+ },
+ "inline": {
+ "rules": [
+ "autolink",
+ "backticks",
+ "emphasis",
+ "entity",
+ "escape",
+ "html_inline",
+ "image",
+ "link",
+ "newline",
+ "text",
+ ],
+ "rules2": ["balance_pairs", "emphasis", "text_collapse"],
+ },
+ },
+ }
diff --git a/markdown_it/presets/default.py b/markdown_it/presets/default.py
new file mode 100644
index 0000000..59f4855
--- /dev/null
+++ b/markdown_it/presets/default.py
@@ -0,0 +1,34 @@
+"""markdown-it default options."""
+
+
+def make():
+ return {
+ "options": {
+ "maxNesting": 100, # Internal protection, recursion limit
+ "html": False, # Enable HTML tags in source
+ # this is just a shorthand for .disable(["html_inline", "html_block"])
+ # used by the linkify rule:
+ "linkify": False, # autoconvert URL-like texts to links
+ # used by the replacements and smartquotes rules:
+ # Enable some language-neutral replacements + quotes beautification
+ "typographer": False,
+ # used by the smartquotes rule:
+ # Double + single quotes replacement pairs, when typographer enabled,
+ # and smartquotes on. Could be either a String or an Array.
+ # For example, you can use '«»„“' for Russian, '„“‚‘' for German,
+ # and ['«\xA0', '\xA0»', '‹\xA0', '\xA0›'] for French (including nbsp).
+ "quotes": "\u201c\u201d\u2018\u2019", # /* “”‘’ */
+ # Renderer specific; these options are used directly in the HTML renderer
+ "xhtmlOut": False, # Use '/' to close single tags (<br />)
+ "breaks": False, # Convert '\n' in paragraphs into <br>
+ "langPrefix": "language-", # CSS language prefix for fenced blocks
+ # Highlighter function. Should return escaped HTML,
+ # or '' if the source string is not changed and should be escaped externally.
+ # If result starts with <pre... internal wrapper is skipped.
+ #
+ # function (/*str, lang, attrs*/) { return ''; }
+ #
+ "highlight": None,
+ },
+ "components": {"core": {}, "block": {}, "inline": {}},
+ }
diff --git a/markdown_it/presets/zero.py b/markdown_it/presets/zero.py
new file mode 100644
index 0000000..af1d9c7
--- /dev/null
+++ b/markdown_it/presets/zero.py
@@ -0,0 +1,39 @@
+"""
+"Zero" preset, with nothing enabled. Useful for manual configuring of simple
+modes. For example, to parse bold/italic only.
+"""
+
+
+def make():
+ return {
+ "options": {
+ "maxNesting": 20, # Internal protection, recursion limit
+ "html": False, # Enable HTML tags in source
+ # this is just a shorthand for .disable(["html_inline", "html_block"])
+ # used by the linkify rule:
+ "linkify": False, # autoconvert URL-like texts to links
+ # used by the replacements and smartquotes rules:
+ # Enable some language-neutral replacements + quotes beautification
+ "typographer": False,
+ # used by the smartquotes rule:
+ # Double + single quotes replacement pairs, when typographer enabled,
+ # and smartquotes on. Could be either a String or an Array.
+ # For example, you can use '«»„“' for Russian, '„“‚‘' for German,
+ # and ['«\xA0', '\xA0»', '‹\xA0', '\xA0›'] for French (including nbsp).
+ "quotes": "\u201c\u201d\u2018\u2019", # /* “”‘’ */
+ # Renderer specific; these options are used directly in the HTML renderer
+ "xhtmlOut": False, # Use '/' to close single tags (<br />)
+ "breaks": False, # Convert '\n' in paragraphs into <br>
+ "langPrefix": "language-", # CSS language prefix for fenced blocks
+ # Highlighter function. Should return escaped HTML,
+ # or '' if the source string is not changed and should be escaped externally.
+ # If result starts with <pre... internal wrapper is skipped.
+ # function (/*str, lang, attrs*/) { return ''; }
+ "highlight": None,
+ },
+ "components": {
+ "core": {"rules": ["normalize", "block", "inline"]},
+ "block": {"rules": ["paragraph"]},
+ "inline": {"rules": ["text"], "rules2": ["balance_pairs", "text_collapse"]},
+ },
+ }
diff --git a/markdown_it/py.typed b/markdown_it/py.typed
new file mode 100644
index 0000000..7632ecf
--- /dev/null
+++ b/markdown_it/py.typed
@@ -0,0 +1 @@
+# Marker file for PEP 561
diff --git a/markdown_it/renderer.py b/markdown_it/renderer.py
new file mode 100644
index 0000000..b8bfe4d
--- /dev/null
+++ b/markdown_it/renderer.py
@@ -0,0 +1,339 @@
+"""
+class Renderer
+
+Generates HTML from parsed token stream. Each instance has independent
+copy of rules. Those can be rewritten with ease. Also, you can add new
+rules if you create a plugin and add new token types.
+"""
+from __future__ import annotations
+
+from collections.abc import MutableMapping, Sequence
+import inspect
+from typing import Any, ClassVar
+
+from .common.utils import escapeHtml, unescapeAll
+from .token import Token
+from .utils import OptionsDict
+
+try:
+ from typing import Protocol
+except ImportError: # Python <3.8 doesn't have `Protocol` in the stdlib
+ from typing_extensions import Protocol # type: ignore[misc]
+
+
+class RendererProtocol(Protocol):
+ __output__: ClassVar[str]
+
+ def render(
+ self, tokens: Sequence[Token], options: OptionsDict, env: MutableMapping
+ ) -> Any:
+ ...
+
+
+class RendererHTML(RendererProtocol):
+ """Contains render rules for tokens. Can be updated and extended.
+
+ Example:
+
+ Each rule is called as independent static function with fixed signature:
+
+ ::
+
+ class Renderer:
+ def token_type_name(self, tokens, idx, options, env):
+ # ...
+ return renderedHTML
+
+ ::
+
+ class CustomRenderer(RendererHTML):
+ def strong_open(self, tokens, idx, options, env):
+ return '<b>'
+ def strong_close(self, tokens, idx, options, env):
+ return '</b>'
+
+ md = MarkdownIt(renderer_cls=CustomRenderer)
+
+ result = md.render(...)
+
+ See https://github.com/markdown-it/markdown-it/blob/master/lib/renderer.js
+ for more details and examples.
+ """
+
+ __output__ = "html"
+
+ def __init__(self, parser=None):
+ self.rules = {
+ k: v
+ for k, v in inspect.getmembers(self, predicate=inspect.ismethod)
+ if not (k.startswith("render") or k.startswith("_"))
+ }
+
+ def render(
+ self, tokens: Sequence[Token], options: OptionsDict, env: MutableMapping
+ ) -> str:
+ """Takes token stream and generates HTML.
+
+ :param tokens: list of block tokens to render
+ :param options: params of parser instance
+ :param env: additional data from parsed input
+
+ """
+ result = ""
+
+ for i, token in enumerate(tokens):
+
+ if token.type == "inline":
+ assert token.children is not None
+ result += self.renderInline(token.children, options, env)
+ elif token.type in self.rules:
+ result += self.rules[token.type](tokens, i, options, env)
+ else:
+ result += self.renderToken(tokens, i, options, env)
+
+ return result
+
+ def renderInline(
+ self, tokens: Sequence[Token], options: OptionsDict, env: MutableMapping
+ ) -> str:
+ """The same as ``render``, but for single token of `inline` type.
+
+ :param tokens: list of block tokens to render
+ :param options: params of parser instance
+ :param env: additional data from parsed input (references, for example)
+ """
+ result = ""
+
+ for i, token in enumerate(tokens):
+ if token.type in self.rules:
+ result += self.rules[token.type](tokens, i, options, env)
+ else:
+ result += self.renderToken(tokens, i, options, env)
+
+ return result
+
+ def renderToken(
+ self,
+ tokens: Sequence[Token],
+ idx: int,
+ options: OptionsDict,
+ env: MutableMapping,
+ ) -> str:
+ """Default token renderer.
+
+ Can be overridden by custom function
+
+ :param idx: token index to render
+ :param options: params of parser instance
+ """
+ result = ""
+ needLf = False
+ token = tokens[idx]
+
+ # Tight list paragraphs
+ if token.hidden:
+ return ""
+
+ # Insert a newline between hidden paragraph and subsequent opening
+ # block-level tag.
+ #
+ # For example, here we should insert a newline before blockquote:
+ # - a
+ # >
+ #
+ if token.block and token.nesting != -1 and idx and tokens[idx - 1].hidden:
+ result += "\n"
+
+ # Add token name, e.g. `<img`
+ result += ("</" if token.nesting == -1 else "<") + token.tag
+
+ # Encode attributes, e.g. `<img src="foo"`
+ result += self.renderAttrs(token)
+
+ # Add a slash for self-closing tags, e.g. `<img src="foo" /`
+ if token.nesting == 0 and options["xhtmlOut"]:
+ result += " /"
+
+ # Check if we need to add a newline after this tag
+ if token.block:
+ needLf = True
+
+ if token.nesting == 1:
+ if idx + 1 < len(tokens):
+ nextToken = tokens[idx + 1]
+
+ if nextToken.type == "inline" or nextToken.hidden:
+ # Block-level tag containing an inline tag.
+ #
+ needLf = False
+
+ elif nextToken.nesting == -1 and nextToken.tag == token.tag:
+ # Opening tag + closing tag of the same type. E.g. `<li></li>`.
+ #
+ needLf = False
+
+ result += ">\n" if needLf else ">"
+
+ return result
+
+ @staticmethod
+ def renderAttrs(token: Token) -> str:
+ """Render token attributes to string."""
+ result = ""
+
+ for key, value in token.attrItems():
+ result += " " + escapeHtml(key) + '="' + escapeHtml(str(value)) + '"'
+
+ return result
+
+ def renderInlineAsText(
+ self,
+ tokens: Sequence[Token] | None,
+ options: OptionsDict,
+ env: MutableMapping,
+ ) -> str:
+ """Special kludge for image `alt` attributes to conform to the CommonMark spec.
+
+ Don't try to use it! Spec requires to show `alt` content with stripped markup,
+ instead of simple escaping.
+
+ :param tokens: list of block tokens to render
+ :param options: params of parser instance
+ :param env: additional data from parsed input
+ """
+ result = ""
+
+ for token in tokens or []:
+ if token.type == "text":
+ result += token.content
+ elif token.type == "image":
+ assert token.children is not None
+ result += self.renderInlineAsText(token.children, options, env)
+ elif token.type == "softbreak":
+ result += "\n"
+
+ return result
+
+ ###################################################
+
+ def code_inline(self, tokens: Sequence[Token], idx: int, options, env) -> str:
+ token = tokens[idx]
+ return (
+ "<code"
+ + self.renderAttrs(token)
+ + ">"
+ + escapeHtml(tokens[idx].content)
+ + "</code>"
+ )
+
+ def code_block(
+ self,
+ tokens: Sequence[Token],
+ idx: int,
+ options: OptionsDict,
+ env: MutableMapping,
+ ) -> str:
+ token = tokens[idx]
+
+ return (
+ "<pre"
+ + self.renderAttrs(token)
+ + "><code>"
+ + escapeHtml(tokens[idx].content)
+ + "</code></pre>\n"
+ )
+
+ def fence(
+ self,
+ tokens: Sequence[Token],
+ idx: int,
+ options: OptionsDict,
+ env: MutableMapping,
+ ) -> str:
+ token = tokens[idx]
+ info = unescapeAll(token.info).strip() if token.info else ""
+ langName = ""
+ langAttrs = ""
+
+ if info:
+ arr = info.split(maxsplit=1)
+ langName = arr[0]
+ if len(arr) == 2:
+ langAttrs = arr[1]
+
+ if options.highlight:
+ highlighted = options.highlight(
+ token.content, langName, langAttrs
+ ) or escapeHtml(token.content)
+ else:
+ highlighted = escapeHtml(token.content)
+
+ if highlighted.startswith("<pre"):
+ return highlighted + "\n"
+
+ # If language exists, inject class gently, without modifying original token.
+ # May be, one day we will add .deepClone() for token and simplify this part, but
+ # now we prefer to keep things local.
+ if info:
+ # Fake token just to render attributes
+ tmpToken = Token(type="", tag="", nesting=0, attrs=token.attrs.copy())
+ tmpToken.attrJoin("class", options.langPrefix + langName)
+
+ return (
+ "<pre><code"
+ + self.renderAttrs(tmpToken)
+ + ">"
+ + highlighted
+ + "</code></pre>\n"
+ )
+
+ return (
+ "<pre><code"
+ + self.renderAttrs(token)
+ + ">"
+ + highlighted
+ + "</code></pre>\n"
+ )
+
+ def image(
+ self,
+ tokens: Sequence[Token],
+ idx: int,
+ options: OptionsDict,
+ env: MutableMapping,
+ ) -> str:
+ token = tokens[idx]
+
+ # "alt" attr MUST be set, even if empty. Because it's mandatory and
+ # should be placed on proper position for tests.
+
+ assert (
+ token.attrs and "alt" in token.attrs
+ ), '"image" token\'s attrs must contain `alt`'
+
+ # Replace content with actual value
+
+ token.attrSet("alt", self.renderInlineAsText(token.children, options, env))
+
+ return self.renderToken(tokens, idx, options, env)
+
+ def hardbreak(
+ self, tokens: Sequence[Token], idx: int, options: OptionsDict, *args
+ ) -> str:
+ return "<br />\n" if options.xhtmlOut else "<br>\n"
+
+ def softbreak(
+ self, tokens: Sequence[Token], idx: int, options: OptionsDict, *args
+ ) -> str:
+ return (
+ ("<br />\n" if options.xhtmlOut else "<br>\n") if options.breaks else "\n"
+ )
+
+ def text(self, tokens: Sequence[Token], idx: int, *args) -> str:
+ return escapeHtml(tokens[idx].content)
+
+ def html_block(self, tokens: Sequence[Token], idx: int, *args) -> str:
+ return tokens[idx].content
+
+ def html_inline(self, tokens: Sequence[Token], idx: int, *args) -> str:
+ return tokens[idx].content
diff --git a/markdown_it/ruler.py b/markdown_it/ruler.py
new file mode 100644
index 0000000..11b937a
--- /dev/null
+++ b/markdown_it/ruler.py
@@ -0,0 +1,237 @@
+"""
+class Ruler
+
+Helper class, used by [[MarkdownIt#core]], [[MarkdownIt#block]] and
+[[MarkdownIt#inline]] to manage sequences of functions (rules):
+
+- keep rules in defined order
+- assign the name to each rule
+- enable/disable rules
+- add/replace rules
+- allow assigning rules to additional named chains (within the same ruler)
+- caching lists of active rules
+
+You will not need to use this class directly until you write plugins. For
+simple rules control use [[MarkdownIt.disable]], [[MarkdownIt.enable]] and
+[[MarkdownIt.use]].
+"""
+from __future__ import annotations
+
+from collections.abc import Callable, Iterable, MutableMapping
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING
+
+from markdown_it._compat import DATACLASS_KWARGS
+
+if TYPE_CHECKING:
+ from markdown_it import MarkdownIt
+
+
+class StateBase:
+ srcCharCode: tuple[int, ...]
+
+ def __init__(self, src: str, md: MarkdownIt, env: MutableMapping):
+ self.src = src
+ self.env = env
+ self.md = md
+
+ @property
+ def src(self) -> str:
+ return self._src
+
+ @src.setter
+ def src(self, value: str) -> None:
+ self._src = value
+ self.srcCharCode = tuple(ord(c) for c in self.src)
+
+
+# The first positional arg is always a subtype of `StateBase`. Other
+# arguments may or may not exist, based on the rule's type (block,
+# core, inline). Return type is either `None` or `bool` based on the
+# rule's type.
+RuleFunc = Callable
+
+
+@dataclass(**DATACLASS_KWARGS)
+class Rule:
+ name: str
+ enabled: bool
+ fn: RuleFunc = field(repr=False)
+ alt: list[str]
+
+
+class Ruler:
+ def __init__(self):
+ # List of added rules.
+ self.__rules__: list[Rule] = []
+ # Cached rule chains.
+ # First level - chain name, '' for default.
+ # Second level - digital anchor for fast filtering by charcodes.
+ self.__cache__: dict[str, list[RuleFunc]] | None = None
+
+ def __find__(self, name: str) -> int:
+ """Find rule index by name"""
+ for i, rule in enumerate(self.__rules__):
+ if rule.name == name:
+ return i
+ return -1
+
+ def __compile__(self) -> None:
+ """Build rules lookup cache"""
+ chains = {""}
+ # collect unique names
+ for rule in self.__rules__:
+ if not rule.enabled:
+ continue
+ for name in rule.alt:
+ chains.add(name)
+ self.__cache__ = {}
+ for chain in chains:
+ self.__cache__[chain] = []
+ for rule in self.__rules__:
+ if not rule.enabled:
+ continue
+ if chain and (chain not in rule.alt):
+ continue
+ self.__cache__[chain].append(rule.fn)
+
+ def at(self, ruleName: str, fn: RuleFunc, options=None):
+ """Replace rule by name with new function & options.
+
+ :param ruleName: rule name to replace.
+ :param fn: new rule function.
+ :param options: new rule options (not mandatory).
+ :raises: KeyError if name not found
+ """
+ index = self.__find__(ruleName)
+ options = options or {}
+ if index == -1:
+ raise KeyError(f"Parser rule not found: {ruleName}")
+ self.__rules__[index].fn = fn
+ self.__rules__[index].alt = options.get("alt", [])
+ self.__cache__ = None
+
+ def before(self, beforeName: str, ruleName: str, fn: RuleFunc, options=None):
+ """Add new rule to chain before one with given name.
+
+ :param beforeName: new rule will be added before this one.
+ :param ruleName: new rule will be added before this one.
+ :param fn: new rule function.
+ :param options: new rule options (not mandatory).
+ :raises: KeyError if name not found
+ """
+ index = self.__find__(beforeName)
+ options = options or {}
+ if index == -1:
+ raise KeyError(f"Parser rule not found: {beforeName}")
+ self.__rules__.insert(index, Rule(ruleName, True, fn, options.get("alt", [])))
+ self.__cache__ = None
+
+ def after(self, afterName: str, ruleName: str, fn: RuleFunc, options=None):
+ """Add new rule to chain after one with given name.
+
+ :param afterName: new rule will be added after this one.
+ :param ruleName: new rule will be added after this one.
+ :param fn: new rule function.
+ :param options: new rule options (not mandatory).
+ :raises: KeyError if name not found
+ """
+ index = self.__find__(afterName)
+ options = options or {}
+ if index == -1:
+ raise KeyError(f"Parser rule not found: {afterName}")
+ self.__rules__.insert(
+ index + 1, Rule(ruleName, True, fn, options.get("alt", []))
+ )
+ self.__cache__ = None
+
+ def push(self, ruleName: str, fn: RuleFunc, options=None):
+ """Push new rule to the end of chain.
+
+ :param ruleName: new rule will be added to the end of chain.
+ :param fn: new rule function.
+ :param options: new rule options (not mandatory).
+
+ """
+ self.__rules__.append(Rule(ruleName, True, fn, (options or {}).get("alt", [])))
+ self.__cache__ = None
+
+ def enable(self, names: str | Iterable[str], ignoreInvalid: bool = False):
+ """Enable rules with given names.
+
+ :param names: name or list of rule names to enable.
+ :param ignoreInvalid: ignore errors when rule not found
+ :raises: KeyError if name not found and not ignoreInvalid
+ :return: list of found rule names
+ """
+ if isinstance(names, str):
+ names = [names]
+ result = []
+ for name in names:
+ idx = self.__find__(name)
+ if (idx < 0) and ignoreInvalid:
+ continue
+ if (idx < 0) and not ignoreInvalid:
+ raise KeyError(f"Rules manager: invalid rule name {name}")
+ self.__rules__[idx].enabled = True
+ result.append(name)
+ self.__cache__ = None
+ return result
+
+ def enableOnly(self, names: str | Iterable[str], ignoreInvalid: bool = False):
+ """Enable rules with given names, and disable everything else.
+
+ :param names: name or list of rule names to enable.
+ :param ignoreInvalid: ignore errors when rule not found
+ :raises: KeyError if name not found and not ignoreInvalid
+ :return: list of found rule names
+ """
+ if isinstance(names, str):
+ names = [names]
+ for rule in self.__rules__:
+ rule.enabled = False
+ self.enable(names, ignoreInvalid)
+
+ def disable(self, names: str | Iterable[str], ignoreInvalid: bool = False):
+ """Disable rules with given names.
+
+ :param names: name or list of rule names to enable.
+ :param ignoreInvalid: ignore errors when rule not found
+ :raises: KeyError if name not found and not ignoreInvalid
+ :return: list of found rule names
+ """
+ if isinstance(names, str):
+ names = [names]
+ result = []
+ for name in names:
+ idx = self.__find__(name)
+ if (idx < 0) and ignoreInvalid:
+ continue
+ if (idx < 0) and not ignoreInvalid:
+ raise KeyError(f"Rules manager: invalid rule name {name}")
+ self.__rules__[idx].enabled = False
+ result.append(name)
+ self.__cache__ = None
+ return result
+
+ def getRules(self, chainName: str) -> list[RuleFunc]:
+ """Return array of active functions (rules) for given chain name.
+ It analyzes rules configuration, compiles caches if not exists and returns result.
+
+ Default chain name is `''` (empty string). It can't be skipped.
+ That's done intentionally, to keep signature monomorphic for high speed.
+
+ """
+ if self.__cache__ is None:
+ self.__compile__()
+ assert self.__cache__ is not None
+ # Chain can be empty, if rules disabled. But we still have to return Array.
+ return self.__cache__.get(chainName, []) or []
+
+ def get_all_rules(self) -> list[str]:
+ """Return all available rule names."""
+ return [r.name for r in self.__rules__]
+
+ def get_active_rules(self) -> list[str]:
+ """Return the active rule names."""
+ return [r.name for r in self.__rules__ if r.enabled]
diff --git a/markdown_it/rules_block/__init__.py b/markdown_it/rules_block/__init__.py
new file mode 100644
index 0000000..bcf138d
--- /dev/null
+++ b/markdown_it/rules_block/__init__.py
@@ -0,0 +1,27 @@
+__all__ = (
+ "StateBlock",
+ "paragraph",
+ "heading",
+ "lheading",
+ "code",
+ "fence",
+ "hr",
+ "list_block",
+ "reference",
+ "blockquote",
+ "html_block",
+ "table",
+)
+
+from .blockquote import blockquote
+from .code import code
+from .fence import fence
+from .heading import heading
+from .hr import hr
+from .html_block import html_block
+from .lheading import lheading
+from .list import list_block
+from .paragraph import paragraph
+from .reference import reference
+from .state_block import StateBlock
+from .table import table
diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py
new file mode 100644
index 0000000..6575731
--- /dev/null
+++ b/markdown_it/rules_block/blockquote.py
@@ -0,0 +1,299 @@
+# Block quotes
+from __future__ import annotations
+
+import logging
+
+from ..common.utils import isSpace
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
+def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
+
+ LOGGER.debug(
+ "entering blockquote: %s, %s, %s, %s", state, startLine, endLine, silent
+ )
+
+ oldLineMax = state.lineMax
+ pos = state.bMarks[startLine] + state.tShift[startLine]
+ max = state.eMarks[startLine]
+
+ # if it's indented more than 3 spaces, it should be a code block
+ if (state.sCount[startLine] - state.blkIndent) >= 4:
+ return False
+
+ # check the block quote marker
+ if state.srcCharCode[pos] != 0x3E: # /* > */
+ return False
+ pos += 1
+
+ # we know that it's going to be a valid blockquote,
+ # so no point trying to find the end of it in silent mode
+ if silent:
+ return True
+
+ # set offset past spaces and ">"
+ initial = offset = state.sCount[startLine] + 1
+
+ try:
+ second_char_code: int | None = state.srcCharCode[pos]
+ except IndexError:
+ second_char_code = None
+
+ # skip one optional space after '>'
+ if second_char_code == 0x20: # /* space */
+ # ' > test '
+ # ^ -- position start of line here:
+ pos += 1
+ initial += 1
+ offset += 1
+ adjustTab = False
+ spaceAfterMarker = True
+ elif second_char_code == 0x09: # /* tab */
+ spaceAfterMarker = True
+
+ if (state.bsCount[startLine] + offset) % 4 == 3:
+ # ' >\t test '
+ # ^ -- position start of line here (tab has width==1)
+ pos += 1
+ initial += 1
+ offset += 1
+ adjustTab = False
+ else:
+ # ' >\t test '
+ # ^ -- position start of line here + shift bsCount slightly
+ # to make extra space appear
+ adjustTab = True
+
+ else:
+ spaceAfterMarker = False
+
+ oldBMarks = [state.bMarks[startLine]]
+ state.bMarks[startLine] = pos
+
+ while pos < max:
+ ch = state.srcCharCode[pos]
+
+ if isSpace(ch):
+ if ch == 0x09: # /* tab */
+ offset += (
+ 4
+ - (offset + state.bsCount[startLine] + (1 if adjustTab else 0)) % 4
+ )
+ else:
+ offset += 1
+
+ else:
+ break
+
+ pos += 1
+
+ oldBSCount = [state.bsCount[startLine]]
+ state.bsCount[startLine] = (
+ state.sCount[startLine] + 1 + (1 if spaceAfterMarker else 0)
+ )
+
+ lastLineEmpty = pos >= max
+
+ oldSCount = [state.sCount[startLine]]
+ state.sCount[startLine] = offset - initial
+
+ oldTShift = [state.tShift[startLine]]
+ state.tShift[startLine] = pos - state.bMarks[startLine]
+
+ terminatorRules = state.md.block.ruler.getRules("blockquote")
+
+ oldParentType = state.parentType
+ state.parentType = "blockquote"
+
+ # Search the end of the block
+ #
+ # Block ends with either:
+ # 1. an empty line outside:
+ # ```
+ # > test
+ #
+ # ```
+ # 2. an empty line inside:
+ # ```
+ # >
+ # test
+ # ```
+ # 3. another tag:
+ # ```
+ # > test
+ # - - -
+ # ```
+
+ # for (nextLine = startLine + 1; nextLine < endLine; nextLine++) {
+ nextLine = startLine + 1
+ while nextLine < endLine:
+
+ # check if it's outdented, i.e. it's inside list item and indented
+ # less than said list item:
+ #
+ # ```
+ # 1. anything
+ # > current blockquote
+ # 2. checking this line
+ # ```
+ isOutdented = state.sCount[nextLine] < state.blkIndent
+
+ pos = state.bMarks[nextLine] + state.tShift[nextLine]
+ max = state.eMarks[nextLine]
+
+ if pos >= max:
+ # Case 1: line is not inside the blockquote, and this line is empty.
+ break
+
+ evaluatesTrue = state.srcCharCode[pos] == 0x3E and not isOutdented # /* > */
+ pos += 1
+ if evaluatesTrue:
+ # This line is inside the blockquote.
+
+ # set offset past spaces and ">"
+ initial = offset = state.sCount[nextLine] + 1
+
+ try:
+ next_char: int | None = state.srcCharCode[pos]
+ except IndexError:
+ next_char = None
+
+ # skip one optional space after '>'
+ if next_char == 0x20: # /* space */
+ # ' > test '
+ # ^ -- position start of line here:
+ pos += 1
+ initial += 1
+ offset += 1
+ adjustTab = False
+ spaceAfterMarker = True
+ elif next_char == 0x09: # /* tab */
+ spaceAfterMarker = True
+
+ if (state.bsCount[nextLine] + offset) % 4 == 3:
+ # ' >\t test '
+ # ^ -- position start of line here (tab has width==1)
+ pos += 1
+ initial += 1
+ offset += 1
+ adjustTab = False
+ else:
+ # ' >\t test '
+ # ^ -- position start of line here + shift bsCount slightly
+ # to make extra space appear
+ adjustTab = True
+
+ else:
+ spaceAfterMarker = False
+
+ oldBMarks.append(state.bMarks[nextLine])
+ state.bMarks[nextLine] = pos
+
+ while pos < max:
+ ch = state.srcCharCode[pos]
+
+ if isSpace(ch):
+ if ch == 0x09:
+ offset += (
+ 4
+ - (
+ offset
+ + state.bsCount[nextLine]
+ + (1 if adjustTab else 0)
+ )
+ % 4
+ )
+ else:
+ offset += 1
+ else:
+ break
+
+ pos += 1
+
+ lastLineEmpty = pos >= max
+
+ oldBSCount.append(state.bsCount[nextLine])
+ state.bsCount[nextLine] = (
+ state.sCount[nextLine] + 1 + (1 if spaceAfterMarker else 0)
+ )
+
+ oldSCount.append(state.sCount[nextLine])
+ state.sCount[nextLine] = offset - initial
+
+ oldTShift.append(state.tShift[nextLine])
+ state.tShift[nextLine] = pos - state.bMarks[nextLine]
+
+ nextLine += 1
+ continue
+
+ # Case 2: line is not inside the blockquote, and the last line was empty.
+ if lastLineEmpty:
+ break
+
+ # Case 3: another tag found.
+ terminate = False
+
+ for terminatorRule in terminatorRules:
+ if terminatorRule(state, nextLine, endLine, True):
+ terminate = True
+ break
+
+ if terminate:
+ # Quirk to enforce "hard termination mode" for paragraphs;
+ # normally if you call `tokenize(state, startLine, nextLine)`,
+ # paragraphs will look below nextLine for paragraph continuation,
+ # but if blockquote is terminated by another tag, they shouldn't
+ state.lineMax = nextLine
+
+ if state.blkIndent != 0:
+ # state.blkIndent was non-zero, we now set it to zero,
+ # so we need to re-calculate all offsets to appear as
+ # if indent wasn't changed
+ oldBMarks.append(state.bMarks[nextLine])
+ oldBSCount.append(state.bsCount[nextLine])
+ oldTShift.append(state.tShift[nextLine])
+ oldSCount.append(state.sCount[nextLine])
+ state.sCount[nextLine] -= state.blkIndent
+
+ break
+
+ oldBMarks.append(state.bMarks[nextLine])
+ oldBSCount.append(state.bsCount[nextLine])
+ oldTShift.append(state.tShift[nextLine])
+ oldSCount.append(state.sCount[nextLine])
+
+ # A negative indentation means that this is a paragraph continuation
+ #
+ state.sCount[nextLine] = -1
+
+ nextLine += 1
+
+ oldIndent = state.blkIndent
+ state.blkIndent = 0
+
+ token = state.push("blockquote_open", "blockquote", 1)
+ token.markup = ">"
+ token.map = lines = [startLine, 0]
+
+ state.md.block.tokenize(state, startLine, nextLine)
+
+ token = state.push("blockquote_close", "blockquote", -1)
+ token.markup = ">"
+
+ state.lineMax = oldLineMax
+ state.parentType = oldParentType
+ lines[1] = state.line
+
+ # Restore original tShift; this might not be necessary since the parser
+ # has already been here, but just to make sure we can do that.
+ for i, item in enumerate(oldTShift):
+ state.bMarks[i + startLine] = oldBMarks[i]
+ state.tShift[i + startLine] = item
+ state.sCount[i + startLine] = oldSCount[i]
+ state.bsCount[i + startLine] = oldBSCount[i]
+
+ state.blkIndent = oldIndent
+
+ return True
diff --git a/markdown_it/rules_block/code.py b/markdown_it/rules_block/code.py
new file mode 100644
index 0000000..c4fdba3
--- /dev/null
+++ b/markdown_it/rules_block/code.py
@@ -0,0 +1,36 @@
+"""Code block (4 spaces padded)."""
+import logging
+
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
+def code(state: StateBlock, startLine: int, endLine: int, silent: bool = False):
+
+ LOGGER.debug("entering code: %s, %s, %s, %s", state, startLine, endLine, silent)
+
+ if state.sCount[startLine] - state.blkIndent < 4:
+ return False
+
+ last = nextLine = startLine + 1
+
+ while nextLine < endLine:
+ if state.isEmpty(nextLine):
+ nextLine += 1
+ continue
+
+ if state.sCount[nextLine] - state.blkIndent >= 4:
+ nextLine += 1
+ last = nextLine
+ continue
+
+ break
+
+ state.line = last
+
+ token = state.push("code_block", "code", 0)
+ token.content = state.getLines(startLine, last, 4 + state.blkIndent, False) + "\n"
+ token.map = [startLine, state.line]
+
+ return True
diff --git a/markdown_it/rules_block/fence.py b/markdown_it/rules_block/fence.py
new file mode 100644
index 0000000..c4f5275
--- /dev/null
+++ b/markdown_it/rules_block/fence.py
@@ -0,0 +1,104 @@
+# fences (``` lang, ~~~ lang)
+import logging
+
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
+def fence(state: StateBlock, startLine: int, endLine: int, silent: bool):
+
+ LOGGER.debug("entering fence: %s, %s, %s, %s", state, startLine, endLine, silent)
+
+ haveEndMarker = False
+ pos = state.bMarks[startLine] + state.tShift[startLine]
+ maximum = state.eMarks[startLine]
+
+ # if it's indented more than 3 spaces, it should be a code block
+ if state.sCount[startLine] - state.blkIndent >= 4:
+ return False
+
+ if pos + 3 > maximum:
+ return False
+
+ marker = state.srcCharCode[pos]
+
+ # /* ~ */ /* ` */
+ if marker != 0x7E and marker != 0x60:
+ return False
+
+ # scan marker length
+ mem = pos
+ pos = state.skipChars(pos, marker)
+
+ length = pos - mem
+
+ if length < 3:
+ return False
+
+ markup = state.src[mem:pos]
+ params = state.src[pos:maximum]
+
+ # /* ` */
+ if marker == 0x60:
+ if chr(marker) in params:
+ return False
+
+ # Since start is found, we can report success here in validation mode
+ if silent:
+ return True
+
+ # search end of block
+ nextLine = startLine
+
+ while True:
+ nextLine += 1
+ if nextLine >= endLine:
+ # unclosed block should be autoclosed by end of document.
+ # also block seems to be autoclosed by end of parent
+ break
+
+ pos = mem = state.bMarks[nextLine] + state.tShift[nextLine]
+ maximum = state.eMarks[nextLine]
+
+ if pos < maximum and state.sCount[nextLine] < state.blkIndent:
+ # non-empty line with negative indent should stop the list:
+ # - ```
+ # test
+ break
+
+ if state.srcCharCode[pos] != marker:
+ continue
+
+ if state.sCount[nextLine] - state.blkIndent >= 4:
+ # closing fence should be indented less than 4 spaces
+ continue
+
+ pos = state.skipChars(pos, marker)
+
+ # closing code fence must be at least as long as the opening one
+ if pos - mem < length:
+ continue
+
+ # make sure tail has spaces only
+ pos = state.skipSpaces(pos)
+
+ if pos < maximum:
+ continue
+
+ haveEndMarker = True
+ # found!
+ break
+
+ # If a fence has heading spaces, they should be removed from its inner block
+ length = state.sCount[startLine]
+
+ state.line = nextLine + (1 if haveEndMarker else 0)
+
+ token = state.push("fence", "code", 0)
+ token.info = params
+ token.content = state.getLines(startLine + 1, nextLine, length, True)
+ token.markup = markup
+ token.map = [startLine, state.line]
+
+ return True
diff --git a/markdown_it/rules_block/heading.py b/markdown_it/rules_block/heading.py
new file mode 100644
index 0000000..8d4ef3e
--- /dev/null
+++ b/markdown_it/rules_block/heading.py
@@ -0,0 +1,72 @@
+""" ATX heading (#, ##, ...) """
+from __future__ import annotations
+
+import logging
+
+from ..common.utils import isSpace
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
+def heading(state: StateBlock, startLine: int, endLine: int, silent: bool):
+
+ LOGGER.debug("entering heading: %s, %s, %s, %s", state, startLine, endLine, silent)
+
+ pos = state.bMarks[startLine] + state.tShift[startLine]
+ maximum = state.eMarks[startLine]
+
+ # if it's indented more than 3 spaces, it should be a code block
+ if state.sCount[startLine] - state.blkIndent >= 4:
+ return False
+
+ ch: int | None = state.srcCharCode[pos]
+
+ # /* # */
+ if ch != 0x23 or pos >= maximum:
+ return False
+
+ # count heading level
+ level = 1
+ pos += 1
+ try:
+ ch = state.srcCharCode[pos]
+ except IndexError:
+ ch = None
+ # /* # */
+ while ch == 0x23 and pos < maximum and level <= 6:
+ level += 1
+ pos += 1
+ try:
+ ch = state.srcCharCode[pos]
+ except IndexError:
+ ch = None
+
+ if level > 6 or (pos < maximum and not isSpace(ch)):
+ return False
+
+ if silent:
+ return True
+
+ # Let's cut tails like ' ### ' from the end of string
+
+ maximum = state.skipSpacesBack(maximum, pos)
+ tmp = state.skipCharsBack(maximum, 0x23, pos) # #
+ if tmp > pos and isSpace(state.srcCharCode[tmp - 1]):
+ maximum = tmp
+
+ state.line = startLine + 1
+
+ token = state.push("heading_open", "h" + str(level), 1)
+ token.markup = "########"[:level]
+ token.map = [startLine, state.line]
+
+ token = state.push("inline", "", 0)
+ token.content = state.src[pos:maximum].strip()
+ token.map = [startLine, state.line]
+ token.children = []
+
+ token = state.push("heading_close", "h" + str(level), -1)
+ token.markup = "########"[:level]
+
+ return True
diff --git a/markdown_it/rules_block/hr.py b/markdown_it/rules_block/hr.py
new file mode 100644
index 0000000..804cd9d
--- /dev/null
+++ b/markdown_it/rules_block/hr.py
@@ -0,0 +1,54 @@
+"""Horizontal rule
+
+At least 3 of these characters on a line * - _
+"""
+import logging
+
+from ..common.utils import isSpace
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
+def hr(state: StateBlock, startLine: int, endLine: int, silent: bool):
+
+ LOGGER.debug("entering hr: %s, %s, %s, %s", state, startLine, endLine, silent)
+
+ pos = state.bMarks[startLine] + state.tShift[startLine]
+ maximum = state.eMarks[startLine]
+
+ # if it's indented more than 3 spaces, it should be a code block
+ if state.sCount[startLine] - state.blkIndent >= 4:
+ return False
+
+ marker = state.srcCharCode[pos]
+ pos += 1
+
+ # Check hr marker: /* * */ /* - */ /* _ */
+ if marker != 0x2A and marker != 0x2D and marker != 0x5F:
+ return False
+
+ # markers can be mixed with spaces, but there should be at least 3 of them
+
+ cnt = 1
+ while pos < maximum:
+ ch = state.srcCharCode[pos]
+ pos += 1
+ if ch != marker and not isSpace(ch):
+ return False
+ if ch == marker:
+ cnt += 1
+
+ if cnt < 3:
+ return False
+
+ if silent:
+ return True
+
+ state.line = startLine + 1
+
+ token = state.push("hr", "hr", 0)
+ token.map = [startLine, state.line]
+ token.markup = chr(marker) * (cnt + 1)
+
+ return True
diff --git a/markdown_it/rules_block/html_block.py b/markdown_it/rules_block/html_block.py
new file mode 100644
index 0000000..31afab7
--- /dev/null
+++ b/markdown_it/rules_block/html_block.py
@@ -0,0 +1,91 @@
+# HTML block
+from __future__ import annotations
+
+import logging
+import re
+
+from ..common.html_blocks import block_names
+from ..common.html_re import HTML_OPEN_CLOSE_TAG_STR
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
# An array of opening and corresponding closing sequences for html tags,
# last argument defines whether it can terminate a paragraph or not
#
# Each entry is (open_re, close_re, can_terminate_paragraph): the first
# open_re that matches a line selects the entry, and close_re then marks
# where the block ends (a close pattern of `^$` means "ends at a blank line").
HTML_SEQUENCES: list[tuple[re.Pattern, re.Pattern, bool]] = [
    (
        re.compile(r"^<(script|pre|style|textarea)(?=(\s|>|$))", re.IGNORECASE),
        re.compile(r"<\/(script|pre|style|textarea)>", re.IGNORECASE),
        True,
    ),
    # HTML comments
    (re.compile(r"^<!--"), re.compile(r"-->"), True),
    # processing instructions
    (re.compile(r"^<\?"), re.compile(r"\?>"), True),
    # declarations such as <!DOCTYPE ...>
    (re.compile(r"^<![A-Z]"), re.compile(r">"), True),
    # CDATA sections
    (re.compile(r"^<!\[CDATA\["), re.compile(r"\]\]>"), True),
    # open/close tag for one of the known block-level tag names
    (
        re.compile("^</?(" + "|".join(block_names) + ")(?=(\\s|/?>|$))", re.IGNORECASE),
        re.compile(r"^$"),
        True,
    ),
    # any other complete open/close tag alone on its line
    (re.compile(HTML_OPEN_CLOSE_TAG_STR + "\\s*$"), re.compile(r"^$"), False),
]
+
+
def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Recognise a raw HTML block and swallow it into a single token.

    The kind of block (and whether it may interrupt a paragraph) is decided
    by the first HTML_SEQUENCES entry whose opening regex matches the line.
    """
    LOGGER.debug(
        "entering html_block: %s, %s, %s, %s", state, startLine, endLine, silent
    )
    begin = state.bMarks[startLine] + state.tShift[startLine]
    end = state.eMarks[startLine]

    # four or more spaces of indent means a code block, not HTML
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    if not state.md.options.get("html", None):
        return False

    if state.srcCharCode[begin] != 0x3C:  # /* < */
        return False

    text = state.src[begin:end]

    # first sequence whose opening pattern matches decides the block kind
    matched = next((seq for seq in HTML_SEQUENCES if seq[0].search(text)), None)
    if matched is None:
        return False

    if silent:
        # true if this sequence can be a terminator, false otherwise
        return matched[2]

    nextLine = startLine + 1

    # We detected an HTML block; roll down until the closing marker,
    # unless it already appears on the opening line.
    if not matched[1].search(text):
        while nextLine < endLine:
            if state.sCount[nextLine] < state.blkIndent:
                break

            begin = state.bMarks[nextLine] + state.tShift[nextLine]
            end = state.eMarks[nextLine]
            text = state.src[begin:end]

            if matched[1].search(text):
                if text:
                    nextLine += 1
                break
            nextLine += 1

    state.line = nextLine

    token = state.push("html_block", "", 0)
    token.map = [startLine, nextLine]
    token.content = state.getLines(startLine, nextLine, state.blkIndent, True)

    return True
diff --git a/markdown_it/rules_block/lheading.py b/markdown_it/rules_block/lheading.py
new file mode 100644
index 0000000..f26e2af
--- /dev/null
+++ b/markdown_it/rules_block/lheading.py
@@ -0,0 +1,90 @@
+# lheading (---, ==)
+import logging
+
+from ..ruler import Ruler
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Parse a setext heading: paragraph text underlined by ``===`` or ``---``.

    ``=`` produces an h1, ``-`` an h2.  Fails (returning False) when no
    valid underline is found before the paragraph would end.
    """

    LOGGER.debug("entering lheading: %s, %s, %s, %s", state, startLine, endLine, silent)

    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    terminators = state.md.block.ruler.getRules("paragraph")

    savedParentType = state.parentType
    state.parentType = "paragraph"  # use paragraph to match terminatorRules

    level = None
    marker = 0
    line = startLine + 1

    # jump line-by-line until empty one or EOF
    while line < endLine and not state.isEmpty(line):
        # over-indented lines are lazy continuations of the paragraph
        if state.sCount[line] - state.blkIndent > 3:
            line += 1
            continue

        # Check for underline in setext header
        if state.sCount[line] >= state.blkIndent:
            first = state.bMarks[line] + state.tShift[line]
            last = state.eMarks[line]

            if first < last:
                marker = state.srcCharCode[first]

                if marker in (0x2D, 0x3D):  # '-' or '='
                    first = state.skipSpaces(state.skipChars(first, marker))

                    # underline must fill the rest of the line
                    if first >= last:
                        level = 1 if marker == 0x3D else 2
                        break

        # quirk for blockquotes, this line should already be checked by that rule
        if state.sCount[line] < 0:
            line += 1
            continue

        # Some tags can terminate paragraph without empty line.
        if any(rule(state, line, endLine, True) for rule in terminators):
            break

        line += 1

    if not level:
        # Didn't find valid underline
        return False

    content = state.getLines(startLine, line, state.blkIndent, False).strip()

    state.line = line + 1

    token = state.push("heading_open", "h" + str(level), 1)
    token.markup = chr(marker)
    token.map = [startLine, state.line]

    token = state.push("inline", "", 0)
    token.content = content
    token.map = [startLine, state.line - 1]
    token.children = []

    token = state.push("heading_close", "h" + str(level), -1)
    token.markup = chr(marker)

    state.parentType = savedParentType

    return True
diff --git a/markdown_it/rules_block/list.py b/markdown_it/rules_block/list.py
new file mode 100644
index 0000000..a7617ad
--- /dev/null
+++ b/markdown_it/rules_block/list.py
@@ -0,0 +1,344 @@
+# Lists
+import logging
+
+from ..common.utils import isSpace
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
+# Search `[-+*][\n ]`, returns next pos after marker on success
+# or -1 on fail.
+def skipBulletListMarker(state: StateBlock, startLine: int):
+
+ pos = state.bMarks[startLine] + state.tShift[startLine]
+ maximum = state.eMarks[startLine]
+
+ marker = state.srcCharCode[pos]
+ pos += 1
+ # Check bullet /* * */ /* - */ /* + */
+ if marker != 0x2A and marker != 0x2D and marker != 0x2B:
+ return -1
+
+ if pos < maximum:
+ ch = state.srcCharCode[pos]
+
+ if not isSpace(ch):
+ # " -test " - is not a list item
+ return -1
+
+ return pos
+
+
+# Search `\d+[.)][\n ]`, returns next pos after marker on success
+# or -1 on fail.
+def skipOrderedListMarker(state: StateBlock, startLine: int):
+
+ start = state.bMarks[startLine] + state.tShift[startLine]
+ pos = start
+ maximum = state.eMarks[startLine]
+
+ # List marker should have at least 2 chars (digit + dot)
+ if pos + 1 >= maximum:
+ return -1
+
+ ch = state.srcCharCode[pos]
+ pos += 1
+
+ # /* 0 */ /* 9 */
+ if ch < 0x30 or ch > 0x39:
+ return -1
+
+ while True:
+ # EOL -> fail
+ if pos >= maximum:
+ return -1
+
+ ch = state.srcCharCode[pos]
+ pos += 1
+
+ # /* 0 */ /* 9 */
+ if ch >= 0x30 and ch <= 0x39:
+
+ # List marker should have no more than 9 digits
+ # (prevents integer overflow in browsers)
+ if pos - start >= 10:
+ return -1
+
+ continue
+
+ # found valid marker: /* ) */ /* . */
+ if ch == 0x29 or ch == 0x2E:
+ break
+
+ return -1
+
+ if pos < maximum:
+ ch = state.srcCharCode[pos]
+
+ if not isSpace(ch):
+ # " 1.test " - is not a list item
+ return -1
+
+ return pos
+
+
+def markTightParagraphs(state: StateBlock, idx: int):
+ level = state.level + 2
+
+ i = idx + 2
+ length = len(state.tokens) - 2
+ while i < length:
+ if state.tokens[i].level == level and state.tokens[i].type == "paragraph_open":
+ state.tokens[i + 2].hidden = True
+ state.tokens[i].hidden = True
+ i += 2
+ i += 1
+
+
def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Parse an ordered or bullet list starting at ``startLine``.

    Detects the marker, then tokenizes each item by re-entering the block
    parser with ``blkIndent``/``listIndent`` temporarily adjusted to the
    item's content indent.  In silent (validation) mode returns True as
    soon as a valid marker is found; otherwise pushes the list/item tokens
    and advances ``state.line`` past the whole list.
    """

    LOGGER.debug("entering list: %s, %s, %s, %s", state, startLine, endLine, silent)

    isTerminatingParagraph = False
    tight = True

    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    # Special case:
    #  - item 1
    #   - item 2
    #    - item 3
    #     - item 4
    #      - this one is a paragraph continuation
    if (
        state.listIndent >= 0
        and state.sCount[startLine] - state.listIndent >= 4
        and state.sCount[startLine] < state.blkIndent
    ):
        return False

    # limit conditions when list can interrupt
    # a paragraph (validation mode only)
    if silent and state.parentType == "paragraph":
        # Next list item should still terminate previous list item
        #
        # This code can fail if plugins use blkIndent as well as lists,
        # but I hope the spec gets fixed long before that happens.
        #
        if state.tShift[startLine] >= state.blkIndent:
            isTerminatingParagraph = True

    # Detect list type and position after marker
    posAfterMarker = skipOrderedListMarker(state, startLine)
    if posAfterMarker >= 0:
        isOrdered = True
        start = state.bMarks[startLine] + state.tShift[startLine]
        markerValue = int(state.src[start : posAfterMarker - 1])

        # If we're starting a new ordered list right after
        # a paragraph, it should start with 1.
        if isTerminatingParagraph and markerValue != 1:
            return False
    else:
        posAfterMarker = skipBulletListMarker(state, startLine)
        if posAfterMarker >= 0:
            isOrdered = False
        else:
            return False

    # If we're starting a new unordered list right after
    # a paragraph, first line should not be empty.
    if isTerminatingParagraph:
        if state.skipSpaces(posAfterMarker) >= state.eMarks[startLine]:
            return False

    # We should terminate list on style change. Remember first one to compare.
    markerCharCode = state.srcCharCode[posAfterMarker - 1]

    # For validation mode we can terminate immediately
    if silent:
        return True

    # Start list
    listTokIdx = len(state.tokens)

    if isOrdered:
        token = state.push("ordered_list_open", "ol", 1)
        if markerValue != 1:
            token.attrs = {"start": markerValue}

    else:
        token = state.push("bullet_list_open", "ul", 1)

    # second entry of the map is patched once the list's end line is known
    token.map = listLines = [startLine, 0]
    token.markup = chr(markerCharCode)

    #
    # Iterate list items
    #

    nextLine = startLine
    prevEmptyEnd = False
    terminatorRules = state.md.block.ruler.getRules("list")

    oldParentType = state.parentType
    state.parentType = "list"

    while nextLine < endLine:
        pos = posAfterMarker
        maximum = state.eMarks[nextLine]

        initial = offset = (
            state.sCount[nextLine]
            + posAfterMarker
            - (state.bMarks[startLine] + state.tShift[startLine])
        )

        # measure the whitespace between the marker and the item content,
        # expanding tabs to 4-column stops
        while pos < maximum:
            ch = state.srcCharCode[pos]

            if ch == 0x09:  # \t
                offset += 4 - (offset + state.bsCount[nextLine]) % 4
            elif ch == 0x20:  # \s
                offset += 1
            else:
                break

            pos += 1

        contentStart = pos

        if contentStart >= maximum:
            # trimming space in "-    \n  3" case, indent is 1 here
            indentAfterMarker = 1
        else:
            indentAfterMarker = offset - initial

        # If we have more than 4 spaces, the indent is 1
        # (the rest is just indented code block)
        if indentAfterMarker > 4:
            indentAfterMarker = 1

        # "  -  test"
        #  ^^^^^ - calculating total length of this thing
        indent = initial + indentAfterMarker

        # Run subparser & write tokens
        token = state.push("list_item_open", "li", 1)
        token.markup = chr(markerCharCode)
        token.map = itemLines = [startLine, 0]
        if isOrdered:
            token.info = state.src[start : posAfterMarker - 1]

        # change current state, then restore it after parser subcall
        oldTight = state.tight
        oldTShift = state.tShift[startLine]
        oldSCount = state.sCount[startLine]

        #  - example list
        # ^ listIndent position will be here
        #   ^ blkIndent position will be here
        #
        oldListIndent = state.listIndent
        state.listIndent = state.blkIndent
        state.blkIndent = indent

        state.tight = True
        state.tShift[startLine] = contentStart - state.bMarks[startLine]
        state.sCount[startLine] = offset

        if contentStart >= maximum and state.isEmpty(startLine + 1):
            # workaround for this case
            # (list item is empty, list terminates before "foo"):
            # ~~~~~~~~
            #   -
            #
            #     foo
            # ~~~~~~~~
            state.line = min(state.line + 2, endLine)
        else:
            # NOTE in list.js this was:
            # state.md.block.tokenize(state, startLine, endLine, True)
            # but tokeniz does not take the final parameter
            state.md.block.tokenize(state, startLine, endLine)

        # If any of list item is tight, mark list as tight
        if (not state.tight) or prevEmptyEnd:
            tight = False

        # Item become loose if finish with empty line,
        # but we should filter last element, because it means list finish
        prevEmptyEnd = (state.line - startLine) > 1 and state.isEmpty(state.line - 1)

        # restore the parser state saved before the subcall
        state.blkIndent = state.listIndent
        state.listIndent = oldListIndent
        state.tShift[startLine] = oldTShift
        state.sCount[startLine] = oldSCount
        state.tight = oldTight

        token = state.push("list_item_close", "li", -1)
        token.markup = chr(markerCharCode)

        nextLine = startLine = state.line
        itemLines[1] = nextLine

        if nextLine >= endLine:
            break

        contentStart = state.bMarks[startLine]

        #
        # Try to check if list is terminated or continued.
        #
        if state.sCount[nextLine] < state.blkIndent:
            break

        # if it's indented more than 3 spaces, it should be a code block
        if state.sCount[startLine] - state.blkIndent >= 4:
            break

        # fail if terminating block found
        terminate = False
        for terminatorRule in terminatorRules:
            if terminatorRule(state, nextLine, endLine, True):
                terminate = True
                break

        if terminate:
            break

        # fail if list has another type
        if isOrdered:
            posAfterMarker = skipOrderedListMarker(state, nextLine)
            if posAfterMarker < 0:
                break
            start = state.bMarks[nextLine] + state.tShift[nextLine]
        else:
            posAfterMarker = skipBulletListMarker(state, nextLine)
            if posAfterMarker < 0:
                break

        if markerCharCode != state.srcCharCode[posAfterMarker - 1]:
            break

    # Finalize list
    if isOrdered:
        token = state.push("ordered_list_close", "ol", -1)
    else:
        token = state.push("bullet_list_close", "ul", -1)

    token.markup = chr(markerCharCode)

    listLines[1] = nextLine
    state.line = nextLine

    state.parentType = oldParentType

    # mark paragraphs tight if needed
    if tight:
        markTightParagraphs(state, listTokIdx)

    return True
diff --git a/markdown_it/rules_block/paragraph.py b/markdown_it/rules_block/paragraph.py
new file mode 100644
index 0000000..4fee83e
--- /dev/null
+++ b/markdown_it/rules_block/paragraph.py
@@ -0,0 +1,67 @@
+"""Paragraph."""
+import logging
+
+from ..ruler import Ruler
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
def paragraph(state: StateBlock, startLine: int, endLine: int, silent: bool = False):
    """Fallback block rule: consume consecutive non-empty lines as a paragraph.

    Stops at an empty line, EOF, or any rule registered as a paragraph
    terminator; always succeeds.
    """

    LOGGER.debug(
        "entering paragraph: %s, %s, %s, %s", state, startLine, endLine, silent
    )

    terminators = state.md.block.ruler.getRules("paragraph")
    endLine = state.lineMax

    savedParentType = state.parentType
    state.parentType = "paragraph"

    line = startLine + 1
    # jump line-by-line until empty one or EOF
    while line < endLine and not state.isEmpty(line):
        # over-indented lines are lazy continuations; negative sCount is the
        # blockquote quirk (already checked by that rule) -- neither ends us
        if state.sCount[line] - state.blkIndent > 3 or state.sCount[line] < 0:
            line += 1
            continue

        # some tags can terminate a paragraph without an empty line
        if any(rule(state, line, endLine, True) for rule in terminators):
            break

        line += 1

    content = state.getLines(startLine, line, state.blkIndent, False).strip()

    state.line = line

    token = state.push("paragraph_open", "p", 1)
    token.map = [startLine, state.line]

    token = state.push("inline", "", 0)
    token.content = content
    token.map = [startLine, state.line]
    token.children = []

    token = state.push("paragraph_close", "p", -1)

    state.parentType = savedParentType

    return True
diff --git a/markdown_it/rules_block/reference.py b/markdown_it/rules_block/reference.py
new file mode 100644
index 0000000..35adde2
--- /dev/null
+++ b/markdown_it/rules_block/reference.py
@@ -0,0 +1,218 @@
+import logging
+
+from ..common.utils import charCodeAt, isSpace, normalizeReference
+from .state_block import StateBlock
+
+LOGGER = logging.getLogger(__name__)
+
+
def reference(state: StateBlock, startLine, _endLine, silent):
    """Parse a link reference definition ``[label]: destination 'title'``.

    On success the definition is stored in ``state.env["references"]``
    keyed by the normalized label (duplicates go to ``duplicate_refs``),
    and the consumed lines are skipped.
    """

    LOGGER.debug(
        "entering reference: %s, %s, %s, %s", state, startLine, _endLine, silent
    )

    lines = 0
    pos = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]
    nextLine = startLine + 1

    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False

    if state.srcCharCode[pos] != 0x5B:  # /* [ */
        return False

    # Simple check to quickly interrupt scan on [link](url) at the start of line.
    # Can be useful on practice: https:#github.com/markdown-it/markdown-it/issues/54
    while pos < maximum:
        # /* ] */  /* \ */  /* : */
        if state.srcCharCode[pos] == 0x5D and state.srcCharCode[pos - 1] != 0x5C:
            if pos + 1 == maximum:
                return False
            if state.srcCharCode[pos + 1] != 0x3A:
                return False
            break
        pos += 1

    endLine = state.lineMax

    # jump line-by-line until empty one or EOF
    terminatorRules = state.md.block.ruler.getRules("reference")

    oldParentType = state.parentType
    state.parentType = "reference"

    while nextLine < endLine and not state.isEmpty(nextLine):
        # this would be a code block normally, but after paragraph
        # it's considered a lazy continuation regardless of what's there
        if state.sCount[nextLine] - state.blkIndent > 3:
            nextLine += 1
            continue

        # quirk for blockquotes, this line should already be checked by that rule
        if state.sCount[nextLine] < 0:
            nextLine += 1
            continue

        # Some tags can terminate paragraph without empty line.
        terminate = False
        for terminatorRule in terminatorRules:
            if terminatorRule(state, nextLine, endLine, True):
                terminate = True
                break

        if terminate:
            break

        nextLine += 1

    # from here on we scan the joined, dedented text of the candidate lines
    string = state.getLines(startLine, nextLine, state.blkIndent, False).strip()
    maximum = len(string)

    labelEnd = None
    pos = 1
    # find the closing ']' of the label, counting newlines on the way
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if ch == 0x5B:  # /* [ */
            return False
        elif ch == 0x5D:  # /* ] */
            labelEnd = pos
            break
        elif ch == 0x0A:  # /* \n */
            lines += 1
        elif ch == 0x5C:  # /* \ */
            pos += 1
            if pos < maximum and charCodeAt(string, pos) == 0x0A:
                lines += 1
        pos += 1

    if (
        labelEnd is None or labelEnd < 0 or charCodeAt(string, labelEnd + 1) != 0x3A
    ):  # /* : */
        return False

    # [label]:   destination   'title'
    #         ^^^ skip optional whitespace here
    pos = labelEnd + 2
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if ch == 0x0A:
            lines += 1
        elif isSpace(ch):
            pass
        else:
            break
        pos += 1

    # [label]:   destination   'title'
    #            ^^^^^^^^^^^ parse this
    res = state.md.helpers.parseLinkDestination(string, pos, maximum)
    if not res.ok:
        return False

    href = state.md.normalizeLink(res.str)
    if not state.md.validateLink(href):
        return False

    pos = res.pos
    lines += res.lines

    # save cursor state, we could require to rollback later
    destEndPos = pos
    destEndLineNo = lines

    # [label]:   destination   'title'
    #                       ^^^ skipping those spaces
    start = pos
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if ch == 0x0A:
            lines += 1
        elif isSpace(ch):
            pass
        else:
            break
        pos += 1

    # [label]:   destination   'title'
    #                           ^^^^^^^ parse this
    res = state.md.helpers.parseLinkTitle(string, pos, maximum)
    if pos < maximum and start != pos and res.ok:
        title = res.str
        pos = res.pos
        lines += res.lines
    else:
        # no (valid) title: roll the cursor back to just after the destination
        title = ""
        pos = destEndPos
        lines = destEndLineNo

    # skip trailing spaces until the rest of the line
    while pos < maximum:
        ch = charCodeAt(string, pos)
        if not isSpace(ch):
            break
        pos += 1

    if pos < maximum and charCodeAt(string, pos) != 0x0A:
        if title:
            # garbage at the end of the line after title,
            # but it could still be a valid reference if we roll back
            title = ""
            pos = destEndPos
            lines = destEndLineNo
            while pos < maximum:
                ch = charCodeAt(string, pos)
                if not isSpace(ch):
                    break
                pos += 1

    if pos < maximum and charCodeAt(string, pos) != 0x0A:
        # garbage at the end of the line
        return False

    label = normalizeReference(string[1:labelEnd])
    if not label:
        # CommonMark 0.20 disallows empty labels
        return False

    # Reference can not terminate anything. This check is for safety only.
    if silent:
        return True

    if "references" not in state.env:
        state.env["references"] = {}

    state.line = startLine + lines + 1

    # note, this is not part of markdown-it JS, but is useful for renderers
    if state.md.options.get("inline_definitions", False):
        token = state.push("definition", "", 0)
        token.meta = {
            "id": label,
            "title": title,
            "url": href,
            "label": string[1:labelEnd],
        }
        token.map = [startLine, state.line]

    # first definition of a label wins; later ones are recorded as duplicates
    if label not in state.env["references"]:
        state.env["references"][label] = {
            "title": title,
            "href": href,
            "map": [startLine, state.line],
        }
    else:
        state.env.setdefault("duplicate_refs", []).append(
            {
                "title": title,
                "href": href,
                "label": label,
                "map": [startLine, state.line],
            }
        )

    state.parentType = oldParentType

    return True
diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py
new file mode 100644
index 0000000..42b8fce
--- /dev/null
+++ b/markdown_it/rules_block/state_block.py
@@ -0,0 +1,230 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from ..common.utils import isSpace
+from ..ruler import StateBase
+from ..token import Token
+
+if TYPE_CHECKING:
+ from markdown_it.main import MarkdownIt
+
+
class StateBlock(StateBase):
    """Mutable state shared by all block-level parsing rules.

    On construction the source is scanned once to build per-line caches
    (``bMarks``/``eMarks``/``tShift``/``sCount``/``bsCount``); rules then
    use the scanning helpers below instead of re-inspecting the raw string.
    """

    def __init__(
        self,
        src: str,
        md: MarkdownIt,
        env,
        tokens: list[Token],
        srcCharCode: tuple[int, ...] | None = None,
    ):

        if srcCharCode is not None:
            self._src = src
            self.srcCharCode = srcCharCode
        else:
            # NOTE(review): this assignment presumably goes through a ``src``
            # property on StateBase that also derives ``srcCharCode`` (which
            # is read below) -- confirm in StateBase.
            self.src = src

        # link to parser instance
        self.md = md

        self.env = env

        #
        # Internal state variables
        #

        self.tokens = tokens

        self.bMarks = []  # line begin offsets for fast jumps
        self.eMarks = []  # line end offsets for fast jumps
        # offsets of the first non-space characters (tabs not expanded)
        self.tShift = []
        self.sCount = []  # indents for each line (tabs expanded)

        # An amount of virtual spaces (tabs expanded) between beginning
        # of each line (bMarks) and real beginning of that line.
        #
        # It exists only as a hack because blockquotes override bMarks
        # losing information in the process.
        #
        # It's used only when expanding tabs, you can think about it as
        # an initial tab length, e.g. bsCount=21 applied to string `\t123`
        # means first tab should be expanded to 4-21%4 === 3 spaces.
        #
        self.bsCount = []

        # block parser variables
        self.blkIndent = 0  # required block content indent (for example, if we are
        # inside a list, it would be positioned after list marker)
        self.line = 0  # line index in src
        self.lineMax = 0  # lines count
        self.tight = False  # loose/tight mode for lists
        self.ddIndent = -1  # indent of the current dd block (-1 if there isn't any)
        self.listIndent = -1  # indent of the current list block (-1 if there isn't any)

        # can be 'blockquote', 'list', 'root', 'paragraph' or 'reference'
        # used in lists to determine if they interrupt a paragraph
        self.parentType = "root"

        self.level = 0

        # renderer
        self.result = ""

        # Create caches
        # Generate markers.
        indent_found = False

        start = pos = indent = offset = 0
        length = len(self.src)

        # single pass over the source: record, for every line, its begin/end
        # offsets, first-non-space shift, and tab-expanded indent width
        for pos, character in enumerate(self.srcCharCode):
            if not indent_found:
                if isSpace(character):
                    indent += 1

                    if character == 0x09:
                        offset += 4 - offset % 4
                    else:
                        offset += 1
                    continue
                else:
                    indent_found = True

            if character == 0x0A or pos == length - 1:
                if character != 0x0A:
                    # no trailing newline: make the eMark cover the last char
                    pos += 1
                self.bMarks.append(start)
                self.eMarks.append(pos)
                self.tShift.append(indent)
                self.sCount.append(offset)
                self.bsCount.append(0)

                indent_found = False
                indent = 0
                offset = 0
                start = pos + 1

        # Push fake entry to simplify cache bounds checks
        self.bMarks.append(length)
        self.eMarks.append(length)
        self.tShift.append(0)
        self.sCount.append(0)
        self.bsCount.append(0)

        self.lineMax = len(self.bMarks) - 1  # don't count last fake line

    def __repr__(self):
        """Short summary used in debug logging."""
        return (
            f"{self.__class__.__name__}"
            f"(line={self.line},level={self.level},tokens={len(self.tokens)})"
        )

    def push(self, ttype: str, tag: str, nesting: int) -> Token:
        """Push new token to "stream"."""
        token = Token(ttype, tag, nesting)
        token.block = True
        if nesting < 0:
            self.level -= 1  # closing tag
        token.level = self.level
        if nesting > 0:
            self.level += 1  # opening tag
        self.tokens.append(token)
        return token

    def isEmpty(self, line: int) -> bool:
        """Return True if the given line contains only whitespace."""
        return (self.bMarks[line] + self.tShift[line]) >= self.eMarks[line]

    def skipEmptyLines(self, from_pos: int) -> int:
        """Return the index of the first non-empty line at or after ``from_pos``."""
        while from_pos < self.lineMax:
            try:
                if (self.bMarks[from_pos] + self.tShift[from_pos]) < self.eMarks[
                    from_pos
                ]:
                    break
            except IndexError:
                pass
            from_pos += 1
        return from_pos

    def skipSpaces(self, pos: int) -> int:
        """Skip spaces from given position."""
        while pos < len(self.src):
            if not isSpace(self.srcCharCode[pos]):
                break
            pos += 1
        return pos

    def skipSpacesBack(self, pos: int, minimum: int) -> int:
        """Skip spaces from given position in reverse."""
        if pos <= minimum:
            return pos
        while pos > minimum:
            pos -= 1
            if not isSpace(self.srcCharCode[pos]):
                return pos + 1
        return pos

    def skipChars(self, pos: int, code: int) -> int:
        """Skip char codes from given position."""
        while pos < len(self.src):
            if self.srcCharCode[pos] != code:
                break
            pos += 1
        return pos

    def skipCharsBack(self, pos: int, code: int, minimum: int) -> int:
        """Skip char codes reverse from given position - 1."""
        if pos <= minimum:
            return pos
        while pos > minimum:
            pos -= 1
            if code != self.srcCharCode[pos]:
                return pos + 1
        return pos

    def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str:
        """Cut lines range from source."""
        line = begin
        if begin >= end:
            return ""

        queue = [""] * (end - begin)

        i = 1
        while line < end:
            lineIndent = 0
            lineStart = first = self.bMarks[line]
            if line + 1 < end or keepLastLF:
                last = self.eMarks[line] + 1
            else:
                last = self.eMarks[line]

            # strip up to ``indent`` columns of leading whitespace,
            # expanding tabs against bsCount
            while (first < last) and (lineIndent < indent):
                ch = self.srcCharCode[first]
                if isSpace(ch):
                    if ch == 0x09:
                        lineIndent += 4 - (lineIndent + self.bsCount[line]) % 4
                    else:
                        lineIndent += 1
                elif first - lineStart < self.tShift[line]:
                    # virtual space left behind by a blockquote bMarks shift
                    lineIndent += 1
                else:
                    break
                first += 1

            if lineIndent > indent:
                # partially expanding tabs in code blocks, e.g '\t\tfoobar'
                # with indent=2 becomes '  \tfoobar'
                queue[i - 1] = (" " * (lineIndent - indent)) + self.src[first:last]
            else:
                queue[i - 1] = self.src[first:last]

            line += 1
            i += 1

        return "".join(queue)
diff --git a/markdown_it/rules_block/table.py b/markdown_it/rules_block/table.py
new file mode 100644
index 0000000..e3db858
--- /dev/null
+++ b/markdown_it/rules_block/table.py
@@ -0,0 +1,238 @@
+# GFM table, https://github.github.com/gfm/#tables-extension-
+import re
+
+from ..common.utils import charCodeAt, isSpace
+from .state_block import StateBlock
+
+headerLineRe = re.compile(r"^:?-+:?$")
+enclosingPipesRe = re.compile(r"^\||\|$")
+
+
+def getLine(state: StateBlock, line: int):
+ pos = state.bMarks[line] + state.tShift[line]
+ maximum = state.eMarks[line]
+
+ # return state.src.substr(pos, max - pos)
+ return state.src[pos:maximum]
+
+
def escapedSplit(string):
    """Split a table row on unescaped ``|`` pipes.

    A ``\\|`` sequence keeps the pipe inside the cell (the backslash is
    dropped); every unescaped pipe starts a new cell.
    """
    cells = []
    cell = ""
    segStart = 0
    escaped = False

    for idx, ch in enumerate(string):
        if ch == "|":
            if escaped:
                # escaped pipe '\|': drop the backslash, keep the pipe
                cell += string[segStart : idx - 1]
                segStart = idx
            else:
                # unescaped pipe separates cells
                cells.append(cell + string[segStart:idx])
                cell = ""
                segStart = idx + 1
        escaped = ch == "\\"

    cells.append(cell + string[segStart:])

    return cells
+
+
def table(state: StateBlock, startLine: int, endLine: int, silent: bool):
    """Parse a GFM table: header row, delimiter row, optional body rows.

    The delimiter row (``|---|:--:|...``) fixes the column count and the
    per-column alignments; body rows are padded/truncated to that count.
    """
    tbodyLines = None

    # should have at least two lines
    if startLine + 2 > endLine:
        return False

    nextLine = startLine + 1

    if state.sCount[nextLine] < state.blkIndent:
        return False

    # if it's indented more than 3 spaces, it should be a code block
    if state.sCount[nextLine] - state.blkIndent >= 4:
        return False

    # first character of the second line should be '|', '-', ':',
    # and no other characters are allowed but spaces;
    # basically, this is the equivalent of /^[-:|][-:|\s]*$/ regexp

    pos = state.bMarks[nextLine] + state.tShift[nextLine]
    if pos >= state.eMarks[nextLine]:
        return False
    first_ch = state.srcCharCode[pos]
    pos += 1
    if first_ch not in {0x7C, 0x2D, 0x3A}:  # not in {"|", "-", ":"}
        return False

    if pos >= state.eMarks[nextLine]:
        return False
    second_ch = state.srcCharCode[pos]
    pos += 1
    # not in {"|", "-", ":"} and not space
    if second_ch not in {0x7C, 0x2D, 0x3A} and not isSpace(second_ch):
        return False

    # if first character is '-', then second character must not be a space
    # (due to parsing ambiguity with list)
    if first_ch == 0x2D and isSpace(second_ch):
        return False

    while pos < state.eMarks[nextLine]:
        ch = state.srcCharCode[pos]

        # /* | */ /* - */ /* : */
        if ch not in {0x7C, 0x2D, 0x3A} and not isSpace(ch):
            return False

        pos += 1

    lineText = getLine(state, startLine + 1)

    # derive per-column alignment from the delimiter row
    columns = lineText.split("|")
    aligns = []
    for i in range(len(columns)):
        t = columns[i].strip()
        if not t:
            # allow empty columns before and after table, but not in between columns;
            # e.g. allow ` |---| `, disallow ` ---||--- `
            if i == 0 or i == len(columns) - 1:
                continue
            else:
                return False

        if not headerLineRe.search(t):
            return False
        if charCodeAt(t, len(t) - 1) == 0x3A:  # /* : */
            # /* : */
            aligns.append("center" if charCodeAt(t, 0) == 0x3A else "right")
        elif charCodeAt(t, 0) == 0x3A:  # /* : */
            aligns.append("left")
        else:
            aligns.append("")

    lineText = getLine(state, startLine).strip()
    if "|" not in lineText:
        return False
    if state.sCount[startLine] - state.blkIndent >= 4:
        return False
    columns = escapedSplit(lineText)
    # drop the empty cells produced by enclosing pipes
    if columns and columns[0] == "":
        columns.pop(0)
    if columns and columns[-1] == "":
        columns.pop()

    # header row will define an amount of columns in the entire table,
    # and align row should be exactly the same (the rest of the rows can differ)
    columnCount = len(columns)
    if columnCount == 0 or columnCount != len(aligns):
        return False

    if silent:
        return True

    oldParentType = state.parentType
    state.parentType = "table"

    # use 'blockquote' lists for termination because it's
    # the most similar to tables
    terminatorRules = state.md.block.ruler.getRules("blockquote")

    token = state.push("table_open", "table", 1)
    token.map = tableLines = [startLine, 0]

    token = state.push("thead_open", "thead", 1)
    token.map = [startLine, startLine + 1]

    token = state.push("tr_open", "tr", 1)
    token.map = [startLine, startLine + 1]

    for i in range(len(columns)):
        token = state.push("th_open", "th", 1)
        if aligns[i]:
            token.attrs = {"style": "text-align:" + aligns[i]}

        token = state.push("inline", "", 0)
        # note in markdown-it this map was removed in v12.0.0 however, we keep it,
        # since it is helpful to propagate to children tokens
        token.map = [startLine, startLine + 1]
        token.content = columns[i].strip()
        token.children = []

        token = state.push("th_close", "th", -1)

    token = state.push("tr_close", "tr", -1)
    token = state.push("thead_close", "thead", -1)

    nextLine = startLine + 2
    while nextLine < endLine:
        if state.sCount[nextLine] < state.blkIndent:
            break

        terminate = False
        for i in range(len(terminatorRules)):
            if terminatorRules[i](state, nextLine, endLine, True):
                terminate = True
                break

        if terminate:
            break
        lineText = getLine(state, nextLine).strip()
        if not lineText:
            break
        if state.sCount[nextLine] - state.blkIndent >= 4:
            break
        columns = escapedSplit(lineText)
        if columns and columns[0] == "":
            columns.pop(0)
        if columns and columns[-1] == "":
            columns.pop()

        if nextLine == startLine + 2:
            # open tbody lazily, only once the first body row exists
            token = state.push("tbody_open", "tbody", 1)
            token.map = tbodyLines = [startLine + 2, 0]

        token = state.push("tr_open", "tr", 1)
        token.map = [nextLine, nextLine + 1]

        for i in range(columnCount):
            token = state.push("td_open", "td", 1)
            if aligns[i]:
                token.attrs = {"style": "text-align:" + aligns[i]}

            token = state.push("inline", "", 0)
            # note in markdown-it this map was removed in v12.0.0 however, we keep it,
            # since it is helpful to propagate to children tokens
            token.map = [nextLine, nextLine + 1]
            try:
                token.content = columns[i].strip() if columns[i] else ""
            except IndexError:
                # body row has fewer cells than the header: pad with empties
                token.content = ""
            token.children = []

            token = state.push("td_close", "td", -1)

        token = state.push("tr_close", "tr", -1)

        nextLine += 1

    if tbodyLines:
        token = state.push("tbody_close", "tbody", -1)
        tbodyLines[1] = nextLine

    token = state.push("table_close", "table", -1)

    tableLines[1] = nextLine
    state.parentType = oldParentType
    state.line = nextLine
    return True
diff --git a/markdown_it/rules_core/__init__.py b/markdown_it/rules_core/__init__.py
new file mode 100644
index 0000000..f80034c
--- /dev/null
+++ b/markdown_it/rules_core/__init__.py
@@ -0,0 +1,17 @@
+__all__ = (
+ "StateCore",
+ "normalize",
+ "block",
+ "inline",
+ "replace",
+ "smartquotes",
+ "linkify",
+)
+
+from .block import block
+from .inline import inline
+from .linkify import linkify
+from .normalize import normalize
+from .replacements import replace
+from .smartquotes import smartquotes
+from .state_core import StateCore
diff --git a/markdown_it/rules_core/block.py b/markdown_it/rules_core/block.py
new file mode 100644
index 0000000..fa1c52c
--- /dev/null
+++ b/markdown_it/rules_core/block.py
@@ -0,0 +1,16 @@
+from ..token import Token
+from .state_core import StateCore
+
+
+def block(state: StateCore) -> None:
+
+ if state.inlineMode:
+ token = Token("inline", "", 0)
+ token.content = state.src
+ token.map = [0, 1]
+ token.children = []
+ state.tokens.append(token)
+ else:
+ state.md.block.parse(
+ state.src, state.md, state.env, state.tokens, state.srcCharCode
+ )
diff --git a/markdown_it/rules_core/inline.py b/markdown_it/rules_core/inline.py
new file mode 100644
index 0000000..c3fd0b5
--- /dev/null
+++ b/markdown_it/rules_core/inline.py
@@ -0,0 +1,10 @@
+from .state_core import StateCore
+
+
+def inline(state: StateCore) -> None:
+ """Parse inlines"""
+ for token in state.tokens:
+ if token.type == "inline":
+ if token.children is None:
+ token.children = []
+ state.md.inline.parse(token.content, state.md, state.env, token.children)
diff --git a/markdown_it/rules_core/linkify.py b/markdown_it/rules_core/linkify.py
new file mode 100644
index 0000000..49bb4ef
--- /dev/null
+++ b/markdown_it/rules_core/linkify.py
@@ -0,0 +1,141 @@
+import re
+
+from ..common.utils import arrayReplaceAt
+from ..token import Token
+from .state_core import StateCore
+
+LINK_OPEN_RE = re.compile(r"^<a[>\s]", flags=re.IGNORECASE)
+LINK_CLOSE_RE = re.compile(r"^</a\s*>", flags=re.IGNORECASE)
+
+HTTP_RE = re.compile(r"^http://")
+MAILTO_RE = re.compile(r"^mailto:")
+TEST_MAILTO_RE = re.compile(r"^mailto:", flags=re.IGNORECASE)
+
+
+def isLinkOpen(string: str) -> bool:
+ return bool(LINK_OPEN_RE.search(string))
+
+
+def isLinkClose(string: str) -> bool:
+ return bool(LINK_CLOSE_RE.search(string))
+
+
+def linkify(state: StateCore) -> None:
+ blockTokens = state.tokens
+
+ if not state.md.options.linkify:
+ return
+
+ if not state.md.linkify:
+ raise ModuleNotFoundError("Linkify enabled but not installed.")
+
+ for j in range(len(blockTokens)):
+ if blockTokens[j].type != "inline" or not state.md.linkify.pretest(
+ blockTokens[j].content
+ ):
+ continue
+
+ tokens = blockTokens[j].children
+
+ htmlLinkLevel = 0
+
+ # We scan from the end, to keep position when new tags added.
+ # Use reversed logic in links start/end match
+ assert tokens is not None
+ i = len(tokens)
+ while i >= 1:
+ i -= 1
+ assert isinstance(tokens, list)
+ currentToken = tokens[i]
+
+ # Skip content of markdown links
+ if currentToken.type == "link_close":
+ i -= 1
+ while (
+ tokens[i].level != currentToken.level
+ and tokens[i].type != "link_open"
+ ):
+ i -= 1
+ continue
+
+ # Skip content of html tag links
+ if currentToken.type == "html_inline":
+ if isLinkOpen(currentToken.content) and htmlLinkLevel > 0:
+ htmlLinkLevel -= 1
+ if isLinkClose(currentToken.content):
+ htmlLinkLevel += 1
+ if htmlLinkLevel > 0:
+ continue
+
+ if currentToken.type == "text" and state.md.linkify.test(
+ currentToken.content
+ ):
+ text = currentToken.content
+ links = state.md.linkify.match(text)
+
+ # Now split string to nodes
+ nodes = []
+ level = currentToken.level
+ lastPos = 0
+
+ for ln in range(len(links)):
+ url = links[ln].url
+ fullUrl = state.md.normalizeLink(url)
+ if not state.md.validateLink(fullUrl):
+ continue
+
+ urlText = links[ln].text
+
+ # Linkifier might send raw hostnames like "example.com", where url
+ # starts with domain name. So we prepend http:// in those cases,
+ # and remove it afterwards.
+ if not links[ln].schema:
+ urlText = HTTP_RE.sub(
+ "", state.md.normalizeLinkText("http://" + urlText)
+ )
+ elif links[ln].schema == "mailto:" and TEST_MAILTO_RE.search(
+ urlText
+ ):
+ urlText = MAILTO_RE.sub(
+ "", state.md.normalizeLinkText("mailto:" + urlText)
+ )
+ else:
+ urlText = state.md.normalizeLinkText(urlText)
+
+ pos = links[ln].index
+
+ if pos > lastPos:
+ token = Token("text", "", 0)
+ token.content = text[lastPos:pos]
+ token.level = level
+ nodes.append(token)
+
+ token = Token("link_open", "a", 1)
+ token.attrs = {"href": fullUrl}
+ token.level = level
+ level += 1
+ token.markup = "linkify"
+ token.info = "auto"
+ nodes.append(token)
+
+ token = Token("text", "", 0)
+ token.content = urlText
+ token.level = level
+ nodes.append(token)
+
+ token = Token("link_close", "a", -1)
+ level -= 1
+ token.level = level
+ token.markup = "linkify"
+ token.info = "auto"
+ nodes.append(token)
+
+ lastPos = links[ln].last_index
+
+ if lastPos < len(text):
+ token = Token("text", "", 0)
+ token.content = text[lastPos:]
+ token.level = level
+ nodes.append(token)
+
+ blockTokens[j].children = tokens = arrayReplaceAt(tokens, i, nodes)
diff --git a/markdown_it/rules_core/normalize.py b/markdown_it/rules_core/normalize.py
new file mode 100644
index 0000000..bf16fd7
--- /dev/null
+++ b/markdown_it/rules_core/normalize.py
@@ -0,0 +1,19 @@
+"""Normalize input string."""
+import re
+
+from .state_core import StateCore
+
+# https://spec.commonmark.org/0.29/#line-ending
+NEWLINES_RE = re.compile(r"\r\n?|\n")
+NULL_RE = re.compile(r"\0")
+
+
+def normalize(state: StateCore) -> None:
+
+ # Normalize newlines
+ string = NEWLINES_RE.sub("\n", state.src)
+
+ # Replace NULL characters
+ string = NULL_RE.sub("\uFFFD", string)
+
+ state.src = string
diff --git a/markdown_it/rules_core/replacements.py b/markdown_it/rules_core/replacements.py
new file mode 100644
index 0000000..45377d3
--- /dev/null
+++ b/markdown_it/rules_core/replacements.py
@@ -0,0 +1,125 @@
+"""Simple typographic replacements
+
+* ``(c)``, ``(C)`` → ©
+* ``(tm)``, ``(TM)`` → ™
+* ``(r)``, ``(R)`` → ®
+* ``(p)``, ``(P)`` → §
+* ``+-`` → ±
+* ``...`` → …
+* ``?....`` → ?..
+* ``!....`` → !..
+* ``????????`` → ???
+* ``!!!!!`` → !!!
+* ``,,,`` → ,
+* ``--`` → &ndash; (en-dash)
+* ``---`` → &mdash; (em-dash)
+"""
+from __future__ import annotations
+
+import logging
+import re
+
+from ..token import Token
+from .state_core import StateCore
+
+LOGGER = logging.getLogger(__name__)
+
+# TODO:
+# - fractionals 1/2, 1/4, 3/4 -> ½, ¼, ¾
+# - multiplication 2 x 4 -> 2 × 4
+
+RARE_RE = re.compile(r"\+-|\.\.|\?\?\?\?|!!!!|,,|--")
+
+# Workaround for phantomjs - need regex without /g flag,
+# or root check will fail every second time
+# SCOPED_ABBR_TEST_RE = r"\((c|tm|r|p)\)"
+
+SCOPED_ABBR_RE = re.compile(r"\((c|tm|r|p)\)", flags=re.IGNORECASE)
+
+PLUS_MINUS_RE = re.compile(r"\+-")
+
+ELLIPSIS_RE = re.compile(r"\.{2,}")
+
+ELLIPSIS_QUESTION_EXCLAMATION_RE = re.compile(r"([?!])…")
+
+QUESTION_EXCLAMATION_RE = re.compile(r"([?!]){4,}")
+
+COMMA_RE = re.compile(r",{2,}")
+
+EM_DASH_RE = re.compile(r"(^|[^-])---(?=[^-]|$)", flags=re.MULTILINE)
+
+EN_DASH_RE = re.compile(r"(^|\s)--(?=\s|$)", flags=re.MULTILINE)
+
+EN_DASH_INDENT_RE = re.compile(r"(^|[^-\s])--(?=[^-\s]|$)", flags=re.MULTILINE)
+
+
+SCOPED_ABBR = {"c": "©", "r": "®", "p": "§", "tm": "™"}
+
+
+def replaceFn(match: re.Match[str]):
+ return SCOPED_ABBR[match.group(1).lower()]
+
+
+def replace_scoped(inlineTokens: list[Token]) -> None:
+ inside_autolink = 0
+
+ for token in inlineTokens:
+ if token.type == "text" and not inside_autolink:
+ token.content = SCOPED_ABBR_RE.sub(replaceFn, token.content)
+
+ if token.type == "link_open" and token.info == "auto":
+ inside_autolink -= 1
+
+ if token.type == "link_close" and token.info == "auto":
+ inside_autolink += 1
+
+
+def replace_rare(inlineTokens: list[Token]) -> None:
+ inside_autolink = 0
+
+ for token in inlineTokens:
+ if token.type == "text" and not inside_autolink:
+ if RARE_RE.search(token.content):
+ # +- -> ±
+ token.content = PLUS_MINUS_RE.sub("±", token.content)
+
+ # .., ..., ....... -> …
+ token.content = ELLIPSIS_RE.sub("…", token.content)
+
+ # but ?..... & !..... -> ?.. & !..
+ token.content = ELLIPSIS_QUESTION_EXCLAMATION_RE.sub(
+ "\\1..", token.content
+ )
+ token.content = QUESTION_EXCLAMATION_RE.sub("\\1\\1\\1", token.content)
+
+ # ,, ,,, ,,,, -> ,
+ token.content = COMMA_RE.sub(",", token.content)
+
+ # em-dash
+ token.content = EM_DASH_RE.sub("\\1\u2014", token.content)
+
+ # en-dash
+ token.content = EN_DASH_RE.sub("\\1\u2013", token.content)
+ token.content = EN_DASH_INDENT_RE.sub("\\1\u2013", token.content)
+
+ if token.type == "link_open" and token.info == "auto":
+ inside_autolink -= 1
+
+ if token.type == "link_close" and token.info == "auto":
+ inside_autolink += 1
+
+
+def replace(state: StateCore) -> None:
+ if not state.md.options.typographer:
+ return
+
+ for token in state.tokens:
+ if token.type != "inline":
+ continue
+ assert token.children is not None
+
+ if SCOPED_ABBR_RE.search(token.content):
+ replace_scoped(token.children)
+
+ if RARE_RE.search(token.content):
+ replace_rare(token.children)
diff --git a/markdown_it/rules_core/smartquotes.py b/markdown_it/rules_core/smartquotes.py
new file mode 100644
index 0000000..93f8be2
--- /dev/null
+++ b/markdown_it/rules_core/smartquotes.py
@@ -0,0 +1,202 @@
+"""Convert straight quotation marks to typographic ones
+"""
+from __future__ import annotations
+
+import re
+from typing import Any
+
+from ..common.utils import charCodeAt, isMdAsciiPunct, isPunctChar, isWhiteSpace
+from ..token import Token
+from .state_core import StateCore
+
+QUOTE_TEST_RE = re.compile(r"['\"]")
+QUOTE_RE = re.compile(r"['\"]")
+APOSTROPHE = "\u2019" # ’
+
+
+def replaceAt(string: str, index: int, ch: str) -> str:
+ # A negative index would behave differently from the JS version,
+ # but in practice the index is never negative (asserted below).
+ assert index >= 0
+ return string[:index] + ch + string[index + 1 :]
+
+
+def process_inlines(tokens: list[Token], state: StateCore) -> None:
+ stack: list[dict[str, Any]] = []
+
+ for i in range(len(tokens)):
+ token = tokens[i]
+
+ thisLevel = token.level
+
+ j = 0
+ for j in range(len(stack))[::-1]:
+ if stack[j]["level"] <= thisLevel:
+ break
+ else:
+ # When the loop terminates without a "break",
+ # subtract 1 to get the same index as the js version.
+ j -= 1
+
+ stack = stack[: j + 1]
+
+ if token.type != "text":
+ continue
+
+ text = token.content
+ pos = 0
+ maximum = len(text)
+
+ while pos < maximum:
+ goto_outer = False
+ lastIndex = pos
+ t = QUOTE_RE.search(text[lastIndex:])
+ if not t:
+ break
+
+ canOpen = canClose = True
+ pos = t.start(0) + lastIndex + 1
+ isSingle = t.group(0) == "'"
+
+ # Find previous character,
+ # default to space if it's the beginning of the line
+ lastChar = 0x20
+
+ if t.start(0) + lastIndex - 1 >= 0:
+ lastChar = charCodeAt(text, t.start(0) + lastIndex - 1)
+ else:
+ for j in range(i)[::-1]:
+ # lastChar defaults to 0x20
+ if tokens[j].type == "softbreak" or tokens[j].type == "hardbreak":
+ break
+ # should skip all tokens except 'text', 'html_inline' or 'code_inline'
+ if not tokens[j].content:
+ continue
+
+ lastChar = charCodeAt(tokens[j].content, len(tokens[j].content) - 1)
+ break
+
+ # Find next character,
+ # default to space if it's the end of the line
+ nextChar = 0x20
+
+ if pos < maximum:
+ nextChar = charCodeAt(text, pos)
+ else:
+ for j in range(i + 1, len(tokens)):
+ # nextChar defaults to 0x20
+ if tokens[j].type == "softbreak" or tokens[j].type == "hardbreak":
+ break
+ # should skip all tokens except 'text', 'html_inline' or 'code_inline'
+ if not tokens[j].content:
+ continue
+
+ nextChar = charCodeAt(tokens[j].content, 0)
+ break
+
+ isLastPunctChar = isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar))
+ isNextPunctChar = isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar))
+
+ isLastWhiteSpace = isWhiteSpace(lastChar)
+ isNextWhiteSpace = isWhiteSpace(nextChar)
+
+ if isNextWhiteSpace:
+ canOpen = False
+ elif isNextPunctChar:
+ if not (isLastWhiteSpace or isLastPunctChar):
+ canOpen = False
+
+ if isLastWhiteSpace:
+ canClose = False
+ elif isLastPunctChar:
+ if not (isNextWhiteSpace or isNextPunctChar):
+ canClose = False
+
+ if nextChar == 0x22 and t.group(0) == '"': # 0x22: "
+ if lastChar >= 0x30 and lastChar <= 0x39: # 0x30: 0, 0x39: 9
+ # special case: 1"" - count first quote as an inch
+ canClose = canOpen = False
+
+ if canOpen and canClose:
+ # Replace quotes in the middle of punctuation sequence, but not
+ # in the middle of the words, i.e.:
+ #
+ # 1. foo " bar " baz - not replaced
+ # 2. foo-"-bar-"-baz - replaced
+ # 3. foo"bar"baz - not replaced
+ canOpen = isLastPunctChar
+ canClose = isNextPunctChar
+
+ if not canOpen and not canClose:
+ # middle of word
+ if isSingle:
+ token.content = replaceAt(
+ token.content, t.start(0) + lastIndex, APOSTROPHE
+ )
+ continue
+
+ if canClose:
+ # this could be a closing quote, rewind the stack to get a match
+ for j in range(len(stack))[::-1]:
+ item = stack[j]
+ if stack[j]["level"] < thisLevel:
+ break
+ if item["single"] == isSingle and stack[j]["level"] == thisLevel:
+ item = stack[j]
+
+ if isSingle:
+ openQuote = state.md.options.quotes[2]
+ closeQuote = state.md.options.quotes[3]
+ else:
+ openQuote = state.md.options.quotes[0]
+ closeQuote = state.md.options.quotes[1]
+
+ # replace token.content *before* tokens[item.token].content,
+ # because, if they are pointing at the same token, replaceAt
+ # could mess up indices when quote length != 1
+ token.content = replaceAt(
+ token.content, t.start(0) + lastIndex, closeQuote
+ )
+ tokens[item["token"]].content = replaceAt(
+ tokens[item["token"]].content, item["pos"], openQuote
+ )
+
+ pos += len(closeQuote) - 1
+ if item["token"] == i:
+ pos += len(openQuote) - 1
+
+ text = token.content
+ maximum = len(text)
+
+ stack = stack[:j]
+ goto_outer = True
+ break
+ if goto_outer:
+ goto_outer = False
+ continue
+
+ if canOpen:
+ stack.append(
+ {
+ "token": i,
+ "pos": t.start(0) + lastIndex,
+ "single": isSingle,
+ "level": thisLevel,
+ }
+ )
+ elif canClose and isSingle:
+ token.content = replaceAt(
+ token.content, t.start(0) + lastIndex, APOSTROPHE
+ )
+
+
+def smartquotes(state: StateCore) -> None:
+ if not state.md.options.typographer:
+ return
+
+ for token in state.tokens:
+
+ if token.type != "inline" or not QUOTE_RE.search(token.content):
+ continue
+ assert token.children is not None
+ process_inlines(token.children, state)
diff --git a/markdown_it/rules_core/state_core.py b/markdown_it/rules_core/state_core.py
new file mode 100644
index 0000000..15b7c60
--- /dev/null
+++ b/markdown_it/rules_core/state_core.py
@@ -0,0 +1,25 @@
+from __future__ import annotations
+
+from collections.abc import MutableMapping
+from typing import TYPE_CHECKING
+
+from ..ruler import StateBase
+from ..token import Token
+
+if TYPE_CHECKING:
+ from markdown_it import MarkdownIt
+
+
+class StateCore(StateBase):
+ def __init__(
+ self,
+ src: str,
+ md: MarkdownIt,
+ env: MutableMapping,
+ tokens: list[Token] | None = None,
+ ):
+ self.src = src
+ self.md = md # link to parser instance
+ self.env = env
+ self.tokens: list[Token] = tokens or []
+ self.inlineMode = False
diff --git a/markdown_it/rules_inline/__init__.py b/markdown_it/rules_inline/__init__.py
new file mode 100644
index 0000000..f27907c
--- /dev/null
+++ b/markdown_it/rules_inline/__init__.py
@@ -0,0 +1,29 @@
+__all__ = (
+ "StateInline",
+ "text",
+ "text_collapse",
+ "link_pairs",
+ "escape",
+ "newline",
+ "backtick",
+ "emphasis",
+ "image",
+ "link",
+ "autolink",
+ "entity",
+ "html_inline",
+ "strikethrough",
+)
+from . import emphasis, strikethrough
+from .autolink import autolink
+from .backticks import backtick
+from .balance_pairs import link_pairs
+from .entity import entity
+from .escape import escape
+from .html_inline import html_inline
+from .image import image
+from .link import link
+from .newline import newline
+from .state_inline import StateInline
+from .text import text
+from .text_collapse import text_collapse
diff --git a/markdown_it/rules_inline/autolink.py b/markdown_it/rules_inline/autolink.py
new file mode 100644
index 0000000..a4ee61c
--- /dev/null
+++ b/markdown_it/rules_inline/autolink.py
@@ -0,0 +1,78 @@
+# Process autolinks '<protocol:...>'
+import re
+
+from .state_inline import StateInline
+
+EMAIL_RE = re.compile(
+ r"^([a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)$" # noqa: E501
+)
+AUTOLINK_RE = re.compile(r"^([a-zA-Z][a-zA-Z0-9+.\-]{1,31}):([^<>\x00-\x20]*)$")
+
+
+def autolink(state: StateInline, silent: bool) -> bool:
+
+ pos = state.pos
+
+ if state.srcCharCode[pos] != 0x3C: # /* < */
+ return False
+
+ start = state.pos
+ maximum = state.posMax
+
+ while True:
+ pos += 1
+ if pos >= maximum:
+ return False
+
+ ch = state.srcCharCode[pos]
+
+ if ch == 0x3C: # /* < */
+ return False
+ if ch == 0x3E: # /* > */
+ break
+
+ url = state.src[start + 1 : pos]
+
+ if AUTOLINK_RE.search(url) is not None:
+ fullUrl = state.md.normalizeLink(url)
+ if not state.md.validateLink(fullUrl):
+ return False
+
+ if not silent:
+ token = state.push("link_open", "a", 1)
+ token.attrs = {"href": fullUrl}
+ token.markup = "autolink"
+ token.info = "auto"
+
+ token = state.push("text", "", 0)
+ token.content = state.md.normalizeLinkText(url)
+
+ token = state.push("link_close", "a", -1)
+ token.markup = "autolink"
+ token.info = "auto"
+
+ state.pos += len(url) + 2
+ return True
+
+ if EMAIL_RE.search(url) is not None:
+ fullUrl = state.md.normalizeLink("mailto:" + url)
+ if not state.md.validateLink(fullUrl):
+ return False
+
+ if not silent:
+ token = state.push("link_open", "a", 1)
+ token.attrs = {"href": fullUrl}
+ token.markup = "autolink"
+ token.info = "auto"
+
+ token = state.push("text", "", 0)
+ token.content = state.md.normalizeLinkText(url)
+
+ token = state.push("link_close", "a", -1)
+ token.markup = "autolink"
+ token.info = "auto"
+
+ state.pos += len(url) + 2
+ return True
+
+ return False
diff --git a/markdown_it/rules_inline/backticks.py b/markdown_it/rules_inline/backticks.py
new file mode 100644
index 0000000..7bff12f
--- /dev/null
+++ b/markdown_it/rules_inline/backticks.py
@@ -0,0 +1,75 @@
+# Parse backticks
+import re
+
+from .state_inline import StateInline
+
+regex = re.compile("^ (.+) $")
+
+
+def backtick(state: StateInline, silent: bool) -> bool:
+
+ pos = state.pos
+ ch = state.srcCharCode[pos]
+
+ # /* ` */
+ if ch != 0x60:
+ return False
+
+ start = pos
+ pos += 1
+ maximum = state.posMax
+
+ # scan marker length
+ while pos < maximum and (state.srcCharCode[pos] == 0x60): # /* ` */
+ pos += 1
+
+ marker = state.src[start:pos]
+ openerLength = len(marker)
+
+ if state.backticksScanned and state.backticks.get(openerLength, 0) <= start:
+ if not silent:
+ state.pending += marker
+ state.pos += openerLength
+ return True
+
+ matchStart = matchEnd = pos
+
+ # Nothing found in the cache, scan until the end of the line (or until marker is found)
+ while True:
+ try:
+ matchStart = state.src.index("`", matchEnd)
+ except ValueError:
+ break
+ matchEnd = matchStart + 1
+
+ # scan marker length
+ while matchEnd < maximum and (state.srcCharCode[matchEnd] == 0x60): # /* ` */
+ matchEnd += 1
+
+ closerLength = matchEnd - matchStart
+
+ if closerLength == openerLength:
+ # Found matching closer length.
+ if not silent:
+ token = state.push("code_inline", "code", 0)
+ token.markup = marker
+ token.content = state.src[pos:matchStart].replace("\n", " ")
+ if (
+ token.content.startswith(" ")
+ and token.content.endswith(" ")
+ and len(token.content.strip()) > 0
+ ):
+ token.content = token.content[1:-1]
+ state.pos = matchEnd
+ return True
+
+ # Some different length found, put it in cache as upper limit of where closer can be found
+ state.backticks[closerLength] = matchStart
+
+ # Scanned through the end, didn't find anything
+ state.backticksScanned = True
+
+ if not silent:
+ state.pending += marker
+ state.pos += openerLength
+ return True
diff --git a/markdown_it/rules_inline/balance_pairs.py b/markdown_it/rules_inline/balance_pairs.py
new file mode 100644
index 0000000..db622f0
--- /dev/null
+++ b/markdown_it/rules_inline/balance_pairs.py
@@ -0,0 +1,114 @@
+# For each opening emphasis-like marker find a matching closing one
+#
+from .state_inline import StateInline
+
+
+def processDelimiters(state: StateInline, delimiters, *args):
+
+ openersBottom = {}
+ maximum = len(delimiters)
+
+ closerIdx = 0
+ while closerIdx < maximum:
+ closer = delimiters[closerIdx]
+
+ # Length is only used for emphasis-specific "rule of 3",
+ # if it's not defined (in strikethrough or 3rd party plugins),
+ # we can default it to 0 to disable those checks.
+ #
+ closer.length = closer.length or 0
+
+ if not closer.close:
+ closerIdx += 1
+ continue
+
+ # Previously calculated lower bounds (previous fails)
+ # for each marker, each delimiter length modulo 3,
+ # and for whether this closer can be an opener;
+ # https://github.com/commonmark/cmark/commit/34250e12ccebdc6372b8b49c44fab57c72443460
+ if closer.marker not in openersBottom:
+ openersBottom[closer.marker] = [-1, -1, -1, -1, -1, -1]
+
+ minOpenerIdx = openersBottom[closer.marker][
+ (3 if closer.open else 0) + (closer.length % 3)
+ ]
+
+ openerIdx = closerIdx - closer.jump - 1
+
+ # avoid crash if `closer.jump` is pointing outside of the array,
+ # e.g. for strikethrough
+ if openerIdx < -1:
+ openerIdx = -1
+
+ newMinOpenerIdx = openerIdx
+
+ while openerIdx > minOpenerIdx:
+ opener = delimiters[openerIdx]
+
+ if opener.marker != closer.marker:
+ openerIdx -= opener.jump + 1
+ continue
+
+ if opener.open and opener.end < 0:
+
+ isOddMatch = False
+
+ # from spec:
+ #
+ # If one of the delimiters can both open and close emphasis, then the
+ # sum of the lengths of the delimiter runs containing the opening and
+ # closing delimiters must not be a multiple of 3 unless both lengths
+ # are multiples of 3.
+ #
+ if opener.close or closer.open:
+ if (opener.length + closer.length) % 3 == 0:
+ if opener.length % 3 != 0 or closer.length % 3 != 0:
+ isOddMatch = True
+
+ if not isOddMatch:
+ # If previous delimiter cannot be an opener, we can safely skip
+ # the entire sequence in future checks. This is required to make
+ # sure algorithm has linear complexity (see *_*_*_*_*_... case).
+ #
+ if openerIdx > 0 and not delimiters[openerIdx - 1].open:
+ lastJump = delimiters[openerIdx - 1].jump + 1
+ else:
+ lastJump = 0
+
+ closer.jump = closerIdx - openerIdx + lastJump
+ closer.open = False
+ opener.end = closerIdx
+ opener.jump = lastJump
+ opener.close = False
+ newMinOpenerIdx = -1
+ break
+
+ openerIdx -= opener.jump + 1
+
+ if newMinOpenerIdx != -1:
+ # If match for this delimiter run failed, we want to set lower bound for
+ # future lookups. This is required to make sure algorithm has linear
+ # complexity.
+ #
+ # See details here:
+ # https://github.com/commonmark/cmark/issues/178#issuecomment-270417442
+ #
+ openersBottom[closer.marker][
+ (3 if closer.open else 0) + ((closer.length or 0) % 3)
+ ] = newMinOpenerIdx
+
+ closerIdx += 1
+
+
+def link_pairs(state: StateInline) -> None:
+ tokens_meta = state.tokens_meta
+ maximum = len(state.tokens_meta)
+
+ processDelimiters(state, state.delimiters)
+
+ curr = 0
+ while curr < maximum:
+ curr_meta = tokens_meta[curr]
+ if curr_meta and "delimiters" in curr_meta:
+ processDelimiters(state, curr_meta["delimiters"])
+ curr += 1
diff --git a/markdown_it/rules_inline/emphasis.py b/markdown_it/rules_inline/emphasis.py
new file mode 100644
index 0000000..9001b09
--- /dev/null
+++ b/markdown_it/rules_inline/emphasis.py
@@ -0,0 +1,102 @@
+# Process *this* and _that_
+#
+
+from .state_inline import Delimiter, StateInline
+
+
+def tokenize(state: StateInline, silent: bool):
+ """Insert each marker as a separate text token, and add it to delimiter list"""
+ start = state.pos
+ marker = state.srcCharCode[start]
+
+ if silent:
+ return False
+
+ # /* _ */ /* * */
+ if marker != 0x5F and marker != 0x2A:
+ return False
+
+ scanned = state.scanDelims(state.pos, marker == 0x2A)
+
+ for i in range(scanned.length):
+ token = state.push("text", "", 0)
+ token.content = chr(marker)
+ state.delimiters.append(
+ Delimiter(
+ marker=marker,
+ length=scanned.length,
+ jump=i,
+ token=len(state.tokens) - 1,
+ end=-1,
+ open=scanned.can_open,
+ close=scanned.can_close,
+ )
+ )
+
+ state.pos += scanned.length
+
+ return True
+
+
+def _postProcess(state, delimiters):
+
+ i = len(delimiters) - 1
+ while i >= 0:
+ startDelim = delimiters[i]
+
+ # /* _ */ /* * */
+ if startDelim.marker != 0x5F and startDelim.marker != 0x2A:
+ i -= 1
+ continue
+
+ # Process only opening markers
+ if startDelim.end == -1:
+ i -= 1
+ continue
+
+ endDelim = delimiters[startDelim.end]
+
+ # If the previous delimiter has the same marker and is adjacent to this one,
+ # merge those into one strong delimiter.
+ #
+ # `<em><em>whatever</em></em>` -> `<strong>whatever</strong>`
+ #
+ isStrong = (
+ i > 0
+ and delimiters[i - 1].end == startDelim.end + 1
+ and delimiters[i - 1].token == startDelim.token - 1
+ and delimiters[startDelim.end + 1].token == endDelim.token + 1
+ and delimiters[i - 1].marker == startDelim.marker
+ )
+
+ ch = chr(startDelim.marker)
+
+ token = state.tokens[startDelim.token]
+ token.type = "strong_open" if isStrong else "em_open"
+ token.tag = "strong" if isStrong else "em"
+ token.nesting = 1
+ token.markup = ch + ch if isStrong else ch
+ token.content = ""
+
+ token = state.tokens[endDelim.token]
+ token.type = "strong_close" if isStrong else "em_close"
+ token.tag = "strong" if isStrong else "em"
+ token.nesting = -1
+ token.markup = ch + ch if isStrong else ch
+ token.content = ""
+
+ if isStrong:
+ state.tokens[delimiters[i - 1].token].content = ""
+ state.tokens[delimiters[startDelim.end + 1].token].content = ""
+ i -= 1
+
+ i -= 1
+
+
+def postProcess(state: StateInline):
+ """Walk through delimiter list and replace text tokens with tags."""
+ _postProcess(state, state.delimiters)
+
+ for token in state.tokens_meta:
+ if token and "delimiters" in token:
+ _postProcess(state, token["delimiters"])
diff --git a/markdown_it/rules_inline/entity.py b/markdown_it/rules_inline/entity.py
new file mode 100644
index 0000000..883a966
--- /dev/null
+++ b/markdown_it/rules_inline/entity.py
@@ -0,0 +1,54 @@
+# Process html entity - &#123;, &#xAF;, &quot;, ...
+import re
+
+from ..common.entities import entities
+from ..common.utils import fromCodePoint, isValidEntityCode
+from .state_inline import StateInline
+
+DIGITAL_RE = re.compile(r"^&#((?:x[a-f0-9]{1,6}|[0-9]{1,7}));", re.IGNORECASE)
+NAMED_RE = re.compile(r"^&([a-z][a-z0-9]{1,31});", re.IGNORECASE)
+
+
+def entity(state: StateInline, silent: bool):
+
+ pos = state.pos
+ maximum = state.posMax
+
+ if state.srcCharCode[pos] != 0x26: # /* & */
+ return False
+
+ if (pos + 1) < maximum:
+ ch = state.srcCharCode[pos + 1]
+
+ if ch == 0x23: # /* # */
+ match = DIGITAL_RE.search(state.src[pos:])
+ if match:
+ if not silent:
+ match1 = match.group(1)
+ code = (
+ int(match1[1:], 16)
+ if match1[0].lower() == "x"
+ else int(match1, 10)
+ )
+ state.pending += (
+ fromCodePoint(code)
+ if isValidEntityCode(code)
+ else fromCodePoint(0xFFFD)
+ )
+
+ state.pos += len(match.group(0))
+ return True
+
+ else:
+ match = NAMED_RE.search(state.src[pos:])
+ if match:
+ if match.group(1) in entities:
+ if not silent:
+ state.pending += entities[match.group(1)]
+ state.pos += len(match.group(0))
+ return True
+
+ if not silent:
+ state.pending += "&"
+ state.pos += 1
+ return True
diff --git a/markdown_it/rules_inline/escape.py b/markdown_it/rules_inline/escape.py
new file mode 100644
index 0000000..36bd040
--- /dev/null
+++ b/markdown_it/rules_inline/escape.py
@@ -0,0 +1,49 @@
+"""
+Process escaped chars and hardbreaks
+"""
+from ..common.utils import isSpace
+from .state_inline import StateInline
+
+ESCAPED = [0 for _ in range(256)]
+for ch in "\\!\"#$%&'()*+,./:;<=>?@[]^_`{|}~-":
+ ESCAPED[ord(ch)] = 1
+
+
+def escape(state: StateInline, silent: bool):
+ pos = state.pos
+ maximum = state.posMax
+
+ # /* \ */
+ if state.srcCharCode[pos] != 0x5C:
+ return False
+
+ pos += 1
+
+ if pos < maximum:
+ ch = state.srcCharCode[pos]
+
+ if ch < 256 and ESCAPED[ch] != 0:
+ if not silent:
+ state.pending += state.src[pos]
+ state.pos += 2
+ return True
+
+ if ch == 0x0A:
+ if not silent:
+ state.push("hardbreak", "br", 0)
+
+ pos += 1
+ # skip leading whitespaces from next line
+ while pos < maximum:
+ ch = state.srcCharCode[pos]
+ if not isSpace(ch):
+ break
+ pos += 1
+
+ state.pos = pos
+ return True
+
+ if not silent:
+ state.pending += "\\"
+ state.pos += 1
+ return True
diff --git a/markdown_it/rules_inline/html_inline.py b/markdown_it/rules_inline/html_inline.py
new file mode 100644
index 0000000..295cc5c
--- /dev/null
+++ b/markdown_it/rules_inline/html_inline.py
@@ -0,0 +1,43 @@
+# Process html tags
+from ..common.html_re import HTML_TAG_RE
+from .state_inline import StateInline
+
+
+def isLetter(ch: int):
+ lc = ch | 0x20 # to lower case
+ # /* a */ and /* z */
+ return (lc >= 0x61) and (lc <= 0x7A)
+
+
+def html_inline(state: StateInline, silent: bool):
+
+ pos = state.pos
+
+ if not state.md.options.get("html", None):
+ return False
+
+ # Check start
+ maximum = state.posMax
+ if state.srcCharCode[pos] != 0x3C or pos + 2 >= maximum: # /* < */
+ return False
+
+ # Quick fail on second char
+ ch = state.srcCharCode[pos + 1]
+ if (
+ ch != 0x21 # /* ! */
+ and ch != 0x3F # /* ? */
+ and ch != 0x2F # /* / */
+ and not isLetter(ch)
+ ):
+ return False
+
+ match = HTML_TAG_RE.search(state.src[pos:])
+ if not match:
+ return False
+
+ if not silent:
+ token = state.push("html_inline", "", 0)
+ token.content = state.src[pos : pos + len(match.group(0))]
+
+ state.pos += len(match.group(0))
+ return True
diff --git a/markdown_it/rules_inline/image.py b/markdown_it/rules_inline/image.py
new file mode 100644
index 0000000..d2a08d4
--- /dev/null
+++ b/markdown_it/rules_inline/image.py
@@ -0,0 +1,151 @@
+# Process ![image](<src> "title")
+from __future__ import annotations
+
+from ..common.utils import isSpace, normalizeReference
+from ..token import Token
+from .state_inline import StateInline
+
+
+def image(state: StateInline, silent: bool):
+
+ label = None
+ href = ""
+ oldPos = state.pos
+ max = state.posMax
+
+ # /* ! */
+ if state.srcCharCode[state.pos] != 0x21:
+ return False
+ # /* [ */
+ if state.pos + 1 < state.posMax and state.srcCharCode[state.pos + 1] != 0x5B:
+ return False
+
+ labelStart = state.pos + 2
+ labelEnd = state.md.helpers.parseLinkLabel(state, state.pos + 1, False)
+
+ # parser failed to find ']', so it's not a valid link
+ if labelEnd < 0:
+ return False
+
+ pos = labelEnd + 1
+ # /* ( */
+ if pos < max and state.srcCharCode[pos] == 0x28:
+ #
+ # Inline link
+ #
+
+ # [link]( <href> "title" )
+ # ^^ skipping these spaces
+ pos += 1
+ while pos < max:
+ code = state.srcCharCode[pos]
+ if not isSpace(code) and code != 0x0A:
+ break
+ pos += 1
+
+ if pos >= max:
+ return False
+
+ # [link]( <href> "title" )
+ # ^^^^^^ parsing link destination
+ start = pos
+ res = state.md.helpers.parseLinkDestination(state.src, pos, state.posMax)
+ if res.ok:
+ href = state.md.normalizeLink(res.str)
+ if state.md.validateLink(href):
+ pos = res.pos
+ else:
+ href = ""
+
+ # [link]( <href> "title" )
+ # ^^ skipping these spaces
+ start = pos
+ while pos < max:
+ code = state.srcCharCode[pos]
+ if not isSpace(code) and code != 0x0A:
+ break
+ pos += 1
+
+ # [link]( <href> "title" )
+ # ^^^^^^^ parsing link title
+ res = state.md.helpers.parseLinkTitle(state.src, pos, state.posMax)
+ if pos < max and start != pos and res.ok:
+ title = res.str
+ pos = res.pos
+
+ # [link]( <href> "title" )
+ # ^^ skipping these spaces
+ while pos < max:
+ code = state.srcCharCode[pos]
+ if not isSpace(code) and code != 0x0A:
+ break
+ pos += 1
+ else:
+ title = ""
+
+ # /* ) */
+ if pos >= max or state.srcCharCode[pos] != 0x29:
+ state.pos = oldPos
+ return False
+
+ pos += 1
+
+ else:
+ #
+ # Link reference
+ #
+ if "references" not in state.env:
+ return False
+
+ # /* [ */
+ if pos < max and state.srcCharCode[pos] == 0x5B:
+ start = pos + 1
+ pos = state.md.helpers.parseLinkLabel(state, pos)
+ if pos >= 0:
+ label = state.src[start:pos]
+ pos += 1
+ else:
+ pos = labelEnd + 1
+ else:
+ pos = labelEnd + 1
+
+ # covers label == '' and label == undefined
+ # (collapsed reference link and shortcut reference link respectively)
+ if not label:
+ label = state.src[labelStart:labelEnd]
+
+ label = normalizeReference(label)
+
+ ref = state.env["references"].get(label, None)
+ if not ref:
+ state.pos = oldPos
+ return False
+
+ href = ref["href"]
+ title = ref["title"]
+
+ #
+ # We found the end of the link, and know for a fact it's a valid link
+ # so all that's left to do is to call tokenizer.
+ #
+ if not silent:
+ content = state.src[labelStart:labelEnd]
+
+ tokens: list[Token] = []
+ state.md.inline.parse(content, state.md, state.env, tokens)
+
+ token = state.push("image", "img", 0)
+ token.attrs = {"src": href, "alt": ""}
+ token.children = tokens or None
+ token.content = content
+
+ if title:
+ token.attrSet("title", title)
+
+ # note, this is not part of markdown-it JS, but is useful for renderers
+ if label and state.md.options.get("store_labels", False):
+ token.meta["label"] = label
+
+ state.pos = pos
+ state.posMax = max
+ return True
diff --git a/markdown_it/rules_inline/link.py b/markdown_it/rules_inline/link.py
new file mode 100644
index 0000000..2394d6c
--- /dev/null
+++ b/markdown_it/rules_inline/link.py
@@ -0,0 +1,150 @@
+# Process [link](<to> "stuff")
+
+from ..common.utils import isSpace, normalizeReference
+from .state_inline import StateInline
+
+
def link(state: StateInline, silent: bool):
    """Inline rule: parse a link starting at ``state.pos``.

    Handles inline links ``[text](<href> "title")`` as well as full
    ``[text][label]``, collapsed ``[text][]`` and shortcut ``[text]``
    reference links (resolved through ``state.env["references"]``).

    Returns True (and, unless *silent*, pushes ``link_open``/``link_close``
    tokens with the label tokenized in between) on success, False otherwise.
    """
    href = ""
    title = ""
    label = None
    oldPos = state.pos
    maximum = state.posMax
    start = state.pos
    # assume reference link until a valid inline "(...)" part is parsed
    parseReference = True

    if state.srcCharCode[state.pos] != 0x5B:  # /* [ */
        return False

    labelStart = state.pos + 1
    labelEnd = state.md.helpers.parseLinkLabel(state, state.pos, True)

    # parser failed to find ']', so it's not a valid link
    if labelEnd < 0:
        return False

    pos = labelEnd + 1

    if pos < maximum and state.srcCharCode[pos] == 0x28:  # /* ( */
        #
        # Inline link
        #

        # might have found a valid shortcut link, disable reference parsing
        parseReference = False

        # [link](  <href>  "title"  )
        #        ^^ skipping these spaces
        pos += 1
        while pos < maximum:
            code = state.srcCharCode[pos]
            if not isSpace(code) and code != 0x0A:
                break
            pos += 1

        if pos >= maximum:
            return False

        # [link](  <href>  "title"  )
        #          ^^^^^^ parsing link destination
        start = pos
        res = state.md.helpers.parseLinkDestination(state.src, pos, state.posMax)
        if res.ok:
            href = state.md.normalizeLink(res.str)
            # an invalid href is silently dropped (rendered as href="")
            if state.md.validateLink(href):
                pos = res.pos
            else:
                href = ""

        # [link](  <href>  "title"  )
        #                ^^ skipping these spaces
        start = pos
        while pos < maximum:
            code = state.srcCharCode[pos]
            if not isSpace(code) and code != 0x0A:
                break
            pos += 1

        # [link](  <href>  "title"  )
        #                  ^^^^^^^ parsing link title
        # a title is only accepted if separated from the href by whitespace
        # (start != pos checks that at least one space was skipped above)
        res = state.md.helpers.parseLinkTitle(state.src, pos, state.posMax)
        if pos < maximum and start != pos and res.ok:
            title = res.str
            pos = res.pos

        # [link](  <href>  "title"  )
        #                         ^^ skipping these spaces
        while pos < maximum:
            code = state.srcCharCode[pos]
            if not isSpace(code) and code != 0x0A:
                break
            pos += 1

        if pos >= maximum or state.srcCharCode[pos] != 0x29:  # /* ) */
            # parsing a valid shortcut link failed, fallback to reference
            parseReference = True

        # advance past ')' (mirrors the upstream JS rule: also executed on
        # the fallback path above)
        pos += 1

    if parseReference:
        #
        # Link reference
        #
        if "references" not in state.env:
            return False

        if pos < maximum and state.srcCharCode[pos] == 0x5B:  # /* [ */
            # explicit label: [text][label]
            start = pos + 1
            pos = state.md.helpers.parseLinkLabel(state, pos)
            if pos >= 0:
                label = state.src[start:pos]
                pos += 1
            else:
                pos = labelEnd + 1

        else:
            pos = labelEnd + 1

        # covers label == '' and label == undefined
        # (collapsed reference link and shortcut reference link respectively)
        if not label:
            label = state.src[labelStart:labelEnd]

        label = normalizeReference(label)

        ref = (
            state.env["references"][label] if label in state.env["references"] else None
        )
        if not ref:
            # unknown reference: restore position and give up
            state.pos = oldPos
            return False

        href = ref["href"]
        title = ref["title"]

    #
    # We found the end of the link, and know for a fact it's a valid link
    # so all that's left to do is to call tokenizer.
    #
    if not silent:
        # temporarily narrow the window to the label text and tokenize it
        state.pos = labelStart
        state.posMax = labelEnd

        token = state.push("link_open", "a", 1)
        token.attrs = {"href": href}

        if title:
            token.attrSet("title", title)

        # note, this is not part of markdown-it JS, but is useful for renderers
        if label and state.md.options.get("store_labels", False):
            token.meta["label"] = label

        state.md.inline.tokenize(state)

        token = state.push("link_close", "a", -1)

    state.pos = pos
    state.posMax = maximum
    return True
diff --git a/markdown_it/rules_inline/newline.py b/markdown_it/rules_inline/newline.py
new file mode 100644
index 0000000..3034e40
--- /dev/null
+++ b/markdown_it/rules_inline/newline.py
@@ -0,0 +1,43 @@
+# Process '\n'
+import re
+
+from ..common.utils import charCodeAt, isSpace
+from .state_inline import StateInline
+
# One or more spaces at the very end of the pending buffer (hardbreak marker).
endSpace = re.compile(r" +$")


def newline(state: StateInline, silent: bool):
    """Inline rule for a literal ``'\\n'``: emit a soft or hard break.

    Two or more spaces immediately before the newline produce a
    ``hardbreak`` token, otherwise a ``softbreak``; any trailing spaces are
    stripped from ``state.pending``. Leading spaces of the following line
    are consumed as well.
    """
    cursor = state.pos

    if state.srcCharCode[cursor] != 0x0A:  # /* \n */
        return False

    last = len(state.pending) - 1
    limit = state.posMax

    # '  \n' -> hardbreak
    # Lookup in pending chars is bad practice! Don't copy to other rules!
    # Pending string is stored in concat mode, indexed lookups will cause
    # conversion to flat mode.
    if not silent:
        has_trailing_space = last >= 0 and charCodeAt(state.pending, last) == 0x20
        if (
            has_trailing_space
            and last >= 1
            and charCodeAt(state.pending, last - 1) == 0x20
        ):
            # at least two trailing spaces: strip them all, hard break
            state.pending = endSpace.sub("", state.pending)
            state.push("hardbreak", "br", 0)
        else:
            if has_trailing_space:
                # a single trailing space is dropped before the soft break
                state.pending = state.pending[:-1]
            state.push("softbreak", "br", 0)

    cursor += 1

    # skip heading spaces for next line
    while cursor < limit and isSpace(state.srcCharCode[cursor]):
        cursor += 1

    state.pos = cursor
    return True
diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py
new file mode 100644
index 0000000..283532c
--- /dev/null
+++ b/markdown_it/rules_inline/state_inline.py
@@ -0,0 +1,175 @@
+from __future__ import annotations
+
+from collections import namedtuple
+from collections.abc import MutableMapping
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+from .._compat import DATACLASS_KWARGS
+from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace
+from ..ruler import StateBase
+from ..token import Token
+
+if TYPE_CHECKING:
+ from markdown_it import MarkdownIt
+
+
@dataclass(**DATACLASS_KWARGS)
class Delimiter:
    """One emphasis-like delimiter marker recorded in ``StateInline.delimiters``."""

    # Char code of the starting marker (number).
    marker: int

    # Total length of these series of delimiters.
    length: int

    # An amount of characters before this one that's equivalent to
    # current one. In plain English: if this delimiter does not open
    # an emphasis, neither do previous `jump` characters.
    #
    # Used to skip sequences like "*****" in one step, for 1st asterisk
    # value will be 0, for 2nd it's 1 and so on.
    jump: int

    # A position of the token this delimiter corresponds to.
    token: int

    # If this delimiter is matched as a valid opener, `end` will be
    # equal to its position, otherwise it's `-1`.
    end: int

    # Boolean flags that determine if this delimiter could open or close
    # an emphasis.
    open: bool
    close: bool

    # NOTE(review): never assigned in this chunk, and `bool` looks odd for a
    # field called "level" (state.level is an int) — confirm against callers.
    level: bool | None = None
+
+
# Result of StateInline.scanDelims: whether the scanned run of identical
# markers can open and/or close emphasis, and how many characters it spans.
Scanned = namedtuple("Scanned", ["can_open", "can_close", "length"])
+
+
class StateInline(StateBase):
    """Mutable state shared by all inline-level rules.

    Tracks the parse position inside ``src``, the pending (not yet flushed)
    text buffer, the output token stream, and the delimiter bookkeeping used
    by emphasis/strikethrough post-processing.
    """

    def __init__(
        self, src: str, md: MarkdownIt, env: MutableMapping, outTokens: list[Token]
    ):
        self.src = src
        self.env = env
        self.md = md
        self.tokens = outTokens
        # one meta slot per pre-existing token; push() appends in lockstep
        self.tokens_meta: list[dict | None] = [None] * len(outTokens)

        self.pos = 0
        self.posMax = len(self.src)
        self.level = 0
        self.pending = ""
        self.pendingLevel = 0

        # Stores { start: end } pairs. Useful for backtrack
        # optimization of pairs parse (emphasis, strikes).
        self.cache: dict[int, int] = {}

        # List of emphasis-like delimiters for current tag
        self.delimiters: list[Delimiter] = []

        # Stack of delimiter lists for upper level tags
        self._prev_delimiters: list[list[Delimiter]] = []

        # backticklength => last seen position
        self.backticks: dict[int, int] = {}
        self.backticksScanned = False

    def __repr__(self):
        return (
            f"{self.__class__.__name__}"
            f"(pos=[{self.pos} of {self.posMax}], token={len(self.tokens)})"
        )

    def pushPending(self):
        """Flush ``self.pending`` as a plain text token and reset the buffer."""
        token = Token("text", "", 0)
        token.content = self.pending
        token.level = self.pendingLevel
        self.tokens.append(token)
        self.pending = ""
        return token

    def push(self, ttype, tag, nesting):
        """Push new token to "stream".
        If pending text exists - flush it as text token
        """
        if self.pending:
            self.pushPending()

        token = Token(ttype, tag, nesting)
        token_meta = None

        if nesting < 0:
            # closing tag
            self.level -= 1
            self.delimiters = self._prev_delimiters.pop()

        token.level = self.level

        if nesting > 0:
            # opening tag: start a fresh delimiter list for the nested scope.
            # NOTE: token_meta deliberately aliases the new (empty) list, so
            # delimiters appended while inside this tag remain reachable
            # through the meta entry after the list is popped again.
            self.level += 1
            self._prev_delimiters.append(self.delimiters)
            self.delimiters = []
            token_meta = {"delimiters": self.delimiters}

        self.pendingLevel = self.level
        self.tokens.append(token)
        self.tokens_meta.append(token_meta)
        return token

    def scanDelims(self, start, canSplitWord):
        """
        Scan a sequence of emphasis-like markers, and determine whether
        it can start an emphasis sequence or end an emphasis sequence.

        - start - position to scan from (it should point at a valid marker);
        - canSplitWord - determine if these markers can be found inside a word

        """
        pos = start
        left_flanking = True
        right_flanking = True
        maximum = self.posMax
        marker = self.srcCharCode[start]

        # treat beginning of the line as a whitespace
        lastChar = self.srcCharCode[start - 1] if start > 0 else 0x20

        # consume the whole run of identical markers
        while pos < maximum and self.srcCharCode[pos] == marker:
            pos += 1

        count = pos - start

        # treat end of the line as a whitespace
        nextChar = self.srcCharCode[pos] if pos < maximum else 0x20

        isLastPunctChar = isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar))
        isNextPunctChar = isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar))

        isLastWhiteSpace = isWhiteSpace(lastChar)
        isNextWhiteSpace = isWhiteSpace(nextChar)

        # CommonMark left-/right-flanking rules based on the neighbours
        if isNextWhiteSpace:
            left_flanking = False
        elif isNextPunctChar:
            if not (isLastWhiteSpace or isLastPunctChar):
                left_flanking = False

        if isLastWhiteSpace:
            right_flanking = False
        elif isLastPunctChar:
            if not (isNextWhiteSpace or isNextPunctChar):
                right_flanking = False

        if not canSplitWord:
            can_open = left_flanking and ((not right_flanking) or isLastPunctChar)
            can_close = right_flanking and ((not left_flanking) or isNextPunctChar)
        else:
            can_open = left_flanking
            can_close = right_flanking

        return Scanned(can_open, can_close, count)
diff --git a/markdown_it/rules_inline/strikethrough.py b/markdown_it/rules_inline/strikethrough.py
new file mode 100644
index 0000000..107ea26
--- /dev/null
+++ b/markdown_it/rules_inline/strikethrough.py
@@ -0,0 +1,133 @@
+# ~~strike through~~
+from __future__ import annotations
+
+from .state_inline import Delimiter, StateInline
+
+
def tokenize(state: StateInline, silent: bool):
    """Insert each ``~~`` marker as a separate text token and record it in
    the delimiter list for later pairing by :func:`postProcess`."""
    start = state.pos
    marker = state.srcCharCode[start]

    if silent:
        return False

    if marker != 0x7E:  # /* ~ */
        return False

    scanned = state.scanDelims(state.pos, True)
    length = scanned.length
    ch = chr(marker)

    # a single '~' never strikes anything
    if length < 2:
        return False

    # an odd leftover marker is emitted as plain text up front
    if length % 2:
        token = state.push("text", "", 0)
        token.content = ch
        length -= 1

    # every remaining pair of '~' becomes one text token + one delimiter
    for offset in range(0, length, 2):
        token = state.push("text", "", 0)
        token.content = ch + ch
        state.delimiters.append(
            Delimiter(
                marker=marker,
                length=0,  # disable "rule of 3" length checks meant for emphasis
                jump=offset // 2,  # for `~~` 1 marker = 2 characters
                token=len(state.tokens) - 1,
                end=-1,
                open=scanned.can_open,
                close=scanned.can_close,
            )
        )

    state.pos += scanned.length

    return True
+
+
def _postProcess(state: StateInline, delimiters: list[Delimiter]):
    """Turn matched ``~~`` delimiter pairs into s_open/s_close tokens."""
    loneMarkers = []

    for startDelim in delimiters:
        # only matched strikethrough openers are of interest
        if startDelim.marker != 0x7E or startDelim.end == -1:  # /* ~ */
            continue

        endDelim = delimiters[startDelim.end]

        opener = state.tokens[startDelim.token]
        opener.type = "s_open"
        opener.tag = "s"
        opener.nesting = 1
        opener.markup = "~~"
        opener.content = ""

        closer = state.tokens[endDelim.token]
        closer.type = "s_close"
        closer.tag = "s"
        closer.nesting = -1
        closer.markup = "~~"
        closer.content = ""

        previous = state.tokens[endDelim.token - 1]
        if previous.type == "text" and previous.content == "~":
            loneMarkers.append(endDelim.token - 1)

    # If a marker sequence has an odd number of characters, it's split
    # like this: `~~~~~` -> `~` + `~~` + `~~`, leaving one marker at the
    # start of the sequence.
    #
    # So, we have to move all those markers after subsequent s_close tags.
    #
    while loneMarkers:
        i = loneMarkers.pop()
        j = i + 1

        while (j < len(state.tokens)) and (state.tokens[j].type == "s_close"):
            j += 1

        j -= 1

        if i != j:
            state.tokens[i], state.tokens[j] = state.tokens[j], state.tokens[i]
+
+
def postProcess(state: StateInline):
    """Walk through delimiter list and replace text tokens with tags."""
    tokens_meta = state.tokens_meta
    total = len(tokens_meta)

    # the top-level delimiter list first ...
    _postProcess(state, state.delimiters)

    # ... then every delimiter list captured for a nested opening tag
    for index in range(total):
        try:
            meta = tokens_meta[index]
        except IndexError:
            # defensive only: the meta list is not resized by _postProcess
            continue
        if meta and "delimiters" in meta:
            _postProcess(state, meta["delimiters"])
diff --git a/markdown_it/rules_inline/text.py b/markdown_it/rules_inline/text.py
new file mode 100644
index 0000000..ec6ee0f
--- /dev/null
+++ b/markdown_it/rules_inline/text.py
@@ -0,0 +1,57 @@
+# Skip text characters for text token, place those to pending buffer
+# and increment current pos
+
+from .state_inline import StateInline
+
+# Rule to skip pure text
+# '{}$%@~+=:' reserved for extensions
+
+# !, ", #, $, %, &, ', (, ), *, +, ,, -, ., /, :, ;, <, =, >, ?, @, [, \, ], ^, _, `, {, |, }, or ~
+
+# !!!! Don't confuse with "Markdown ASCII Punctuation" chars
+# http://spec.commonmark.org/0.15/#ascii-punctuation-character
+
+
# Character codes that interrupt a plain-text run (see module comment above).
_TERMINATOR_CODES = frozenset(
    {
        0x0A,  # \n
        0x21,  # !
        0x23,  # #
        0x24,  # $
        0x25,  # %
        0x26,  # &
        0x2A,  # *
        0x2B,  # +
        0x2D,  # -
        0x3A,  # :
        0x3C,  # <
        0x3D,  # =
        0x3E,  # >
        0x40,  # @
        0x5B,  # [
        0x5C,  # \
        0x5D,  # ]
        0x5E,  # ^
        0x5F,  # _
        0x60,  # `
        0x7B,  # {
        0x7D,  # }
        0x7E,  # ~
    }
)


def isTerminatorChar(ch):
    """Return True if character code *ch* ends a run of plain text."""
    return ch in _TERMINATOR_CODES
+
+
def text(state: StateInline, silent: bool, **args):
    """Consume the longest run of non-terminator characters at ``state.pos``.

    The consumed text is appended to ``state.pending`` (unless *silent*) and
    the position is advanced. Returns False when the current character is a
    terminator, i.e. some other rule should handle it.
    """
    begin = state.pos
    limit = state.posMax

    cursor = begin
    while cursor < limit and not isTerminatorChar(state.srcCharCode[cursor]):
        cursor += 1

    if cursor == begin:
        return False

    if not silent:
        state.pending += state.src[begin:cursor]

    state.pos = cursor

    return True
diff --git a/markdown_it/rules_inline/text_collapse.py b/markdown_it/rules_inline/text_collapse.py
new file mode 100644
index 0000000..6d0c0ab
--- /dev/null
+++ b/markdown_it/rules_inline/text_collapse.py
@@ -0,0 +1,43 @@
+from .state_inline import StateInline
+
+
def text_collapse(state: StateInline, *args):
    """
    Clean up tokens after emphasis and strikethrough postprocessing:
    merge adjacent text nodes into one and re-calculate all token levels

    This is necessary because initially emphasis delimiter markers (``*, _, ~``)
    are treated as their own separate text tokens. Then emphasis rule either
    leaves them as text (needed to merge with adjacent text) or turns them
    into opening/closing tags (which messes up levels inside).
    """
    tokens = state.tokens
    total = len(tokens)
    depth = 0

    write = 0  # index the next surviving token is compacted to
    for read in range(total):
        token = tokens[read]

        # re-calculate levels after emphasis/strikethrough turned some text
        # nodes into opening/closing tags
        if token.nesting < 0:
            depth -= 1  # closing tag
        token.level = depth
        if token.nesting > 0:
            depth += 1  # opening tag

        if (
            token.type == "text"
            and read + 1 < total
            and tokens[read + 1].type == "text"
        ):
            # fold this text node's content forward into its neighbour;
            # the current slot is dropped by not advancing `write`
            tokens[read + 1].content = token.content + tokens[read + 1].content
        else:
            if read != write:
                tokens[write] = token
            write += 1

    # drop the tail left over by the in-place compaction
    if total != write:
        del tokens[write:]
diff --git a/markdown_it/token.py b/markdown_it/token.py
new file mode 100644
index 0000000..b20875b
--- /dev/null
+++ b/markdown_it/token.py
@@ -0,0 +1,181 @@
+from __future__ import annotations
+
+from collections.abc import Callable, MutableMapping
+import dataclasses as dc
+from typing import Any
+import warnings
+
+from markdown_it._compat import DATACLASS_KWARGS
+
+
def convert_attrs(value: Any) -> Any:
    """Normalise a ``Token.attrs`` input value to a dict.

    Accepts ``None`` (or any falsy value, mapped to an empty dict) and the
    upstream markdown-it ``[[key, value], ...]`` list form; any other value
    is returned unchanged. This improves compatibility with upstream
    markdown-it.
    """
    if not value:
        return {}
    return dict(value) if isinstance(value, list) else value
+
+
@dc.dataclass(**DATACLASS_KWARGS)
class Token:
    """A single node of the markdown-it token stream.

    Mirrors the upstream markdown-it Token, except that ``attrs`` is stored
    as a dict (``__post_init__`` converts upstream-style input).
    """

    type: str
    """Type of the token (string, e.g. "paragraph_open")"""

    tag: str
    """HTML tag name, e.g. 'p'"""

    nesting: int
    """Level change (number in {-1, 0, 1} set), where:
    - `1` means the tag is opening
    - `0` means the tag is self-closing
    - `-1` means the tag is closing
    """

    attrs: dict[str, str | int | float] = dc.field(default_factory=dict)
    """HTML attributes.
    Note this differs from the upstream "list of lists" format,
    although an instance can still be initialised with this format.
    """

    map: list[int] | None = None
    """Source map info. Format: `[ line_begin, line_end ]`"""

    level: int = 0
    """Nesting level, the same as `state.level`"""

    children: list[Token] | None = None
    """Array of child nodes (inline and img tokens)."""

    content: str = ""
    """Inner content, in the case of a self-closing tag (code, html, fence, etc.)."""

    markup: str = ""
    """'*' or '_' for emphasis, fence string for fence, etc."""

    info: str = ""
    """Additional information:
    - Info string for "fence" tokens
    - The value "auto" for autolink "link_open" and "link_close" tokens
    - The string value of the item marker for ordered-list "list_item_open" tokens
    """

    meta: dict = dc.field(default_factory=dict)
    """A place for plugins to store any arbitrary data"""

    block: bool = False
    """True for block-level tokens, false for inline tokens.
    Used in renderer to calculate line breaks
    """

    hidden: bool = False
    """If true, ignore this element when rendering.
    Used for tight lists to hide paragraphs.
    """

    def __post_init__(self):
        # accept upstream-style attrs (None or [[key, value], ...])
        self.attrs = convert_attrs(self.attrs)

    def attrIndex(self, name: str) -> int:
        """Deprecated: index of attribute `name` (-1 if absent)."""
        warnings.warn(
            "Token.attrIndex should not be used, since Token.attrs is a dictionary",
            UserWarning,
        )
        if name not in self.attrs:
            return -1
        return list(self.attrs.keys()).index(name)

    def attrItems(self) -> list[tuple[str, str | int | float]]:
        """Get (key, value) list of attrs."""
        return list(self.attrs.items())

    def attrPush(self, attrData: tuple[str, str | int | float]) -> None:
        """Add `[ name, value ]` attribute to list. Init attrs if necessary."""
        name, value = attrData
        self.attrSet(name, value)

    def attrSet(self, name: str, value: str | int | float) -> None:
        """Set `name` attribute to `value`. Override old value if exists."""
        self.attrs[name] = value

    def attrGet(self, name: str) -> None | str | int | float:
        """Get the value of attribute `name`, or null if it does not exist."""
        return self.attrs.get(name, None)

    def attrJoin(self, name: str, value: str) -> None:
        """Join value to existing attribute via space.
        Or create new attribute if not exists.
        Useful to operate with token classes.

        :raises TypeError: if an existing value for `name` is not a str.
        """
        if name in self.attrs:
            current = self.attrs[name]
            if not isinstance(current, str):
                # fix: interpolate the actual attribute name (the message
                # previously contained the literal string 'name')
                raise TypeError(
                    f"existing attr {name!r} is not a str: {self.attrs[name]}"
                )
            self.attrs[name] = f"{current} {value}"
        else:
            self.attrs[name] = value

    def copy(self, **changes: Any) -> Token:
        """Return a shallow copy of the instance."""
        return dc.replace(self, **changes)

    def as_dict(
        self,
        *,
        children: bool = True,
        as_upstream: bool = True,
        meta_serializer: Callable[[dict], Any] | None = None,
        filter: Callable[[str, Any], bool] | None = None,
        dict_factory: Callable[..., MutableMapping[str, Any]] = dict,
    ) -> MutableMapping[str, Any]:
        """Return the token as a dictionary.

        :param children: Also convert children to dicts
        :param as_upstream: Ensure the output dictionary is equal to that created by markdown-it
            For example, attrs are converted to null or lists
        :param meta_serializer: hook for serializing ``Token.meta``
        :param filter: A callable whose return code determines whether an
            attribute or element is included (``True``) or dropped (``False``).
            Is called with the (key, value) pair.
        :param dict_factory: A callable to produce dictionaries from.
            For example, to produce ordered dictionaries instead of normal Python
            dictionaries, pass in ``collections.OrderedDict``.

        """
        mapping = dict_factory((f.name, getattr(self, f.name)) for f in dc.fields(self))
        if filter:
            mapping = dict_factory((k, v) for k, v in mapping.items() if filter(k, v))
        if as_upstream and "attrs" in mapping:
            # upstream serialises empty attrs as null, otherwise [[k, v], ...]
            mapping["attrs"] = (
                None
                if not mapping["attrs"]
                else [[k, v] for k, v in mapping["attrs"].items()]
            )
        if meta_serializer and "meta" in mapping:
            mapping["meta"] = meta_serializer(mapping["meta"])
        if children and mapping.get("children", None):
            mapping["children"] = [
                child.as_dict(
                    children=children,
                    filter=filter,
                    dict_factory=dict_factory,
                    as_upstream=as_upstream,
                    meta_serializer=meta_serializer,
                )
                for child in mapping["children"]
            ]
        return mapping

    @classmethod
    def from_dict(cls, dct: MutableMapping[str, Any]) -> Token:
        """Convert a dict to a Token."""
        token = cls(**dct)
        if token.children:
            token.children = [cls.from_dict(c) for c in token.children]  # type: ignore[arg-type]
        return token
diff --git a/markdown_it/tree.py b/markdown_it/tree.py
new file mode 100644
index 0000000..09476b2
--- /dev/null
+++ b/markdown_it/tree.py
@@ -0,0 +1,330 @@
+"""A tree representation of a linear markdown-it token stream.
+
+This module is not part of upstream JavaScript markdown-it.
+"""
+from __future__ import annotations
+
+from collections.abc import Generator, Sequence
+import textwrap
+from typing import Any, NamedTuple, TypeVar, overload
+
+from .token import Token
+from .utils import _removesuffix
+
+
class _NesterTokens(NamedTuple):
    """The matched "_open"/"_close" ``Token`` pair wrapped by a container node."""

    opening: Token
    closing: Token


# Bound TypeVar so subclasses of SyntaxTreeNode keep their own type in
# children/parent/sibling accessors.
_NodeType = TypeVar("_NodeType", bound="SyntaxTreeNode")
+
+
class SyntaxTreeNode:
    """A Markdown syntax tree node.

    A class that can be used to construct a tree representation of a linear
    `markdown-it-py` token stream.

    Each node in the tree represents either:
    - root of the Markdown document
    - a single unnested `Token`
    - a `Token` "_open" and "_close" token pair, and the tokens nested in
      between
    """

    def __init__(
        self, tokens: Sequence[Token] = (), *, create_root: bool = True
    ) -> None:
        """Initialize a `SyntaxTreeNode` from a token stream.

        If `create_root` is True, create a root node for the document.
        """
        # Only nodes representing an unnested token have self.token
        self.token: Token | None = None

        # Only containers have nester tokens
        self.nester_tokens: _NesterTokens | None = None

        # Root node does not have self.parent
        self._parent: Any = None

        # Empty list unless a non-empty container, or unnested token that has
        # children (i.e. inline or img)
        self._children: list = []

        if create_root:
            self._set_children_from_tokens(tokens)
            return

        if not tokens:
            raise ValueError(
                "Can only create root from empty token sequence."
                " Set `create_root=True`."
            )
        elif len(tokens) == 1:
            # a single unnested token (may still carry inline children)
            inline_token = tokens[0]
            if inline_token.nesting:
                raise ValueError(
                    "Unequal nesting level at the start and end of token stream."
                )
            self.token = inline_token
            if inline_token.children:
                self._set_children_from_tokens(inline_token.children)
        else:
            # an "_open" ... "_close" pair wrapping nested tokens
            self.nester_tokens = _NesterTokens(tokens[0], tokens[-1])
            self._set_children_from_tokens(tokens[1:-1])

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self.type})"

    @overload
    def __getitem__(self: _NodeType, item: int) -> _NodeType:
        ...

    @overload
    def __getitem__(self: _NodeType, item: slice) -> list[_NodeType]:
        ...

    def __getitem__(self: _NodeType, item: int | slice) -> _NodeType | list[_NodeType]:
        return self.children[item]

    def to_tokens(self: _NodeType) -> list[Token]:
        """Recover the linear token stream."""

        def recursive_collect_tokens(node: _NodeType, token_list: list[Token]) -> None:
            if node.type == "root":
                for child in node.children:
                    recursive_collect_tokens(child, token_list)
            elif node.token:
                token_list.append(node.token)
            else:
                assert node.nester_tokens
                token_list.append(node.nester_tokens.opening)
                for child in node.children:
                    recursive_collect_tokens(child, token_list)
                token_list.append(node.nester_tokens.closing)

        tokens: list[Token] = []
        recursive_collect_tokens(self, tokens)
        return tokens

    @property
    def children(self: _NodeType) -> list[_NodeType]:
        return self._children

    @children.setter
    def children(self: _NodeType, value: list[_NodeType]) -> None:
        self._children = value

    @property
    def parent(self: _NodeType) -> _NodeType | None:
        return self._parent

    @parent.setter
    def parent(self: _NodeType, value: _NodeType | None) -> None:
        self._parent = value

    @property
    def is_root(self) -> bool:
        """Is the node a special root node?"""
        return not (self.token or self.nester_tokens)

    @property
    def is_nested(self) -> bool:
        """Is this node nested?.

        Returns `True` if the node represents a `Token` pair and tokens in the
        sequence between them, where `Token.nesting` of the first `Token` in
        the pair is 1 and nesting of the other `Token` is -1.
        """
        return bool(self.nester_tokens)

    @property
    def siblings(self: _NodeType) -> Sequence[_NodeType]:
        """Get siblings of the node.

        Gets the whole group of siblings, including self.
        """
        if not self.parent:
            return [self]
        return self.parent.children

    @property
    def type(self) -> str:
        """Get a string type of the represented syntax.

        - "root" for root nodes
        - `Token.type` if the node represents an unnested token
        - `Token.type` of the opening token, with "_open" suffix stripped, if
          the node represents a nester token pair
        """
        if self.is_root:
            return "root"
        if self.token:
            return self.token.type
        assert self.nester_tokens
        return _removesuffix(self.nester_tokens.opening.type, "_open")

    @property
    def next_sibling(self: _NodeType) -> _NodeType | None:
        """Get the next node in the sequence of siblings.

        Returns `None` if this is the last sibling.
        """
        self_index = self.siblings.index(self)
        if self_index + 1 < len(self.siblings):
            return self.siblings[self_index + 1]
        return None

    @property
    def previous_sibling(self: _NodeType) -> _NodeType | None:
        """Get the previous node in the sequence of siblings.

        Returns `None` if this is the first sibling.
        """
        self_index = self.siblings.index(self)
        if self_index - 1 >= 0:
            return self.siblings[self_index - 1]
        return None

    def _add_child(
        self,
        tokens: Sequence[Token],
    ) -> None:
        """Make a child node for `self`."""
        child = type(self)(tokens, create_root=False)
        child.parent = self
        self.children.append(child)

    def _set_children_from_tokens(self, tokens: Sequence[Token]) -> None:
        """Convert the token stream to a tree structure and set the resulting
        nodes as children of `self`."""
        reversed_tokens = list(reversed(tokens))
        while reversed_tokens:
            token = reversed_tokens.pop()

            if not token.nesting:
                # self-closing token: becomes a leaf child
                self.token = None if False else self.token  # no-op guard removed
                self._add_child([token])
                continue
            if token.nesting != 1:
                raise ValueError("Invalid token nesting")

            # collect tokens until the matching closing token balances out
            nested_tokens = [token]
            nesting = 1
            while reversed_tokens and nesting:
                token = reversed_tokens.pop()
                nested_tokens.append(token)
                nesting += token.nesting
            if nesting:
                raise ValueError(f"unclosed tokens starting {nested_tokens[0]}")

            self._add_child(nested_tokens)

    def pretty(
        self, *, indent: int = 2, show_text: bool = False, _current: int = 0
    ) -> str:
        """Create an XML style string of the tree."""
        prefix = " " * _current
        text = prefix + f"<{self.type}"
        if not self.is_root and self.attrs:
            text += " " + " ".join(f"{k}={v!r}" for k, v in self.attrs.items())
        text += ">"
        if show_text and not self.is_root and self.type == "text" and self.content:
            text += "\n" + textwrap.indent(self.content, prefix + " " * indent)
        for child in self.children:
            text += "\n" + child.pretty(
                indent=indent, show_text=show_text, _current=_current + indent
            )
        return text

    def walk(
        self: _NodeType, *, include_self: bool = True
    ) -> Generator[_NodeType, None, None]:
        """Recursively yield all descendant nodes in the tree starting at self.

        The order mimics the order of the underlying linear token
        stream (i.e. depth first).
        """
        if include_self:
            yield self
        for child in self.children:
            yield from child.walk(include_self=True)

    # NOTE:
    # The values of the properties defined below directly map to properties
    # of the underlying `Token`s. A root node does not translate to a `Token`
    # object, so calling these property getters on a root node will raise an
    # `AttributeError`.
    #
    # There is no mapping for `Token.nesting` because the `is_nested` property
    # provides that data, and can be called on any node type, including root.

    def _attribute_token(self) -> Token:
        """Return the `Token` that is used as the data source for the
        properties defined below."""
        if self.token:
            return self.token
        if self.nester_tokens:
            return self.nester_tokens.opening
        raise AttributeError("Root node does not have the accessed attribute")

    @property
    def tag(self) -> str:
        """html tag name, e.g. \"p\" """
        return self._attribute_token().tag

    @property
    def attrs(self) -> dict[str, str | int | float]:
        """Html attributes."""
        return self._attribute_token().attrs

    def attrGet(self, name: str) -> None | str | int | float:
        """Get the value of attribute `name`, or null if it does not exist."""
        return self._attribute_token().attrGet(name)

    @property
    def map(self) -> tuple[int, int] | None:
        """Source map info. Format: `tuple[ line_begin, line_end ]`"""
        map_ = self._attribute_token().map
        if map_:
            # Type ignore because `Token`s attribute types are not perfect
            return tuple(map_)  # type: ignore
        return None

    @property
    def level(self) -> int:
        """nesting level, the same as `state.level`"""
        return self._attribute_token().level

    @property
    def content(self) -> str:
        """In a case of self-closing tag (code, html, fence, etc.), it
        has contents of this tag."""
        return self._attribute_token().content

    @property
    def markup(self) -> str:
        """'*' or '_' for emphasis, fence string for fence, etc."""
        return self._attribute_token().markup

    @property
    def info(self) -> str:
        """fence infostring"""
        return self._attribute_token().info

    @property
    def meta(self) -> dict:
        """A place for plugins to store an arbitrary data."""
        return self._attribute_token().meta

    @property
    def block(self) -> bool:
        """True for block-level tokens, false for inline tokens."""
        return self._attribute_token().block

    @property
    def hidden(self) -> bool:
        """If it's true, ignore this element when rendering.
        Used for tight lists to hide paragraphs."""
        return self._attribute_token().hidden
diff --git a/markdown_it/utils.py b/markdown_it/utils.py
new file mode 100644
index 0000000..2ba2995
--- /dev/null
+++ b/markdown_it/utils.py
@@ -0,0 +1,122 @@
+from __future__ import annotations
+
+from collections.abc import Callable
+from pathlib import Path
+
+
class OptionsDict(dict):
    """A dict subclass exposing the core markdown-it configuration options
    as attributes, readable and writable (e.g. ``opts.html = True`` is the
    same as ``opts["html"] = True``).
    """

    @property
    def maxNesting(self) -> int:
        """Internal protection, recursion limit."""
        return self["maxNesting"]

    @maxNesting.setter
    def maxNesting(self, val: int) -> None:
        self["maxNesting"] = val

    @property
    def html(self) -> bool:
        """Enable HTML tags in source."""
        return self["html"]

    @html.setter
    def html(self, val: bool) -> None:
        self["html"] = val

    @property
    def linkify(self) -> bool:
        """Enable auto-conversion of URL-like text to links."""
        return self["linkify"]

    @linkify.setter
    def linkify(self, val: bool) -> None:
        self["linkify"] = val

    @property
    def typographer(self) -> bool:
        """Enable smart-quotes and replacements."""
        return self["typographer"]

    @typographer.setter
    def typographer(self, val: bool) -> None:
        self["typographer"] = val

    @property
    def quotes(self) -> str:
        """Quote characters."""
        return self["quotes"]

    @quotes.setter
    def quotes(self, val: str) -> None:
        self["quotes"] = val

    @property
    def xhtmlOut(self) -> bool:
        """Use '/' to close single tags (<br />)."""
        return self["xhtmlOut"]

    @xhtmlOut.setter
    def xhtmlOut(self, val: bool) -> None:
        self["xhtmlOut"] = val

    @property
    def breaks(self) -> bool:
        """Convert newlines in paragraphs into <br>."""
        return self["breaks"]

    @breaks.setter
    def breaks(self, val: bool) -> None:
        self["breaks"] = val

    @property
    def langPrefix(self) -> str:
        """CSS language prefix for fenced blocks."""
        return self["langPrefix"]

    @langPrefix.setter
    def langPrefix(self, val: str) -> None:
        self["langPrefix"] = val

    @property
    def highlight(self) -> Callable[[str, str, str], str] | None:
        """Highlighter function: (content, langName, langAttrs) -> escaped HTML."""
        return self["highlight"]

    @highlight.setter
    def highlight(self, val: Callable[[str, str, str], str] | None) -> None:
        self["highlight"] = val
+
+
def read_fixture_file(path: str | Path) -> list[list]:
    """Parse a fixture file into a list of test cases.

    Fixtures are delimited by lines consisting of a single ``.``:
    a title line, then ``.``, the input text, ``.``, the expected
    output, and a closing ``.``.

    :param path: path of the fixture file to read (UTF-8).
    :return: a list of ``[line_number, title, input, expected]`` entries.
    """
    lines = Path(path).read_text(encoding="utf-8").splitlines(keepends=True)
    tests: list[list] = []
    # state: 0 = before a fixture, 1 = reading input, 2 = reading output
    state = 0
    marker_pos = 0  # index of the previous "." delimiter line
    for index, line in enumerate(lines):
        if line.rstrip() != ".":
            continue
        if state == 0:
            # The line preceding the opening "." is the fixture title.
            tests.append([index, lines[index - 1].strip()])
        else:
            # Close out the input (state 1) or expected-output (state 2) span.
            tests[-1].append("".join(lines[marker_pos + 1 : index]))
        state = (state + 1) % 3
        marker_pos = index
    return tests
+
+
+def _removesuffix(string: str, suffix: str) -> str:
+ """Remove a suffix from a string.
+
+ Replace this with str.removesuffix() from stdlib when minimum Python
+ version is 3.9.
+ """
+ if suffix and string.endswith(suffix):
+ return string[: -len(suffix)]
+ return string