author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-29 04:24:24 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-29 04:24:24 +0000
commit     12e8343068b906f8b2afddc5569968a8a91fa5b0
tree       75cc5e05a4392ea0292251898f992a15a16b172b /markdown_it/rules_inline
parent     Initial commit.
Adding upstream version 2.1.0. (upstream/2.1.0, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'markdown_it/rules_inline')
-rw-r--r--  markdown_it/rules_inline/__init__.py       29
-rw-r--r--  markdown_it/rules_inline/autolink.py       78
-rw-r--r--  markdown_it/rules_inline/backticks.py      75
-rw-r--r--  markdown_it/rules_inline/balance_pairs.py  114
-rw-r--r--  markdown_it/rules_inline/emphasis.py       102
-rw-r--r--  markdown_it/rules_inline/entity.py         54
-rw-r--r--  markdown_it/rules_inline/escape.py         49
-rw-r--r--  markdown_it/rules_inline/html_inline.py    43
-rw-r--r--  markdown_it/rules_inline/image.py          151
-rw-r--r--  markdown_it/rules_inline/link.py           150
-rw-r--r--  markdown_it/rules_inline/newline.py        43
-rw-r--r--  markdown_it/rules_inline/state_inline.py   175
-rw-r--r--  markdown_it/rules_inline/strikethrough.py  133
-rw-r--r--  markdown_it/rules_inline/text.py           57
-rw-r--r--  markdown_it/rules_inline/text_collapse.py  43
15 files changed, 1296 insertions(+), 0 deletions(-)
diff --git a/markdown_it/rules_inline/__init__.py b/markdown_it/rules_inline/__init__.py
new file mode 100644
index 0000000..f27907c
--- /dev/null
+++ b/markdown_it/rules_inline/__init__.py
@@ -0,0 +1,29 @@
+__all__ = (
+ "StateInline",
+ "text",
+ "text_collapse",
+ "link_pairs",
+ "escape",
+ "newline",
+ "backtick",
+ "emphasis",
+ "image",
+ "link",
+ "autolink",
+ "entity",
+ "html_inline",
+ "strikethrough",
+)
+from . import emphasis, strikethrough
+from .autolink import autolink
+from .backticks import backtick
+from .balance_pairs import link_pairs
+from .entity import entity
+from .escape import escape
+from .html_inline import html_inline
+from .image import image
+from .link import link
+from .newline import newline
+from .state_inline import StateInline
+from .text import text
+from .text_collapse import text_collapse
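
Aside (not part of the patch): the names exported above are registered on the inline ruler in parser_inline.py, which is outside this diff. A minimal sketch of inspecting the two rule chains at runtime, assuming markdown-it-py 2.1.0 is installed as markdown_it:

    from markdown_it import MarkdownIt

    md = MarkdownIt()
    # Tokenizing rules, run while scanning the source text
    print(md.inline.ruler.get_all_rules())
    # Post-processing rules, run once over the finished token stream
    print(md.inline.ruler2.get_all_rules())
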
diff --git a/markdown_it/rules_inline/autolink.py b/markdown_it/rules_inline/autolink.py
new file mode 100644
index 0000000..a4ee61c
--- /dev/null
+++ b/markdown_it/rules_inline/autolink.py
@@ -0,0 +1,78 @@
+# Process autolinks '<protocol:...>'
+import re
+
+from .state_inline import StateInline
+
+EMAIL_RE = re.compile(
+    r"^([a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)$"  # noqa: E501
+)
+AUTOLINK_RE = re.compile(r"^([a-zA-Z][a-zA-Z0-9+.\-]{1,31}):([^<>\x00-\x20]*)$")
+
+
+def autolink(state: StateInline, silent: bool) -> bool:
+
+    pos = state.pos
+
+    if state.srcCharCode[pos] != 0x3C:  # /* < */
+        return False
+
+    start = state.pos
+    maximum = state.posMax
+
+    while True:
+        pos += 1
+        if pos >= maximum:
+            return False
+
+        ch = state.srcCharCode[pos]
+
+        if ch == 0x3C:  # /* < */
+            return False
+        if ch == 0x3E:  # /* > */
+            break
+
+    url = state.src[start + 1 : pos]
+
+    if AUTOLINK_RE.search(url) is not None:
+        fullUrl = state.md.normalizeLink(url)
+        if not state.md.validateLink(fullUrl):
+            return False
+
+        if not silent:
+            token = state.push("link_open", "a", 1)
+            token.attrs = {"href": fullUrl}
+            token.markup = "autolink"
+            token.info = "auto"
+
+            token = state.push("text", "", 0)
+            token.content = state.md.normalizeLinkText(url)
+
+            token = state.push("link_close", "a", -1)
+            token.markup = "autolink"
+            token.info = "auto"
+
+        state.pos += len(url) + 2
+        return True
+
+    if EMAIL_RE.search(url) is not None:
+        fullUrl = state.md.normalizeLink("mailto:" + url)
+        if not state.md.validateLink(fullUrl):
+            return False
+
+        if not silent:
+            token = state.push("link_open", "a", 1)
+            token.attrs = {"href": fullUrl}
+            token.markup = "autolink"
+            token.info = "auto"
+
+            token = state.push("text", "", 0)
+            token.content = state.md.normalizeLinkText(url)
+
+            token = state.push("link_close", "a", -1)
+            token.markup = "autolink"
+            token.info = "auto"
+
+        state.pos += len(url) + 2
+        return True
+
+    return False
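
As a quick check of the rule above (not part of the patch; assumes the package is importable as markdown_it), both the URL and the e-mail branch produce the same token sequence:

    from markdown_it import MarkdownIt

    md = MarkdownIt()
    print(md.renderInline("<https://commonmark.org>"))
    # <a href="https://commonmark.org">https://commonmark.org</a>
    print(md.renderInline("<user@example.com>"))  # EMAIL_RE branch adds mailto:
    # <a href="mailto:user@example.com">user@example.com</a>
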
diff --git a/markdown_it/rules_inline/backticks.py b/markdown_it/rules_inline/backticks.py
new file mode 100644
index 0000000..7bff12f
--- /dev/null
+++ b/markdown_it/rules_inline/backticks.py
@@ -0,0 +1,75 @@
+# Parse backticks
+import re
+
+from .state_inline import StateInline
+
+regex = re.compile("^ (.+) $")
+
+
+def backtick(state: StateInline, silent: bool) -> bool:
+
+    pos = state.pos
+    ch = state.srcCharCode[pos]
+
+    # /* ` */
+    if ch != 0x60:
+        return False
+
+    start = pos
+    pos += 1
+    maximum = state.posMax
+
+    # scan marker length
+    while pos < maximum and (state.srcCharCode[pos] == 0x60):  # /* ` */
+        pos += 1
+
+    marker = state.src[start:pos]
+    openerLength = len(marker)
+
+    if state.backticksScanned and state.backticks.get(openerLength, 0) <= start:
+        if not silent:
+            state.pending += marker
+        state.pos += openerLength
+        return True
+
+    matchStart = matchEnd = pos
+
+    # Nothing found in the cache; scan until the end of the line (or until a closing marker is found)
+    while True:
+        try:
+            matchStart = state.src.index("`", matchEnd)
+        except ValueError:
+            break
+        matchEnd = matchStart + 1
+
+        # scan marker length
+        while matchEnd < maximum and (state.srcCharCode[matchEnd] == 0x60):  # /* ` */
+            matchEnd += 1
+
+        closerLength = matchEnd - matchStart
+
+        if closerLength == openerLength:
+            # Found a closer run of matching length.
+            if not silent:
+                token = state.push("code_inline", "code", 0)
+                token.markup = marker
+                token.content = state.src[pos:matchStart].replace("\n", " ")
+                if (
+                    token.content.startswith(" ")
+                    and token.content.endswith(" ")
+                    and len(token.content.strip()) > 0
+                ):
+                    token.content = token.content[1:-1]
+            state.pos = matchEnd
+            return True
+
+        # A different length was found; cache it as the upper limit of where a closer of that length can be found
+        state.backticks[closerLength] = matchStart
+
+    # Scanned through to the end and didn't find anything
+    state.backticksScanned = True
+
+    if not silent:
+        state.pending += marker
+    state.pos += openerLength
+    return True
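
A short demonstration of the opener/closer length matching and the one-space stripping above — a sketch, not part of the patch:

    from markdown_it import MarkdownIt

    md = MarkdownIt()
    # The closer must be a backtick run of the same length as the opener,
    # so the single backtick inside stays literal
    print(md.renderInline("``code with ` inside``"))
    # <code>code with ` inside</code>
    print(md.renderInline("`` padded ``"))  # one space stripped from each end
    # <code>padded</code>
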
diff --git a/markdown_it/rules_inline/balance_pairs.py b/markdown_it/rules_inline/balance_pairs.py
new file mode 100644
index 0000000..db622f0
--- /dev/null
+++ b/markdown_it/rules_inline/balance_pairs.py
@@ -0,0 +1,114 @@
+# For each opening emphasis-like marker find a matching closing one
+#
+from .state_inline import StateInline
+
+
+def processDelimiters(state: StateInline, delimiters, *args):
+
+    openersBottom = {}
+    maximum = len(delimiters)
+
+    closerIdx = 0
+    while closerIdx < maximum:
+        closer = delimiters[closerIdx]
+
+        # Length is only used for emphasis-specific "rule of 3";
+        # if it's not defined (in strikethrough or 3rd party plugins),
+        # we can default it to 0 to disable those checks.
+        #
+        closer.length = closer.length or 0
+
+        if not closer.close:
+            closerIdx += 1
+            continue
+
+        # Previously calculated lower bounds (previous fails)
+        # for each marker, each delimiter length modulo 3,
+        # and for whether this closer can be an opener;
+        # https://github.com/commonmark/cmark/commit/34250e12ccebdc6372b8b49c44fab57c72443460
+        if closer.marker not in openersBottom:
+            openersBottom[closer.marker] = [-1, -1, -1, -1, -1, -1]
+
+        minOpenerIdx = openersBottom[closer.marker][
+            (3 if closer.open else 0) + (closer.length % 3)
+        ]
+
+        openerIdx = closerIdx - closer.jump - 1
+
+        # avoid crash if `closer.jump` is pointing outside of the array,
+        # e.g. for strikethrough
+        if openerIdx < -1:
+            openerIdx = -1
+
+        newMinOpenerIdx = openerIdx
+
+        while openerIdx > minOpenerIdx:
+            opener = delimiters[openerIdx]
+
+            if opener.marker != closer.marker:
+                openerIdx -= opener.jump + 1
+                continue
+
+            if opener.open and opener.end < 0:
+
+                isOddMatch = False
+
+                # from spec:
+                #
+                # If one of the delimiters can both open and close emphasis, then the
+                # sum of the lengths of the delimiter runs containing the opening and
+                # closing delimiters must not be a multiple of 3 unless both lengths
+                # are multiples of 3.
+                #
+                if opener.close or closer.open:
+                    if (opener.length + closer.length) % 3 == 0:
+                        if opener.length % 3 != 0 or closer.length % 3 != 0:
+                            isOddMatch = True
+
+                if not isOddMatch:
+                    # If previous delimiter cannot be an opener, we can safely skip
+                    # the entire sequence in future checks. This is required to make
+                    # sure algorithm has linear complexity (see *_*_*_*_*_... case).
+                    #
+                    if openerIdx > 0 and not delimiters[openerIdx - 1].open:
+                        lastJump = delimiters[openerIdx - 1].jump + 1
+                    else:
+                        lastJump = 0
+
+                    closer.jump = closerIdx - openerIdx + lastJump
+                    closer.open = False
+                    opener.end = closerIdx
+                    opener.jump = lastJump
+                    opener.close = False
+                    newMinOpenerIdx = -1
+                    break
+
+            openerIdx -= opener.jump + 1
+
+        if newMinOpenerIdx != -1:
+            # If match for this delimiter run failed, we want to set lower bound for
+            # future lookups. This is required to make sure algorithm has linear
+            # complexity.
+            #
+            # See details here:
+            # https://github.com/commonmark/cmark/issues/178#issuecomment-270417442
+            #
+            openersBottom[closer.marker][
+                (3 if closer.open else 0) + ((closer.length or 0) % 3)
+            ] = newMinOpenerIdx
+
+        closerIdx += 1
+
+
+def link_pairs(state: StateInline) -> None:
+    tokens_meta = state.tokens_meta
+    maximum = len(state.tokens_meta)
+
+    processDelimiters(state, state.delimiters)
+
+    curr = 0
+    while curr < maximum:
+        curr_meta = tokens_meta[curr]
+        if curr_meta and "delimiters" in curr_meta:
+            processDelimiters(state, curr_meta["delimiters"])
+        curr += 1
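
The "rule of 3" handled above is easiest to see on a CommonMark spec example — a sketch, not part of the patch:

    from markdown_it import MarkdownIt

    md = MarkdownIt()
    # The inner "**" runs are both-flanking; 1 + 2 is a multiple of 3 while the
    # individual lengths are not, so they cannot pair with the single "*" runs.
    print(md.renderInline("*foo**bar**baz*"))
    # <em>foo<strong>bar</strong>baz</em>
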
diff --git a/markdown_it/rules_inline/emphasis.py b/markdown_it/rules_inline/emphasis.py
new file mode 100644
index 0000000..9001b09
--- /dev/null
+++ b/markdown_it/rules_inline/emphasis.py
@@ -0,0 +1,102 @@
+# Process *this* and _that_
+#
+
+from .state_inline import Delimiter, StateInline
+
+
+def tokenize(state: StateInline, silent: bool):
+    """Insert each marker as a separate text token, and add it to delimiter list"""
+    start = state.pos
+    marker = state.srcCharCode[start]
+
+    if silent:
+        return False
+
+    # /* _ */  /* * */
+    if marker != 0x5F and marker != 0x2A:
+        return False
+
+    scanned = state.scanDelims(state.pos, marker == 0x2A)
+
+    for i in range(scanned.length):
+        token = state.push("text", "", 0)
+        token.content = chr(marker)
+        state.delimiters.append(
+            Delimiter(
+                marker=marker,
+                length=scanned.length,
+                jump=i,
+                token=len(state.tokens) - 1,
+                end=-1,
+                open=scanned.can_open,
+                close=scanned.can_close,
+            )
+        )
+
+    state.pos += scanned.length
+
+    return True
+
+
+def _postProcess(state, delimiters):
+
+    i = len(delimiters) - 1
+    while i >= 0:
+        startDelim = delimiters[i]
+
+        # /* _ */  /* * */
+        if startDelim.marker != 0x5F and startDelim.marker != 0x2A:
+            i -= 1
+            continue
+
+        # Process only opening markers
+        if startDelim.end == -1:
+            i -= 1
+            continue
+
+        endDelim = delimiters[startDelim.end]
+
+        # If the previous delimiter has the same marker and is adjacent to this one,
+        # merge those into one strong delimiter.
+        #
+        # `<em><em>whatever</em></em>` -> `<strong>whatever</strong>`
+        #
+        isStrong = (
+            i > 0
+            and delimiters[i - 1].end == startDelim.end + 1
+            and delimiters[i - 1].token == startDelim.token - 1
+            and delimiters[startDelim.end + 1].token == endDelim.token + 1
+            and delimiters[i - 1].marker == startDelim.marker
+        )
+
+        ch = chr(startDelim.marker)
+
+        token = state.tokens[startDelim.token]
+        token.type = "strong_open" if isStrong else "em_open"
+        token.tag = "strong" if isStrong else "em"
+        token.nesting = 1
+        token.markup = ch + ch if isStrong else ch
+        token.content = ""
+
+        token = state.tokens[endDelim.token]
+        token.type = "strong_close" if isStrong else "em_close"
+        token.tag = "strong" if isStrong else "em"
+        token.nesting = -1
+        token.markup = ch + ch if isStrong else ch
+        token.content = ""
+
+        if isStrong:
+            state.tokens[delimiters[i - 1].token].content = ""
+            state.tokens[delimiters[startDelim.end + 1].token].content = ""
+            i -= 1
+
+        i -= 1
+
+
+def postProcess(state: StateInline):
+    """Walk through delimiter list and replace text tokens with tags."""
+    _postProcess(state, state.delimiters)
+
+    for token in state.tokens_meta:
+        if token and "delimiters" in token:
+            _postProcess(state, token["delimiters"])
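
For reference (not part of the patch), the adjacent-pair merge above is what turns two nested em pairs into one strong pair:

    from markdown_it import MarkdownIt

    md = MarkdownIt()
    tokens = md.parseInline("**bold**")[0].children
    print([(t.type, t.content) for t in tokens])
    # [('strong_open', ''), ('text', 'bold'), ('strong_close', '')]
    print(md.renderInline("**bold** and _italic_"))
    # <strong>bold</strong> and <em>italic</em>
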
diff --git a/markdown_it/rules_inline/entity.py b/markdown_it/rules_inline/entity.py
new file mode 100644
index 0000000..883a966
--- /dev/null
+++ b/markdown_it/rules_inline/entity.py
@@ -0,0 +1,54 @@
+# Process html entity - &#123;, &#xAF;, &quot;, ...
+import re
+
+from ..common.entities import entities
+from ..common.utils import fromCodePoint, isValidEntityCode
+from .state_inline import StateInline
+
+DIGITAL_RE = re.compile(r"^&#((?:x[a-f0-9]{1,6}|[0-9]{1,7}));", re.IGNORECASE)
+NAMED_RE = re.compile(r"^&([a-z][a-z0-9]{1,31});", re.IGNORECASE)
+
+
+def entity(state: StateInline, silent: bool):
+
+    pos = state.pos
+    maximum = state.posMax
+
+    if state.srcCharCode[pos] != 0x26:  # /* & */
+        return False
+
+    if (pos + 1) < maximum:
+        ch = state.srcCharCode[pos + 1]
+
+        if ch == 0x23:  # /* # */
+            match = DIGITAL_RE.search(state.src[pos:])
+            if match:
+                if not silent:
+                    match1 = match.group(1)
+                    code = (
+                        int(match1[1:], 16)
+                        if match1[0].lower() == "x"
+                        else int(match1, 10)
+                    )
+                    state.pending += (
+                        fromCodePoint(code)
+                        if isValidEntityCode(code)
+                        else fromCodePoint(0xFFFD)
+                    )
+
+                state.pos += len(match.group(0))
+                return True
+
+        else:
+            match = NAMED_RE.search(state.src[pos:])
+            if match:
+                if match.group(1) in entities:
+                    if not silent:
+                        state.pending += entities[match.group(1)]
+                    state.pos += len(match.group(0))
+                    return True
+
+    if not silent:
+        state.pending += "&"
+    state.pos += 1
+    return True
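
A sketch (not part of the patch) of the decimal, hex, and named entity forms all being decoded into the pending buffer:

    from markdown_it import MarkdownIt

    md = MarkdownIt()
    tok = md.parseInline("&#123; &#xAF; &amp;")[0].children[0]
    print(tok.content)  # entities decoded during inline parsing
    # { ¯ &
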
diff --git a/markdown_it/rules_inline/escape.py b/markdown_it/rules_inline/escape.py
new file mode 100644
index 0000000..36bd040
--- /dev/null
+++ b/markdown_it/rules_inline/escape.py
@@ -0,0 +1,49 @@
+"""
+Process escaped chars and hardbreaks
+"""
+from ..common.utils import isSpace
+from .state_inline import StateInline
+
+ESCAPED = [0 for _ in range(256)]
+for ch in "\\!\"#$%&'()*+,./:;<=>?@[]^_`{|}~-":
+    ESCAPED[ord(ch)] = 1
+
+
+def escape(state: StateInline, silent: bool):
+    pos = state.pos
+    maximum = state.posMax
+
+    # /* \ */
+    if state.srcCharCode[pos] != 0x5C:
+        return False
+
+    pos += 1
+
+    if pos < maximum:
+        ch = state.srcCharCode[pos]
+
+        if ch < 256 and ESCAPED[ch] != 0:
+            if not silent:
+                state.pending += state.src[pos]
+            state.pos += 2
+            return True
+
+        if ch == 0x0A:
+            if not silent:
+                state.push("hardbreak", "br", 0)
+
+            pos += 1
+            # skip leading whitespace from the next line
+            while pos < maximum:
+                ch = state.srcCharCode[pos]
+                if not isSpace(ch):
+                    break
+                pos += 1
+
+            state.pos = pos
+            return True
+
+    if not silent:
+        state.pending += "\\"
+    state.pos += 1
+    return True
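
Quick illustration of both branches above, escaped punctuation and backslash-newline hard breaks — a sketch, not part of the patch:

    from markdown_it import MarkdownIt

    md = MarkdownIt()
    print(md.renderInline(r"\*not emphasized\*"))
    # *not emphasized*
    print(md.render("foo\\\nbar"))  # backslash before newline -> hardbreak
    # <p>foo<br />
    # bar</p>
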
diff --git a/markdown_it/rules_inline/html_inline.py b/markdown_it/rules_inline/html_inline.py
new file mode 100644
index 0000000..295cc5c
--- /dev/null
+++ b/markdown_it/rules_inline/html_inline.py
@@ -0,0 +1,43 @@
+# Process html tags
+from ..common.html_re import HTML_TAG_RE
+from .state_inline import StateInline
+
+
+def isLetter(ch: int):
+    lc = ch | 0x20  # to lower case
+    # /* a */ and /* z */
+    return (lc >= 0x61) and (lc <= 0x7A)
+
+
+def html_inline(state: StateInline, silent: bool):
+
+    pos = state.pos
+
+    if not state.md.options.get("html", None):
+        return False
+
+    # Check start
+    maximum = state.posMax
+    if state.srcCharCode[pos] != 0x3C or pos + 2 >= maximum:  # /* < */
+        return False
+
+    # Quick fail on second char
+    ch = state.srcCharCode[pos + 1]
+    if (
+        ch != 0x21  # /* ! */
+        and ch != 0x3F  # /* ? */
+        and ch != 0x2F  # /* / */
+        and not isLetter(ch)
+    ):
+        return False
+
+    match = HTML_TAG_RE.search(state.src[pos:])
+    if not match:
+        return False
+
+    if not silent:
+        token = state.push("html_inline", "", 0)
+        token.content = state.src[pos : pos + len(match.group(0))]
+
+    state.pos += len(match.group(0))
+    return True
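
The html option gate at the top is preset-dependent; a sketch (not part of the patch) assuming the default commonmark preset, which enables it:

    from markdown_it import MarkdownIt

    md = MarkdownIt()  # commonmark preset: options["html"] is True
    print(md.renderInline("a <b>bold</b> move"))
    # a <b>bold</b> move
    # With the "zero" preset the option is off and the tag stays escaped text
    print(MarkdownIt("zero").renderInline("a <b>bold</b> move"))
    # a &lt;b&gt;bold&lt;/b&gt; move
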
diff --git a/markdown_it/rules_inline/image.py b/markdown_it/rules_inline/image.py
new file mode 100644
index 0000000..d2a08d4
--- /dev/null
+++ b/markdown_it/rules_inline/image.py
@@ -0,0 +1,151 @@
+# Process ![image](<src> "title")
+from __future__ import annotations
+
+from ..common.utils import isSpace, normalizeReference
+from ..token import Token
+from .state_inline import StateInline
+
+
+def image(state: StateInline, silent: bool):
+
+    label = None
+    href = ""
+    oldPos = state.pos
+    max = state.posMax
+
+    # /* ! */
+    if state.srcCharCode[state.pos] != 0x21:
+        return False
+    # /* [ */
+    if state.pos + 1 < state.posMax and state.srcCharCode[state.pos + 1] != 0x5B:
+        return False
+
+    labelStart = state.pos + 2
+    labelEnd = state.md.helpers.parseLinkLabel(state, state.pos + 1, False)
+
+    # parser failed to find ']', so it's not a valid link
+    if labelEnd < 0:
+        return False
+
+    pos = labelEnd + 1
+    # /* ( */
+    if pos < max and state.srcCharCode[pos] == 0x28:
+        #
+        # Inline link
+        #
+
+        # [link](  <href>  "title"  )
+        #        ^^ skipping these spaces
+        pos += 1
+        while pos < max:
+            code = state.srcCharCode[pos]
+            if not isSpace(code) and code != 0x0A:
+                break
+            pos += 1
+
+        if pos >= max:
+            return False
+
+        # [link](  <href>  "title"  )
+        #          ^^^^^^ parsing link destination
+        start = pos
+        res = state.md.helpers.parseLinkDestination(state.src, pos, state.posMax)
+        if res.ok:
+            href = state.md.normalizeLink(res.str)
+            if state.md.validateLink(href):
+                pos = res.pos
+            else:
+                href = ""
+
+        # [link](  <href>  "title"  )
+        #                ^^ skipping these spaces
+        start = pos
+        while pos < max:
+            code = state.srcCharCode[pos]
+            if not isSpace(code) and code != 0x0A:
+                break
+            pos += 1
+
+        # [link](  <href>  "title"  )
+        #                  ^^^^^^^ parsing link title
+        res = state.md.helpers.parseLinkTitle(state.src, pos, state.posMax)
+        if pos < max and start != pos and res.ok:
+            title = res.str
+            pos = res.pos
+
+            # [link](  <href>  "title"  )
+            #                         ^^ skipping these spaces
+            while pos < max:
+                code = state.srcCharCode[pos]
+                if not isSpace(code) and code != 0x0A:
+                    break
+                pos += 1
+        else:
+            title = ""
+
+        # /* ) */
+        if pos >= max or state.srcCharCode[pos] != 0x29:
+            state.pos = oldPos
+            return False
+
+        pos += 1
+
+    else:
+        #
+        # Link reference
+        #
+        if "references" not in state.env:
+            return False
+
+        # /* [ */
+        if pos < max and state.srcCharCode[pos] == 0x5B:
+            start = pos + 1
+            pos = state.md.helpers.parseLinkLabel(state, pos)
+            if pos >= 0:
+                label = state.src[start:pos]
+                pos += 1
+            else:
+                pos = labelEnd + 1
+        else:
+            pos = labelEnd + 1
+
+        # covers label == '' and label is None
+        # (collapsed reference link and shortcut reference link respectively)
+        if not label:
+            label = state.src[labelStart:labelEnd]
+
+        label = normalizeReference(label)
+
+        ref = state.env["references"].get(label, None)
+        if not ref:
+            state.pos = oldPos
+            return False
+
+        href = ref["href"]
+        title = ref["title"]
+
+    #
+    # We found the end of the link, and know for a fact it's a valid link
+    # so all that's left to do is to call tokenizer.
+    #
+    if not silent:
+        content = state.src[labelStart:labelEnd]
+
+        tokens: list[Token] = []
+        state.md.inline.parse(content, state.md, state.env, tokens)
+
+        token = state.push("image", "img", 0)
+        token.attrs = {"src": href, "alt": ""}
+        token.children = tokens or None
+        token.content = content
+
+        if title:
+            token.attrSet("title", title)
+
+        # note, this is not part of markdown-it JS, but is useful for renderers
+        if label and state.md.options.get("store_labels", False):
+            token.meta["label"] = label
+
+    state.pos = pos
+    state.posMax = max
+    return True
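
The resulting image token carries the parsed pieces as follows — a sketch, not part of the patch:

    from markdown_it import MarkdownIt

    md = MarkdownIt()
    tok = md.parseInline('![alt text](image.png "a title")')[0].children[0]
    print(tok.type, tok.attrs)
    # image {'src': 'image.png', 'alt': '', 'title': 'a title'}
    print(tok.content)  # raw label; the renderer rebuilds alt from tok.children
    # alt text
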
diff --git a/markdown_it/rules_inline/link.py b/markdown_it/rules_inline/link.py
new file mode 100644
index 0000000..2394d6c
--- /dev/null
+++ b/markdown_it/rules_inline/link.py
@@ -0,0 +1,150 @@
+# Process [link](<to> "stuff")
+
+from ..common.utils import isSpace, normalizeReference
+from .state_inline import StateInline
+
+
+def link(state: StateInline, silent: bool):
+
+    href = ""
+    title = ""
+    label = None
+    oldPos = state.pos
+    maximum = state.posMax
+    start = state.pos
+    parseReference = True
+
+    if state.srcCharCode[state.pos] != 0x5B:  # /* [ */
+        return False
+
+    labelStart = state.pos + 1
+    labelEnd = state.md.helpers.parseLinkLabel(state, state.pos, True)
+
+    # parser failed to find ']', so it's not a valid link
+    if labelEnd < 0:
+        return False
+
+    pos = labelEnd + 1
+
+    if pos < maximum and state.srcCharCode[pos] == 0x28:  # /* ( */
+        #
+        # Inline link
+        #
+
+        # might have found a valid shortcut link, disable reference parsing
+        parseReference = False
+
+        # [link](  <href>  "title"  )
+        #        ^^ skipping these spaces
+        pos += 1
+        while pos < maximum:
+            code = state.srcCharCode[pos]
+            if not isSpace(code) and code != 0x0A:
+                break
+            pos += 1
+
+        if pos >= maximum:
+            return False
+
+        # [link](  <href>  "title"  )
+        #          ^^^^^^ parsing link destination
+        start = pos
+        res = state.md.helpers.parseLinkDestination(state.src, pos, state.posMax)
+        if res.ok:
+            href = state.md.normalizeLink(res.str)
+            if state.md.validateLink(href):
+                pos = res.pos
+            else:
+                href = ""
+
+        # [link](  <href>  "title"  )
+        #                ^^ skipping these spaces
+        start = pos
+        while pos < maximum:
+            code = state.srcCharCode[pos]
+            if not isSpace(code) and code != 0x0A:
+                break
+            pos += 1
+
+        # [link](  <href>  "title"  )
+        #                  ^^^^^^^ parsing link title
+        res = state.md.helpers.parseLinkTitle(state.src, pos, state.posMax)
+        if pos < maximum and start != pos and res.ok:
+            title = res.str
+            pos = res.pos
+
+            # [link](  <href>  "title"  )
+            #                         ^^ skipping these spaces
+            while pos < maximum:
+                code = state.srcCharCode[pos]
+                if not isSpace(code) and code != 0x0A:
+                    break
+                pos += 1
+
+        if pos >= maximum or state.srcCharCode[pos] != 0x29:  # /* ) */
+            # parsing a valid shortcut link failed, fallback to reference
+            parseReference = True
+
+        pos += 1
+
+    if parseReference:
+        #
+        # Link reference
+        #
+        if "references" not in state.env:
+            return False
+
+        if pos < maximum and state.srcCharCode[pos] == 0x5B:  # /* [ */
+            start = pos + 1
+            pos = state.md.helpers.parseLinkLabel(state, pos)
+            if pos >= 0:
+                label = state.src[start:pos]
+                pos += 1
+            else:
+                pos = labelEnd + 1
+
+        else:
+            pos = labelEnd + 1
+
+        # covers label == '' and label is None
+        # (collapsed reference link and shortcut reference link respectively)
+        if not label:
+            label = state.src[labelStart:labelEnd]
+
+        label = normalizeReference(label)
+
+        ref = (
+            state.env["references"][label] if label in state.env["references"] else None
+        )
+        if not ref:
+            state.pos = oldPos
+            return False
+
+        href = ref["href"]
+        title = ref["title"]
+
+    #
+    # We found the end of the link, and know for a fact it's a valid link
+    # so all that's left to do is to call tokenizer.
+    #
+    if not silent:
+        state.pos = labelStart
+        state.posMax = labelEnd
+
+        token = state.push("link_open", "a", 1)
+        token.attrs = {"href": href}
+
+        if title:
+            token.attrSet("title", title)
+
+        # note, this is not part of markdown-it JS, but is useful for renderers
+        if label and state.md.options.get("store_labels", False):
+            token.meta["label"] = label
+
+        state.md.inline.tokenize(state)
+
+        token = state.push("link_close", "a", -1)
+
+    state.pos = pos
+    state.posMax = maximum
+    return True
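
Both code paths above can be exercised from the public API — a sketch, not part of the patch:

    from markdown_it import MarkdownIt

    md = MarkdownIt()
    # Inline form
    print(md.renderInline('[text](https://example.com "title")'))
    # <a href="https://example.com" title="title">text</a>
    # Reference form: the definition is collected into env["references"] by the
    # block-level reference rule, then looked up here
    print(md.render("[text][ref]\n\n[ref]: https://example.com"))
    # <p><a href="https://example.com">text</a></p>
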
diff --git a/markdown_it/rules_inline/newline.py b/markdown_it/rules_inline/newline.py
new file mode 100644
index 0000000..3034e40
--- /dev/null
+++ b/markdown_it/rules_inline/newline.py
@@ -0,0 +1,43 @@
+# Process '\n'
+import re
+
+from ..common.utils import charCodeAt, isSpace
+from .state_inline import StateInline
+
+endSpace = re.compile(r" +$")
+
+
+def newline(state: StateInline, silent: bool):
+    pos = state.pos
+
+    # /* \n */
+    if state.srcCharCode[pos] != 0x0A:
+        return False
+
+    pmax = len(state.pending) - 1
+    maximum = state.posMax
+
+    # '  \n' -> hardbreak
+    # Lookup in pending chars is bad practice! Don't copy to other rules!
+    # (A caveat inherited from the JS implementation, where the pending string
+    # is stored in concat mode and indexed lookups force conversion to flat mode.)
+    if not silent:
+        if pmax >= 0 and charCodeAt(state.pending, pmax) == 0x20:
+            if pmax >= 1 and charCodeAt(state.pending, pmax - 1) == 0x20:
+                state.pending = endSpace.sub("", state.pending)
+                state.push("hardbreak", "br", 0)
+            else:
+                state.pending = state.pending[:-1]
+                state.push("softbreak", "br", 0)
+
+        else:
+            state.push("softbreak", "br", 0)
+
+    pos += 1
+
+    # skip leading spaces on the next line
+    while pos < maximum and isSpace(state.srcCharCode[pos]):
+        pos += 1
+
+    state.pos = pos
+    return True
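
The two-trailing-spaces convention handled above, as a sketch (not part of the patch):

    from markdown_it import MarkdownIt

    md = MarkdownIt()
    print([t.type for t in md.parseInline("foo  \nbar")[0].children])
    # ['text', 'hardbreak', 'text']
    print([t.type for t in md.parseInline("foo\nbar")[0].children])
    # ['text', 'softbreak', 'text']
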
diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py
new file mode 100644
index 0000000..283532c
--- /dev/null
+++ b/markdown_it/rules_inline/state_inline.py
@@ -0,0 +1,175 @@
+from __future__ import annotations
+
+from collections import namedtuple
+from collections.abc import MutableMapping
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+from .._compat import DATACLASS_KWARGS
+from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace
+from ..ruler import StateBase
+from ..token import Token
+
+if TYPE_CHECKING:
+    from markdown_it import MarkdownIt
+
+
+@dataclass(**DATACLASS_KWARGS)
+class Delimiter:
+    # Char code of the starting marker (number).
+    marker: int
+
+    # Total length of these series of delimiters.
+    length: int
+
+    # The number of characters before this one that are equivalent to it.
+    # In plain English: if this delimiter does not open an emphasis,
+    # neither do the previous `jump` characters.
+    #
+    # Used to skip sequences like "*****" in one step; for the 1st asterisk
+    # the value is 0, for the 2nd it's 1, and so on.
+    jump: int
+
+    # A position of the token this delimiter corresponds to.
+    token: int
+
+    # If this delimiter is matched as a valid opener, `end` will be
+    # equal to the position of the matching closer, otherwise it's `-1`.
+    end: int
+
+    # Boolean flags that determine if this delimiter could open or close
+    # an emphasis.
+    open: bool
+    close: bool
+
+    level: bool | None = None
+
+
+Scanned = namedtuple("Scanned", ["can_open", "can_close", "length"])
+
+
+class StateInline(StateBase):
+    def __init__(
+        self, src: str, md: MarkdownIt, env: MutableMapping, outTokens: list[Token]
+    ):
+        self.src = src
+        self.env = env
+        self.md = md
+        self.tokens = outTokens
+        self.tokens_meta: list[dict | None] = [None] * len(outTokens)
+
+        self.pos = 0
+        self.posMax = len(self.src)
+        self.level = 0
+        self.pending = ""
+        self.pendingLevel = 0
+
+        # Stores { start: end } pairs. Useful for backtrack
+        # optimization of pairs parse (emphasis, strikethrough).
+        self.cache: dict[int, int] = {}
+
+        # List of emphasis-like delimiters for current tag
+        self.delimiters: list[Delimiter] = []
+
+        # Stack of delimiter lists for upper level tags
+        self._prev_delimiters: list[list[Delimiter]] = []
+
+        # backtick length => last seen position
+        self.backticks: dict[int, int] = {}
+        self.backticksScanned = False
+
+    def __repr__(self):
+        return (
+            f"{self.__class__.__name__}"
+            f"(pos=[{self.pos} of {self.posMax}], token={len(self.tokens)})"
+        )
+
+    def pushPending(self):
+        token = Token("text", "", 0)
+        token.content = self.pending
+        token.level = self.pendingLevel
+        self.tokens.append(token)
+        self.pending = ""
+        return token
+
+    def push(self, ttype, tag, nesting):
+        """Push new token to "stream".
+        If pending text exists - flush it as text token
+        """
+        if self.pending:
+            self.pushPending()
+
+        token = Token(ttype, tag, nesting)
+        token_meta = None
+
+        if nesting < 0:
+            # closing tag
+            self.level -= 1
+            self.delimiters = self._prev_delimiters.pop()
+
+        token.level = self.level
+
+        if nesting > 0:
+            # opening tag
+            self.level += 1
+            self._prev_delimiters.append(self.delimiters)
+            self.delimiters = []
+            token_meta = {"delimiters": self.delimiters}
+
+        self.pendingLevel = self.level
+        self.tokens.append(token)
+        self.tokens_meta.append(token_meta)
+        return token
+
+    def scanDelims(self, start, canSplitWord):
+        """
+        Scan a sequence of emphasis-like markers, and determine whether
+        it can start an emphasis sequence or end an emphasis sequence.
+
+        - start - position to scan from (it should point at a valid marker);
+        - canSplitWord - determine if these markers can be found inside a word
+
+        """
+        pos = start
+        left_flanking = True
+        right_flanking = True
+        maximum = self.posMax
+        marker = self.srcCharCode[start]
+
+        # treat beginning of the line as a whitespace
+        lastChar = self.srcCharCode[start - 1] if start > 0 else 0x20
+
+        while pos < maximum and self.srcCharCode[pos] == marker:
+            pos += 1
+
+        count = pos - start
+
+        # treat end of the line as a whitespace
+        nextChar = self.srcCharCode[pos] if pos < maximum else 0x20
+
+        isLastPunctChar = isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar))
+        isNextPunctChar = isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar))
+
+        isLastWhiteSpace = isWhiteSpace(lastChar)
+        isNextWhiteSpace = isWhiteSpace(nextChar)
+
+        if isNextWhiteSpace:
+            left_flanking = False
+        elif isNextPunctChar:
+            if not (isLastWhiteSpace or isLastPunctChar):
+                left_flanking = False
+
+        if isLastWhiteSpace:
+            right_flanking = False
+        elif isLastPunctChar:
+            if not (isNextWhiteSpace or isNextPunctChar):
+                right_flanking = False
+
+        if not canSplitWord:
+            can_open = left_flanking and ((not right_flanking) or isLastPunctChar)
+            can_close = right_flanking and ((not left_flanking) or isNextPunctChar)
+        else:
+            can_open = left_flanking
+            can_close = right_flanking
+
+        return Scanned(can_open, can_close, count)
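
StateInline can be driven directly, which is handy for seeing the flanking logic of scanDelims in isolation — a sketch, not part of the patch:

    from markdown_it import MarkdownIt
    from markdown_it.rules_inline import StateInline

    md = MarkdownIt()
    state = StateInline("**hello**", md, {}, [])
    # The run at position 0 follows start-of-line (treated as whitespace),
    # so it is left-flanking only: it can open emphasis but not close it.
    print(state.scanDelims(0, True))
    # Scanned(can_open=True, can_close=False, length=2)
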
diff --git a/markdown_it/rules_inline/strikethrough.py b/markdown_it/rules_inline/strikethrough.py
new file mode 100644
index 0000000..107ea26
--- /dev/null
+++ b/markdown_it/rules_inline/strikethrough.py
@@ -0,0 +1,133 @@
+# ~~strike through~~
+from __future__ import annotations
+
+from .state_inline import Delimiter, StateInline
+
+
+def tokenize(state: StateInline, silent: bool):
+    """Insert each marker as a separate text token, and add it to delimiter list"""
+    start = state.pos
+    marker = state.srcCharCode[start]
+
+    if silent:
+        return False
+
+    if marker != 0x7E:  # /* ~ */
+        return False
+
+    scanned = state.scanDelims(state.pos, True)
+    length = scanned.length
+    ch = chr(marker)
+
+    if length < 2:
+        return False
+
+    if length % 2:
+        token = state.push("text", "", 0)
+        token.content = ch
+        length -= 1
+
+    i = 0
+    while i < length:
+        token = state.push("text", "", 0)
+        token.content = ch + ch
+        state.delimiters.append(
+            Delimiter(
+                **{
+                    "marker": marker,
+                    "length": 0,  # disable "rule of 3" length checks meant for emphasis
+                    "jump": i // 2,  # for `~~` 1 marker = 2 characters
+                    "token": len(state.tokens) - 1,
+                    "end": -1,
+                    "open": scanned.can_open,
+                    "close": scanned.can_close,
+                }
+            )
+        )
+
+        i += 2
+
+    state.pos += scanned.length
+
+    return True
+
+
+def _postProcess(state: StateInline, delimiters: list[Delimiter]):
+
+    loneMarkers = []
+    maximum = len(delimiters)
+
+    i = 0
+    while i < maximum:
+        startDelim = delimiters[i]
+
+        if startDelim.marker != 0x7E:  # /* ~ */
+            i += 1
+            continue
+
+        if startDelim.end == -1:
+            i += 1
+            continue
+
+        endDelim = delimiters[startDelim.end]
+
+        token = state.tokens[startDelim.token]
+        token.type = "s_open"
+        token.tag = "s"
+        token.nesting = 1
+        token.markup = "~~"
+        token.content = ""
+
+        token = state.tokens[endDelim.token]
+        token.type = "s_close"
+        token.tag = "s"
+        token.nesting = -1
+        token.markup = "~~"
+        token.content = ""
+
+        if (
+            state.tokens[endDelim.token - 1].type == "text"
+            and state.tokens[endDelim.token - 1].content == "~"
+        ):
+
+            loneMarkers.append(endDelim.token - 1)
+
+        i += 1
+
+    # If a marker sequence has an odd number of characters, it's split
+    # like this: `~~~~~` -> `~` + `~~` + `~~`, leaving one marker at the
+    # start of the sequence.
+    #
+    # So, we have to move all those markers after subsequent s_close tags.
+    #
+    while loneMarkers:
+        i = loneMarkers.pop()
+        j = i + 1
+
+        while (j < len(state.tokens)) and (state.tokens[j].type == "s_close"):
+            j += 1
+
+        j -= 1
+
+        if i != j:
+            token = state.tokens[j]
+            state.tokens[j] = state.tokens[i]
+            state.tokens[i] = token
+
+
+def postProcess(state: StateInline):
+    """Walk through delimiter list and replace text tokens with tags."""
+    tokens_meta = state.tokens_meta
+    maximum = len(state.tokens_meta)
+    _postProcess(state, state.delimiters)
+
+    curr = 0
+    while curr < maximum:
+        try:
+            curr_meta = tokens_meta[curr]
+        except IndexError:
+            pass
+        else:
+            if curr_meta and "delimiters" in curr_meta:
+                _postProcess(state, curr_meta["delimiters"])
+        curr += 1
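
Strikethrough is not part of the commonmark preset that MarkdownIt() defaults to, so the rule has to be switched on — a sketch, not part of the patch:

    from markdown_it import MarkdownIt

    md = MarkdownIt().enable("strikethrough")
    print(md.renderInline("~~gone~~"))
    # <s>gone</s>
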
diff --git a/markdown_it/rules_inline/text.py b/markdown_it/rules_inline/text.py
new file mode 100644
index 0000000..ec6ee0f
--- /dev/null
+++ b/markdown_it/rules_inline/text.py
@@ -0,0 +1,57 @@
+# Skip text characters for text token, place those to pending buffer
+# and increment current pos
+
+from .state_inline import StateInline
+
+# Rule to skip pure text
+# '{}$%@~+=:' reserved for extensions
+
+# !, ", #, $, %, &, ', (, ), *, +, ,, -, ., /, :, ;, <, =, >, ?, @, [, \, ], ^, _, `, {, |, }, or ~
+
+# !!!! Don't confuse with "Markdown ASCII Punctuation" chars
+# http://spec.commonmark.org/0.15/#ascii-punctuation-character
+
+
+def isTerminatorChar(ch):
+    return ch in {
+        0x0A,  # /* \n */:
+        0x21,  # /* ! */:
+        0x23,  # /* # */:
+        0x24,  # /* $ */:
+        0x25,  # /* % */:
+        0x26,  # /* & */:
+        0x2A,  # /* * */:
+        0x2B,  # /* + */:
+        0x2D,  # /* - */:
+        0x3A,  # /* : */:
+        0x3C,  # /* < */:
+        0x3D,  # /* = */:
+        0x3E,  # /* > */:
+        0x40,  # /* @ */:
+        0x5B,  # /* [ */:
+        0x5C,  # /* \ */:
+        0x5D,  # /* ] */:
+        0x5E,  # /* ^ */:
+        0x5F,  # /* _ */:
+        0x60,  # /* ` */:
+        0x7B,  # /* { */:
+        0x7D,  # /* } */:
+        0x7E,  # /* ~ */:
+    }
+
+
+def text(state: StateInline, silent: bool, **args):
+    pos = state.pos
+    posMax = state.posMax
+    while (pos < posMax) and not isTerminatorChar(state.srcCharCode[pos]):
+        pos += 1
+
+    if pos == state.pos:
+        return False
+
+    if not silent:
+        state.pending += state.src[state.pos : pos]
+
+    state.pos = pos
+
+    return True
diff --git a/markdown_it/rules_inline/text_collapse.py b/markdown_it/rules_inline/text_collapse.py
new file mode 100644
index 0000000..6d0c0ab
--- /dev/null
+++ b/markdown_it/rules_inline/text_collapse.py
@@ -0,0 +1,43 @@
+from .state_inline import StateInline
+
+
+def text_collapse(state: StateInline, *args):
+    """
+    Clean up tokens after emphasis and strikethrough postprocessing:
+    merge adjacent text nodes into one and re-calculate all token levels
+
+    This is necessary because initially emphasis delimiter markers (``*, _, ~``)
+    are treated as their own separate text tokens. Then emphasis rule either
+    leaves them as text (needed to merge with adjacent text) or turns them
+    into opening/closing tags (which messes up levels inside).
+    """
+    level = 0
+    maximum = len(state.tokens)
+
+    curr = last = 0
+    while curr < maximum:
+        # re-calculate levels after emphasis/strikethrough turns some text nodes
+        # into opening/closing tags
+        if state.tokens[curr].nesting < 0:
+            level -= 1  # closing tag
+        state.tokens[curr].level = level
+        if state.tokens[curr].nesting > 0:
+            level += 1  # opening tag
+
+        if (
+            state.tokens[curr].type == "text"
+            and curr + 1 < maximum
+            and state.tokens[curr + 1].type == "text"
+        ):
+            # collapse two adjacent text nodes
+            state.tokens[curr + 1].content = (
+                state.tokens[curr].content + state.tokens[curr + 1].content
+            )
+        else:
+            if curr != last:
+                state.tokens[last] = state.tokens[curr]
+            last += 1
+        curr += 1
+
+    if curr != last:
+        del state.tokens[last:]
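
End-to-end, this collapse pass is what keeps unmatched markers from fragmenting the text — a sketch, not part of the patch:

    from markdown_it import MarkdownIt

    md = MarkdownIt()
    # The "*" cannot open or close emphasis here, so emphasis.tokenize leaves it
    # as a text token and text_collapse merges everything back into one node
    children = md.parseInline("2 * 3 = 6")[0].children
    print([(t.type, t.content) for t in children])
    # [('text', '2 * 3 = 6')]
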