author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-29 04:24:24 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-29 04:24:24 +0000
commit     12e8343068b906f8b2afddc5569968a8a91fa5b0
tree       75cc5e05a4392ea0292251898f992a15a16b172b /markdown_it/rules_inline
parent     Initial commit.
Adding upstream version 2.1.0. (upstream/2.1.0, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'markdown_it/rules_inline')
-rw-r--r--  markdown_it/rules_inline/__init__.py       29
-rw-r--r--  markdown_it/rules_inline/autolink.py       78
-rw-r--r--  markdown_it/rules_inline/backticks.py      75
-rw-r--r--  markdown_it/rules_inline/balance_pairs.py  114
-rw-r--r--  markdown_it/rules_inline/emphasis.py       102
-rw-r--r--  markdown_it/rules_inline/entity.py         54
-rw-r--r--  markdown_it/rules_inline/escape.py         49
-rw-r--r--  markdown_it/rules_inline/html_inline.py    43
-rw-r--r--  markdown_it/rules_inline/image.py          151
-rw-r--r--  markdown_it/rules_inline/link.py           150
-rw-r--r--  markdown_it/rules_inline/newline.py        43
-rw-r--r--  markdown_it/rules_inline/state_inline.py   175
-rw-r--r--  markdown_it/rules_inline/strikethrough.py  133
-rw-r--r--  markdown_it/rules_inline/text.py           57
-rw-r--r--  markdown_it/rules_inline/text_collapse.py  43
15 files changed, 1296 insertions(+), 0 deletions(-)
diff --git a/markdown_it/rules_inline/__init__.py b/markdown_it/rules_inline/__init__.py
new file mode 100644
index 0000000..f27907c
--- /dev/null
+++ b/markdown_it/rules_inline/__init__.py
@@ -0,0 +1,29 @@
+__all__ = (
+ "StateInline",
+ "text",
+ "text_collapse",
+ "link_pairs",
+ "escape",
+ "newline",
+ "backtick",
+ "emphasis",
+ "image",
+ "link",
+ "autolink",
+ "entity",
+ "html_inline",
+ "strikethrough",
+)
+from . import emphasis, strikethrough
+from .autolink import autolink
+from .backticks import backtick
+from .balance_pairs import link_pairs
+from .entity import entity
+from .escape import escape
+from .html_inline import html_inline
+from .image import image
+from .link import link
+from .newline import newline
+from .state_inline import StateInline
+from .text import text
+from .text_collapse import text_collapse
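
Aside (not part of the patch): the names exported above are registered on the inline ruler in parser_inline.py, which is outside this diff. A minimal sketch of inspecting the two rule chains at runtime, assuming markdown-it-py 2.1.0 is installed as markdown_it:

    from markdown_it import MarkdownIt

    md = MarkdownIt()
    # Tokenizing rules, run while scanning the source text
    print(md.inline.ruler.get_all_rules())
    # Post-processing rules, run once over the finished token stream
    print(md.inline.ruler2.get_all_rules())
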
diff --git a/markdown_it/rules_inline/autolink.py b/markdown_it/rules_inline/autolink.py
new file mode 100644
index 0000000..a4ee61c
--- /dev/null
+++ b/markdown_it/rules_inline/autolink.py
@@ -0,0 +1,78 @@
+# Process autolinks '<protocol:...>'
+import re
+
+from .state_inline import StateInline
+
+EMAIL_RE = re.compile(
+    r"^([a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)$"  # noqa: E501
+)
+AUTOLINK_RE = re.compile(r"^([a-zA-Z][a-zA-Z0-9+.\-]{1,31}):([^<>\x00-\x20]*)$")
+
+
+def autolink(state: StateInline, silent: bool) -> bool:
+
+    pos = state.pos
+
+    if state.srcCharCode[pos] != 0x3C:  # /* < */
+        return False
+
+    start = state.pos
+    maximum = state.posMax
+
+    while True:
+        pos += 1
+        if pos >= maximum:
+            return False
+
+        ch = state.srcCharCode[pos]
+
+        if ch == 0x3C:  # /* < */
+            return False
+        if ch == 0x3E:  # /* > */
+            break
+
+    url = state.src[start + 1 : pos]
+
+    if AUTOLINK_RE.search(url) is not None:
+        fullUrl = state.md.normalizeLink(url)
+        if not state.md.validateLink(fullUrl):
+            return False
+
+        if not silent:
+            token = state.push("link_open", "a", 1)
+            token.attrs = {"href": fullUrl}
+            token.markup = "autolink"
+            token.info = "auto"
+
+            token = state.push("text", "", 0)
+            token.content = state.md.normalizeLinkText(url)
+
+            token = state.push("link_close", "a", -1)
+            token.markup = "autolink"
+            token.info = "auto"
+
+        state.pos += len(url) + 2
+        return True
+
+    if EMAIL_RE.search(url) is not None:
+        fullUrl = state.md.normalizeLink("mailto:" + url)
+        if not state.md.validateLink(fullUrl):
+            return False
+
+        if not silent:
+            token = state.push("link_open", "a", 1)
+            token.attrs = {"href": fullUrl}
+            token.markup = "autolink"
+            token.info = "auto"
+
+            token = state.push("text", "", 0)
+            token.content = state.md.normalizeLinkText(url)
+
+            token = state.push("link_close", "a", -1)
+            token.markup = "autolink"
+            token.info = "auto"
+
+        state.pos += len(url) + 2
+        return True
+
+    return False
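
As a quick check of the rule above (not part of the patch; assumes the package is importable as markdown_it), both the URL and the e-mail branch produce the same token sequence:

    from markdown_it import MarkdownIt

    md = MarkdownIt()
    print(md.renderInline("<https://commonmark.org>"))
    # <a href="https://commonmark.org">https://commonmark.org</a>
    print(md.renderInline("<user@example.com>"))  # EMAIL_RE branch adds mailto:
    # <a href="mailto:user@example.com">user@example.com</a>
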
diff --git a/markdown_it/rules_inline/backticks.py b/markdown_it/rules_inline/backticks.py
new file mode 100644
index 0000000..7bff12f
--- /dev/null
+++ b/markdown_it/rules_inline/backticks.py
@@ -0,0 +1,75 @@
+# Parse backticks
+import re
+
+from .state_inline import StateInline
+
+regex = re.compile("^ (.+) $")
+
+
+def backtick(state: StateInline, silent: bool) -> bool:
+
+    pos = state.pos
+    ch = state.srcCharCode[pos]
+
+    # /* ` */
+    if ch != 0x60:
+        return False
+
+    start = pos
+    pos += 1
+    maximum = state.posMax
+
+    # scan marker length
+    while pos < maximum and (state.srcCharCode[pos] == 0x60):  # /* ` */
+        pos += 1
+
+    marker = state.src[start:pos]
+    openerLength = len(marker)
+
+    if state.backticksScanned and state.backticks.get(openerLength, 0) <= start:
+        if not silent:
+            state.pending += marker
+        state.pos += openerLength
+        return True
+
+    matchStart = matchEnd = pos
+
+    # Nothing found in the cache; scan until the end of the line (or until a closing marker is found)
+    while True:
+        try:
+            matchStart = state.src.index("`", matchEnd)
+        except ValueError:
+            break
+        matchEnd = matchStart + 1
+
+        # scan marker length
+        while matchEnd < maximum and (state.srcCharCode[matchEnd] == 0x60):  # /* ` */
+            matchEnd += 1
+
+        closerLength = matchEnd - matchStart
+
+        if closerLength == openerLength:
+            # Found a closer run of matching length.
+            if not silent:
+                token = state.push("code_inline", "code", 0)
+                token.markup = marker
+                token.content = state.src[pos:matchStart].replace("\n", " ")
+                if (
+                    token.content.startswith(" ")
+                    and token.content.endswith(" ")
+                    and len(token.content.strip()) > 0
+                ):
+                    token.content = token.content[1:-1]
+            state.pos = matchEnd
+            return True
+
+        # A different length was found; cache it as the upper limit of where a closer of that length can be found
+        state.backticks[closerLength] = matchStart
+
+    # Scanned through to the end and didn't find anything
+    state.backticksScanned = True
+
+    if not silent:
+        state.pending += marker
+    state.pos += openerLength
+    return True
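
A short demonstration of the opener/closer length matching and the one-space stripping above — a sketch, not part of the patch:

    from markdown_it import MarkdownIt

    md = MarkdownIt()
    # The closer must be a backtick run of the same length as the opener,
    # so the single backtick inside stays literal
    print(md.renderInline("``code with ` inside``"))
    # <code>code with ` inside</code>
    print(md.renderInline("`` padded ``"))  # one space stripped from each end
    # <code>padded</code>
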
diff --git a/markdown_it/rules_inline/balance_pairs.py b/markdown_it/rules_inline/balance_pairs.py
new file mode 100644
index 0000000..db622f0
--- /dev/null
+++ b/markdown_it/rules_inline/balance_pairs.py
@@ -0,0 +1,114 @@
+# For each opening emphasis-like marker find a matching closing one
+#
+from .state_inline import StateInline
+
+
+def processDelimiters(state: StateInline, delimiters, *args):
+
+    openersBottom = {}
+    maximum = len(delimiters)
+
+    closerIdx = 0
+    while closerIdx < maximum:
+        closer = delimiters[closerIdx]
+
+        # Length is only used for emphasis-specific "rule of 3";
+        # if it's not defined (in strikethrough or 3rd party plugins),
+        # we can default it to 0 to disable those checks.
+        #
+        closer.length = closer.length or 0
+
+        if not closer.close:
+            closerIdx += 1
+            continue
+
+        # Previously calculated lower bounds (previous fails)
+        # for each marker, each delimiter length modulo 3,
+        # and for whether this closer can be an opener;
+        # https://github.com/commonmark/cmark/commit/34250e12ccebdc6372b8b49c44fab57c72443460
+        if closer.marker not in openersBottom:
+            openersBottom[closer.marker] = [-1, -1, -1, -1, -1, -1]
+
+        minOpenerIdx = openersBottom[closer.marker][
+            (3 if closer.open else 0) + (closer.length % 3)
+        ]
+
+        openerIdx = closerIdx - closer.jump - 1
+
+        # avoid crash if `closer.jump` is pointing outside of the array,
+        # e.g. for strikethrough
+        if openerIdx < -1:
+            openerIdx = -1
+
+        newMinOpenerIdx = openerIdx
+
+        while openerIdx > minOpenerIdx:
+            opener = delimiters[openerIdx]
+
+            if opener.marker != closer.marker:
+                openerIdx -= opener.jump + 1
+                continue
+
+            if opener.open and opener.end < 0:
+
+                isOddMatch = False
+
+                # from spec:
+                #
+                # If one of the delimiters can both open and close emphasis, then the
+                # sum of the lengths of the delimiter runs containing the opening and
+                # closing delimiters must not be a multiple of 3 unless both lengths
+                # are multiples of 3.
+                #
+                if opener.close or closer.open:
+                    if (opener.length + closer.length) % 3 == 0:
+                        if opener.length % 3 != 0 or closer.length % 3 != 0:
+                            isOddMatch = True
+
+                if not isOddMatch:
+                    # If previous delimiter cannot be an opener, we can safely skip
+                    # the entire sequence in future checks. This is required to make
+                    # sure algorithm has linear complexity (see *_*_*_*_*_... case).
+                    #
+                    if openerIdx > 0 and not delimiters[openerIdx - 1].open:
+                        lastJump = delimiters[openerIdx - 1].jump + 1
+                    else:
+                        lastJump = 0
+
+                    closer.jump = closerIdx - openerIdx + lastJump
+                    closer.open = False
+                    opener.end = closerIdx
+                    opener.jump = lastJump
+                    opener.close = False
+                    newMinOpenerIdx = -1
+                    break
+
+            openerIdx -= opener.jump + 1
+
+        if newMinOpenerIdx != -1:
+            # If match for this delimiter run failed, we want to set lower bound for
+            # future lookups. This is required to make sure algorithm has linear
+            # complexity.
+            #
+            # See details here:
+            # https://github.com/commonmark/cmark/issues/178#issuecomment-270417442
+            #
+            openersBottom[closer.marker][
+                (3 if closer.open else 0) + ((closer.length or 0) % 3)
+            ] = newMinOpenerIdx
+
+        closerIdx += 1
+
+
+def link_pairs(state: StateInline) -> None:
+    tokens_meta = state.tokens_meta
+    maximum = len(state.tokens_meta)
+
+    processDelimiters(state, state.delimiters)
+
+    curr = 0
+    while curr < maximum:
+        curr_meta = tokens_meta[curr]
+        if curr_meta and "delimiters" in curr_meta:
+            processDelimiters(state, curr_meta["delimiters"])
+        curr += 1
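
The "rule of 3" handled above is easiest to see on a CommonMark spec example — a sketch, not part of the patch:

    from markdown_it import MarkdownIt

    md = MarkdownIt()
    # The inner "**" runs are both-flanking; 1 + 2 is a multiple of 3 while the
    # individual lengths are not, so they cannot pair with the single "*" runs.
    print(md.renderInline("*foo**bar**baz*"))
    # <em>foo<strong>bar</strong>baz</em>
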
diff --git a/markdown_it/rules_inline/emphasis.py b/markdown_it/rules_inline/emphasis.py
new file mode 100644
index 0000000..9001b09
--- /dev/null
+++ b/markdown_it/rules_inline/emphasis.py
@@ -0,0 +1,102 @@
+# Process *this* and _that_
+#
+
+from .state_inline import Delimiter, StateInline
+
+
+def tokenize(state: StateInline, silent: bool):
+    """Insert each marker as a separate text token, and add it to delimiter list"""
+    start = state.pos
+    marker = state.srcCharCode[start]
+
+    if silent:
+        return False
+
+    # /* _ */  /* * */
+    if marker != 0x5F and marker != 0x2A:
+        return False
+
+    scanned = state.scanDelims(state.pos, marker == 0x2A)
+
+    for i in range(scanned.length):
+        token = state.push("text", "", 0)
+        token.content = chr(marker)
+        state.delimiters.append(
+            Delimiter(
+                marker=marker,
+                length=scanned.length,
+                jump=i,
+                token=len(state.tokens) - 1,
+                end=-1,
+                open=scanned.can_open,
+                close=scanned.can_close,
+            )
+        )
+
+    state.pos += scanned.length
+
+    return True
+
+
+def _postProcess(state, delimiters):
+
+    i = len(delimiters) - 1
+    while i >= 0:
+        startDelim = delimiters[i]
+
+        # /* _ */  /* * */
+        if startDelim.marker != 0x5F and startDelim.marker != 0x2A:
+            i -= 1
+            continue
+
+        # Process only opening markers
+        if startDelim.end == -1:
+            i -= 1
+            continue
+
+        endDelim = delimiters[startDelim.end]
+
+        # If the previous delimiter has the same marker and is adjacent to this one,
+        # merge those into one strong delimiter.
+        #
+        # `<em><em>whatever</em></em>` -> `<strong>whatever</strong>`
+        #
+        isStrong = (
+            i > 0
+            and delimiters[i - 1].end == startDelim.end + 1
+            and delimiters[i - 1].token == startDelim.token - 1
+            and delimiters[startDelim.end + 1].token == endDelim.token + 1
+            and delimiters[i - 1].marker == startDelim.marker
+        )
+
+        ch = chr(startDelim.marker)
+
+        token = state.tokens[startDelim.token]
+        token.type = "strong_open" if isStrong else "em_open"
+        token.tag = "strong" if isStrong else "em"
+        token.nesting = 1
+        token.markup = ch + ch if isStrong else ch
+        token.content = ""
+
+        token = state.tokens[endDelim.token]
+        token.type = "strong_close" if isStrong else "em_close"
+        token.tag = "strong" if isStrong else "em"
+        token.nesting = -1
+        token.markup = ch + ch if isStrong else ch
+        token.content = ""
+
+        if isStrong:
+            state.tokens[delimiters[i - 1].token].content = ""
+            state.tokens[delimiters[startDelim.end + 1].token].content = ""
+            i -= 1
+
+        i -= 1
+
+
+def postProcess(state: StateInline):
+    """Walk through delimiter list and replace text tokens with tags."""
+    _postProcess(state, state.delimiters)
+
+    for token in state.tokens_meta:
+        if token and "delimiters" in token:
+            _postProcess(state, token["delimiters"])
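
For reference (not part of the patch), the adjacent-pair merge above is what turns two nested em pairs into one strong pair:

    from markdown_it import MarkdownIt

    md = MarkdownIt()
    tokens = md.parseInline("**bold**")[0].children
    print([(t.type, t.content) for t in tokens])
    # [('strong_open', ''), ('text', 'bold'), ('strong_close', '')]
    print(md.renderInline("**bold** and _italic_"))
    # <strong>bold</strong> and <em>italic</em>
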
diff --git a/markdown_it/rules_inline/entity.py b/markdown_it/rules_inline/entity.py
new file mode 100644
index 0000000..883a966
--- /dev/null
+++ b/markdown_it/rules_inline/entity.py
@@ -0,0 +1,54 @@
+# Process html entity - &#123;, &#xAF;, &quot;, ...
+import re
+
+from ..common.entities import entities
+from ..common.utils import fromCodePoint, isValidEntityCode
+from .state_inline import StateInline
+
+DIGITAL_RE = re.compile(r"^&#((?:x[a-f0-9]{1,6}|[0-9]{1,7}));", re.IGNORECASE)
+NAMED_RE = re.compile(r"^&([a-z][a-z0-9]{1,31});", re.IGNORECASE)
+
+
+def entity(state: StateInline, silent: bool):
+
+    pos = state.pos
+    maximum = state.posMax
+
+    if state.srcCharCode[pos] != 0x26:  # /* & */
+        return False
+
+    if (pos + 1) < maximum:
+        ch = state.srcCharCode[pos + 1]
+
+        if ch == 0x23:  # /* # */
+            match = DIGITAL_RE.search(state.src[pos:])
+            if match:
+                if not silent:
+                    match1 = match.group(1)
+                    code = (
+                        int(match1[1:], 16)
+                        if match1[0].lower() == "x"
+                        else int(match1, 10)
+                    )
+                    state.pending += (
+                        fromCodePoint(code)
+                        if isValidEntityCode(code)
+                        else fromCodePoint(0xFFFD)
+                    )
+
+                state.pos += len(match.group(0))
+                return True
+
+        else:
+            match = NAMED_RE.search(state.src[pos:])
+            if match:
+                if match.group(1) in entities:
+                    if not silent:
+                        state.pending += entities[match.group(1)]
+                    state.pos += len(match.group(0))
+                    return True
+
+    if not silent:
+        state.pending += "&"
+    state.pos += 1
+    return True
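
A sketch (not part of the patch) of the decimal, hex, and named entity forms all being decoded into the pending buffer:

    from markdown_it import MarkdownIt

    md = MarkdownIt()
    tok = md.parseInline("&#123; &#xAF; &amp;")[0].children[0]
    print(tok.content)  # entities decoded during inline parsing
    # { ¯ &
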
diff --git a/markdown_it/rules_inline/escape.py b/markdown_it/rules_inline/escape.py
new file mode 100644
index 0000000..36bd040
--- /dev/null
+++ b/markdown_it/rules_inline/escape.py
@@ -0,0 +1,49 @@
+"""
+Process escaped chars and hardbreaks
+"""
+from ..common.utils import isSpace
+from .state_inline import StateInline
+
+ESCAPED = [0 for _ in range(256)]
+for ch in "\\!\"#$%&'()*+,./:;<=>?@[]^_`{|}~-":
+    ESCAPED[ord(ch)] = 1
+
+
+def escape(state: StateInline, silent: bool):
+    pos = state.pos
+    maximum = state.posMax
+
+    # /* \ */
+    if state.srcCharCode[pos] != 0x5C:
+        return False
+
+    pos += 1
+
+    if pos < maximum:
+        ch = state.srcCharCode[pos]
+
+        if ch < 256 and ESCAPED[ch] != 0:
+            if not silent:
+                state.pending += state.src[pos]
+            state.pos += 2
+            return True
+
+        if ch == 0x0A:
+            if not silent:
+                state.push("hardbreak", "br", 0)
+
+            pos += 1
+            # skip leading whitespace from the next line
+            while pos < maximum:
+                ch = state.srcCharCode[pos]
+                if not isSpace(ch):
+                    break
+                pos += 1
+
+            state.pos = pos
+            return True
+
+    if not silent:
+        state.pending += "\\"
+    state.pos += 1
+    return True
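
Quick illustration of both branches above, escaped punctuation and backslash-newline hard breaks — a sketch, not part of the patch:

    from markdown_it import MarkdownIt

    md = MarkdownIt()
    print(md.renderInline(r"\*not emphasized\*"))
    # *not emphasized*
    print(md.render("foo\\\nbar"))  # backslash before newline -> hardbreak
    # <p>foo<br />
    # bar</p>
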
diff --git a/markdown_it/rules_inline/html_inline.py b/markdown_it/rules_inline/html_inline.py
new file mode 100644
index 0000000..295cc5c
--- /dev/null
+++ b/markdown_it/rules_inline/html_inline.py
@@ -0,0 +1,43 @@
+# Process html tags
+from ..common.html_re import HTML_TAG_RE
+from .state_inline import StateInline
+
+
+def isLetter(ch: int):
+    lc = ch | 0x20  # to lower case
+    # /* a */ and /* z */
+    return (lc >= 0x61) and (lc <= 0x7A)
+
+
+def html_inline(state: StateInline, silent: bool):
+
+    pos = state.pos
+
+    if not state.md.options.get("html", None):
+        return False
+
+    # Check start
+    maximum = state.posMax
+    if state.srcCharCode[pos] != 0x3C or pos + 2 >= maximum:  # /* < */
+        return False
+
+    # Quick fail on second char
+    ch = state.srcCharCode[pos + 1]
+    if (
+        ch != 0x21  # /* ! */
+        and ch != 0x3F  # /* ? */
+        and ch != 0x2F  # /* / */
+        and not isLetter(ch)
+    ):
+        return False
+
+    match = HTML_TAG_RE.search(state.src[pos:])
+    if not match:
+        return False
+
+    if not silent:
+        token = state.push("html_inline", "", 0)
+        token.content = state.src[pos : pos + len(match.group(0))]
+
+    state.pos += len(match.group(0))
+    return True
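
The html option gate at the top is preset-dependent; a sketch (not part of the patch) assuming the default commonmark preset, which enables it:

    from markdown_it import MarkdownIt

    md = MarkdownIt()  # commonmark preset: options["html"] is True
    print(md.renderInline("a <b>bold</b> move"))
    # a <b>bold</b> move
    # With the "zero" preset the option is off and the tag stays escaped text
    print(MarkdownIt("zero").renderInline("a <b>bold</b> move"))
    # a &lt;b&gt;bold&lt;/b&gt; move
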
diff --git a/markdown_it/rules_inline/image.py b/markdown_it/rules_inline/image.py
new file mode 100644
index 0000000..d2a08d4
--- /dev/null
+++ b/markdown_it/rules_inline/image.py
@@ -0,0 +1,151 @@
+# Process ![image](<src> "title")
+from __future__ import annotations
+
+from ..common.utils import isSpace, normalizeReference
+from ..token import Token
+from .state_inline import StateInline
+
+
+def image(state: StateInline, silent: bool):
+
+    label = None
+    href = ""
+    oldPos = state.pos
+    max = state.posMax
+
+    # /* ! */
+    if state.srcCharCode[state.pos] != 0x21:
+        return False
+    # /* [ */
+    if state.pos + 1 < state.posMax and state.srcCharCode[state.pos + 1] != 0x5B:
+        return False
+
+    labelStart = state.pos + 2
+    labelEnd = state.md.helpers.parseLinkLabel(state, state.pos + 1, False)
+
+    # parser failed to find ']', so it's not a valid link
+    if labelEnd < 0:
+        return False
+
+    pos = labelEnd + 1
+    # /* ( */
+    if pos < max and state.srcCharCode[pos] == 0x28:
+        #
+        # Inline link
+        #
+
+        # [link](  <href>  "title"  )
+        #        ^^ skipping these spaces
+        pos += 1
+        while pos < max:
+            code = state.srcCharCode[pos]
+            if not isSpace(code) and code != 0x0A:
+                break
+            pos += 1
+
+        if pos >= max:
+            return False
+
+        # [link](  <href>  "title"  )
+        #          ^^^^^^ parsing link destination
+        start = pos
+        res = state.md.helpers.parseLinkDestination(state.src, pos, state.posMax)
+        if res.ok:
+            href = state.md.normalizeLink(res.str)
+            if state.md.validateLink(href):
+                pos = res.pos
+            else:
+                href = ""
+
+        # [link](  <href>  "title"  )
+        #                ^^ skipping these spaces
+        start = pos
+        while pos < max:
+            code = state.srcCharCode[pos]
+            if not isSpace(code) and code != 0x0A:
+                break
+            pos += 1
+
+        # [link](  <href>  "title"  )
+        #                  ^^^^^^^ parsing link title
+        res = state.md.helpers.parseLinkTitle(state.src, pos, state.posMax)
+        if pos < max and start != pos and res.ok:
+            title = res.str
+            pos = res.pos
+
+            # [link](  <href>  "title"  )
+            #                         ^^ skipping these spaces
+            while pos < max:
+                code = state.srcCharCode[pos]
+                if not isSpace(code) and code != 0x0A:
+                    break
+                pos += 1
+        else:
+            title = ""
+
+        # /* ) */
+        if pos >= max or state.srcCharCode[pos] != 0x29:
+            state.pos = oldPos
+            return False
+
+        pos += 1
+
+    else:
+        #
+        # Link reference
+        #
+        if "references" not in state.env:
+            return False
+
+        # /* [ */
+        if pos < max and state.srcCharCode[pos] == 0x5B:
+            start = pos + 1
+            pos = state.md.helpers.parseLinkLabel(state, pos)
+            if pos >= 0:
+                label = state.src[start:pos]
+                pos += 1
+            else:
+                pos = labelEnd + 1
+        else:
+            pos = labelEnd + 1
+
+        # covers label == '' and label is None
+        # (collapsed reference link and shortcut reference link respectively)
+        if not label:
+            label = state.src[labelStart:labelEnd]
+
+        label = normalizeReference(label)
+
+        ref = state.env["references"].get(label, None)
+        if not ref:
+            state.pos = oldPos
+            return False
+
+        href = ref["href"]
+        title = ref["title"]
+
+    #
+    # We found the end of the link, and know for a fact it's a valid link
+    # so all that's left to do is to call tokenizer.
+    #
+    if not silent:
+        content = state.src[labelStart:labelEnd]
+
+        tokens: list[Token] = []
+        state.md.inline.parse(content, state.md, state.env, tokens)
+
+        token = state.push("image", "img", 0)
+        token.attrs = {"src": href, "alt": ""}
+        token.children = tokens or None
+        token.content = content
+
+        if title:
+            token.attrSet("title", title)
+
+        # note, this is not part of markdown-it JS, but is useful for renderers
+        if label and state.md.options.get("store_labels", False):
+            token.meta["label"] = label
+
+    state.pos = pos
+    state.posMax = max
+    return True
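
The resulting image token carries the parsed pieces as follows — a sketch, not part of the patch:

    from markdown_it import MarkdownIt

    md = MarkdownIt()
    tok = md.parseInline('![alt text](image.png "a title")')[0].children[0]
    print(tok.type, tok.attrs)
    # image {'src': 'image.png', 'alt': '', 'title': 'a title'}
    print(tok.content)  # raw label; the renderer rebuilds alt from tok.children
    # alt text
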
diff --git a/markdown_it/rules_inline/link.py b/markdown_it/rules_inline/link.py
new file mode 100644
index 0000000..2394d6c
--- /dev/null
+++ b/markdown_it/rules_inline/link.py
@@ -0,0 +1,150 @@
+# Process [link](<to> "stuff")
+
+from ..common.utils import isSpace, normalizeReference
+from .state_inline import StateInline
+
+
+def link(state: StateInline, silent: bool):
+
+    href = ""
+    title = ""
+    label = None
+    oldPos = state.pos
+    maximum = state.posMax
+    start = state.pos
+    parseReference = True
+
+    if state.srcCharCode[state.pos] != 0x5B:  # /* [ */
+        return False
+
+    labelStart = state.pos + 1
+    labelEnd = state.md.helpers.parseLinkLabel(state, state.pos, True)
+
+    # parser failed to find ']', so it's not a valid link
+    if labelEnd < 0:
+        return False
+
+    pos = labelEnd + 1
+
+    if pos < maximum and state.srcCharCode[pos] == 0x28:  # /* ( */
+        #
+        # Inline link
+        #
+
+        # might have found a valid shortcut link, disable reference parsing
+        parseReference = False
+
+        # [link](  <href>  "title"  )
+        #        ^^ skipping these spaces
+        pos += 1
+        while pos < maximum:
+            code = state.srcCharCode[pos]
+            if not isSpace(code) and code != 0x0A:
+                break
+            pos += 1
+
+        if pos >= maximum:
+            return False
+
+        # [link](  <href>  "title"  )
+        #          ^^^^^^ parsing link destination
+        start = pos
+        res = state.md.helpers.parseLinkDestination(state.src, pos, state.posMax)
+        if res.ok:
+            href = state.md.normalizeLink(res.str)
+            if state.md.validateLink(href):
+                pos = res.pos
+            else:
+                href = ""
+
+        # [link](  <href>  "title"  )
+        #                ^^ skipping these spaces
+        start = pos
+        while pos < maximum:
+            code = state.srcCharCode[pos]
+            if not isSpace(code) and code != 0x0A:
+                break
+            pos += 1
+
+        # [link](  <href>  "title"  )
+        #                  ^^^^^^^ parsing link title
+        res = state.md.helpers.parseLinkTitle(state.src, pos, state.posMax)
+        if pos < maximum and start != pos and res.ok:
+            title = res.str
+            pos = res.pos
+
+            # [link](  <href>  "title"  )
+            #                         ^^ skipping these spaces
+            while pos < maximum:
+                code = state.srcCharCode[pos]
+                if not isSpace(code) and code != 0x0A:
+                    break
+                pos += 1
+
+        if pos >= maximum or state.srcCharCode[pos] != 0x29:  # /* ) */
+            # parsing a valid shortcut link failed, fallback to reference
+            parseReference = True
+
+        pos += 1
+
+    if parseReference:
+        #
+        # Link reference
+        #
+        if "references" not in state.env:
+            return False
+
+        if pos < maximum and state.srcCharCode[pos] == 0x5B:  # /* [ */
+            start = pos + 1
+            pos = state.md.helpers.parseLinkLabel(state, pos)
+            if pos >= 0:
+                label = state.src[start:pos]
+                pos += 1
+            else:
+                pos = labelEnd + 1
+
+        else:
+            pos = labelEnd + 1
+
+        # covers label == '' and label is None
+        # (collapsed reference link and shortcut reference link respectively)
+        if not label:
+            label = state.src[labelStart:labelEnd]
+
+        label = normalizeReference(label)
+
+        ref = (
+            state.env["references"][label] if label in state.env["references"] else None
+        )
+        if not ref:
+            state.pos = oldPos
+            return False
+
+        href = ref["href"]
+        title = ref["title"]
+
+    #
+    # We found the end of the link, and know for a fact it's a valid link
+    # so all that's left to do is to call tokenizer.
+    #
+    if not silent:
+        state.pos = labelStart
+        state.posMax = labelEnd
+
+        token = state.push("link_open", "a", 1)
+        token.attrs = {"href": href}
+
+        if title:
+            token.attrSet("title", title)
+
+        # note, this is not part of markdown-it JS, but is useful for renderers
+        if label and state.md.options.get("store_labels", False):
+            token.meta["label"] = label
+
+        state.md.inline.tokenize(state)
+
+        token = state.push("link_close", "a", -1)
+
+    state.pos = pos
+    state.posMax = maximum
+    return True
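
Both code paths above can be exercised from the public API — a sketch, not part of the patch:

    from markdown_it import MarkdownIt

    md = MarkdownIt()
    # Inline form
    print(md.renderInline('[text](https://example.com "title")'))
    # <a href="https://example.com" title="title">text</a>
    # Reference form: the definition is collected into env["references"] by the
    # block-level reference rule, then looked up here
    print(md.render("[text][ref]\n\n[ref]: https://example.com"))
    # <p><a href="https://example.com">text</a></p>
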
diff --git a/markdown_it/rules_inline/newline.py b/markdown_it/rules_inline/newline.py
new file mode 100644
index 0000000..3034e40
--- /dev/null
+++ b/markdown_it/rules_inline/newline.py
@@ -0,0 +1,43 @@
+# Process '\n'
+import re
+
+from ..common.utils import charCodeAt, isSpace
+from .state_inline import StateInline
+
+endSpace = re.compile(r" +$")
+
+
+def newline(state: StateInline, silent: bool):
+    pos = state.pos
+
+    # /* \n */
+    if state.srcCharCode[pos] != 0x0A:
+        return False
+
+    pmax = len(state.pending) - 1
+    maximum = state.posMax
+
+    # '  \n' -> hardbreak
+    # Lookup in pending chars is bad practice! Don't copy to other rules!
+    # (A caveat inherited from the JS implementation, where the pending string
+    # is stored in concat mode and indexed lookups force conversion to flat mode.)
+    if not silent:
+        if pmax >= 0 and charCodeAt(state.pending, pmax) == 0x20:
+            if pmax >= 1 and charCodeAt(state.pending, pmax - 1) == 0x20:
+                state.pending = endSpace.sub("", state.pending)
+                state.push("hardbreak", "br", 0)
+            else:
+                state.pending = state.pending[:-1]
+                state.push("softbreak", "br", 0)
+
+        else:
+            state.push("softbreak", "br", 0)
+
+    pos += 1
+
+    # skip leading spaces on the next line
+    while pos < maximum and isSpace(state.srcCharCode[pos]):
+        pos += 1
+
+    state.pos = pos
+    return True
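
The two-trailing-spaces convention handled above, as a sketch (not part of the patch):

    from markdown_it import MarkdownIt

    md = MarkdownIt()
    print([t.type for t in md.parseInline("foo  \nbar")[0].children])
    # ['text', 'hardbreak', 'text']
    print([t.type for t in md.parseInline("foo\nbar")[0].children])
    # ['text', 'softbreak', 'text']
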
diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py
new file mode 100644
index 0000000..283532c
--- /dev/null
+++ b/markdown_it/rules_inline/state_inline.py
@@ -0,0 +1,175 @@
+from __future__ import annotations
+
+from collections import namedtuple
+from collections.abc import MutableMapping
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+from .._compat import DATACLASS_KWARGS
+from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace
+from ..ruler import StateBase
+from ..token import Token
+
+if TYPE_CHECKING:
+    from markdown_it import MarkdownIt
+
+
+@dataclass(**DATACLASS_KWARGS)
+class Delimiter:
+    # Char code of the starting marker (number).
+    marker: int
+
+    # Total length of these series of delimiters.
+    length: int
+
+    # The number of characters before this one that are equivalent to it.
+    # In plain English: if this delimiter does not open an emphasis,
+    # neither do the previous `jump` characters.
+    #
+    # Used to skip sequences like "*****" in one step; for the 1st asterisk
+    # the value is 0, for the 2nd it's 1, and so on.
+    jump: int
+
+    # A position of the token this delimiter corresponds to.
+    token: int
+
+    # If this delimiter is matched as a valid opener, `end` will be
+    # equal to the position of the matching closer, otherwise it's `-1`.
+    end: int
+
+    # Boolean flags that determine if this delimiter could open or close
+    # an emphasis.
+    open: bool
+    close: bool
+
+    level: bool | None = None
+
+
+Scanned = namedtuple("Scanned", ["can_open", "can_close", "length"])
+
+
+class StateInline(StateBase):
+    def __init__(
+        self, src: str, md: MarkdownIt, env: MutableMapping, outTokens: list[Token]
+    ):
+        self.src = src
+        self.env = env
+        self.md = md
+        self.tokens = outTokens
+        self.tokens_meta: list[dict | None] = [None] * len(outTokens)
+
+        self.pos = 0
+        self.posMax = len(self.src)
+        self.level = 0
+        self.pending = ""
+        self.pendingLevel = 0
+
+        # Stores { start: end } pairs. Useful for backtrack
+        # optimization of pairs parse (emphasis, strikethrough).
+        self.cache: dict[int, int] = {}
+
+        # List of emphasis-like delimiters for current tag
+        self.delimiters: list[Delimiter] = []
+
+        # Stack of delimiter lists for upper level tags
+        self._prev_delimiters: list[list[Delimiter]] = []
+
+        # backtick length => last seen position
+        self.backticks: dict[int, int] = {}
+        self.backticksScanned = False
+
+    def __repr__(self):
+        return (
+            f"{self.__class__.__name__}"
+            f"(pos=[{self.pos} of {self.posMax}], token={len(self.tokens)})"
+        )
+
+    def pushPending(self):
+        token = Token("text", "", 0)
+        token.content = self.pending
+        token.level = self.pendingLevel
+        self.tokens.append(token)
+        self.pending = ""
+        return token
+
+    def push(self, ttype, tag, nesting):
+        """Push new token to "stream".
+        If pending text exists - flush it as text token
+        """
+        if self.pending:
+            self.pushPending()
+
+        token = Token(ttype, tag, nesting)
+        token_meta = None
+
+        if nesting < 0:
+            # closing tag
+            self.level -= 1
+            self.delimiters = self._prev_delimiters.pop()
+
+        token.level = self.level
+
+        if nesting > 0:
+            # opening tag
+            self.level += 1
+            self._prev_delimiters.append(self.delimiters)
+            self.delimiters = []
+            token_meta = {"delimiters": self.delimiters}
+
+        self.pendingLevel = self.level
+        self.tokens.append(token)
+        self.tokens_meta.append(token_meta)
+        return token
+
+    def scanDelims(self, start, canSplitWord):
+        """
+        Scan a sequence of emphasis-like markers, and determine whether
+        it can start an emphasis sequence or end an emphasis sequence.
+
+        - start - position to scan from (it should point at a valid marker);
+        - canSplitWord - determine if these markers can be found inside a word
+
+        """
+        pos = start
+        left_flanking = True
+        right_flanking = True
+        maximum = self.posMax
+        marker = self.srcCharCode[start]
+
+        # treat beginning of the line as a whitespace
+        lastChar = self.srcCharCode[start - 1] if start > 0 else 0x20
+
+        while pos < maximum and self.srcCharCode[pos] == marker:
+            pos += 1
+
+        count = pos - start
+
+        # treat end of the line as a whitespace
+        nextChar = self.srcCharCode[pos] if pos < maximum else 0x20
+
+        isLastPunctChar = isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar))
+        isNextPunctChar = isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar))
+
+        isLastWhiteSpace = isWhiteSpace(lastChar)
+        isNextWhiteSpace = isWhiteSpace(nextChar)
+
+        if isNextWhiteSpace:
+            left_flanking = False
+        elif isNextPunctChar:
+            if not (isLastWhiteSpace or isLastPunctChar):
+                left_flanking = False
+
+        if isLastWhiteSpace:
+            right_flanking = False
+        elif isLastPunctChar:
+            if not (isNextWhiteSpace or isNextPunctChar):
+                right_flanking = False
+
+        if not canSplitWord:
+            can_open = left_flanking and ((not right_flanking) or isLastPunctChar)
+            can_close = right_flanking and ((not left_flanking) or isNextPunctChar)
+        else:
+            can_open = left_flanking
+            can_close = right_flanking
+
+        return Scanned(can_open, can_close, count)
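
StateInline can be driven directly, which is handy for seeing the flanking logic of scanDelims in isolation — a sketch, not part of the patch:

    from markdown_it import MarkdownIt
    from markdown_it.rules_inline import StateInline

    md = MarkdownIt()
    state = StateInline("**hello**", md, {}, [])
    # The run at position 0 follows start-of-line (treated as whitespace),
    # so it is left-flanking only: it can open emphasis but not close it.
    print(state.scanDelims(0, True))
    # Scanned(can_open=True, can_close=False, length=2)
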
diff --git a/markdown_it/rules_inline/strikethrough.py b/markdown_it/rules_inline/strikethrough.py
new file mode 100644
index 0000000..107ea26
--- /dev/null
+++ b/markdown_it/rules_inline/strikethrough.py
@@ -0,0 +1,133 @@
+# ~~strike through~~
+from __future__ import annotations
+
+from .state_inline import Delimiter, StateInline
+
+
+def tokenize(state: StateInline, silent: bool):
+    """Insert each marker as a separate text token, and add it to delimiter list"""
+    start = state.pos
+    marker = state.srcCharCode[start]
+
+    if silent:
+        return False
+
+    if marker != 0x7E:  # /* ~ */
+        return False
+
+    scanned = state.scanDelims(state.pos, True)
+    length = scanned.length
+    ch = chr(marker)
+
+    if length < 2:
+        return False
+
+    if length % 2:
+        token = state.push("text", "", 0)
+        token.content = ch
+        length -= 1
+
+    i = 0
+    while i < length:
+        token = state.push("text", "", 0)
+        token.content = ch + ch
+        state.delimiters.append(
+            Delimiter(
+                **{
+                    "marker": marker,
+                    "length": 0,  # disable "rule of 3" length checks meant for emphasis
+                    "jump": i // 2,  # for `~~` 1 marker = 2 characters
+                    "token": len(state.tokens) - 1,
+                    "end": -1,
+                    "open": scanned.can_open,
+                    "close": scanned.can_close,
+                }
+            )
+        )
+
+        i += 2
+
+    state.pos += scanned.length
+
+    return True
+
+
+def _postProcess(state: StateInline, delimiters: list[Delimiter]):
+
+    loneMarkers = []
+    maximum = len(delimiters)
+
+    i = 0
+    while i < maximum:
+        startDelim = delimiters[i]
+
+        if startDelim.marker != 0x7E:  # /* ~ */
+            i += 1
+            continue
+
+        if startDelim.end == -1:
+            i += 1
+            continue
+
+        endDelim = delimiters[startDelim.end]
+
+        token = state.tokens[startDelim.token]
+        token.type = "s_open"
+        token.tag = "s"
+        token.nesting = 1
+        token.markup = "~~"
+        token.content = ""
+
+        token = state.tokens[endDelim.token]
+        token.type = "s_close"
+        token.tag = "s"
+        token.nesting = -1
+        token.markup = "~~"
+        token.content = ""
+
+        if (
+            state.tokens[endDelim.token - 1].type == "text"
+            and state.tokens[endDelim.token - 1].content == "~"
+        ):
+
+            loneMarkers.append(endDelim.token - 1)
+
+        i += 1
+
+    # If a marker sequence has an odd number of characters, it's split
+    # like this: `~~~~~` -> `~` + `~~` + `~~`, leaving one marker at the
+    # start of the sequence.
+    #
+    # So, we have to move all those markers after subsequent s_close tags.
+    #
+    while loneMarkers:
+        i = loneMarkers.pop()
+        j = i + 1
+
+        while (j < len(state.tokens)) and (state.tokens[j].type == "s_close"):
+            j += 1
+
+        j -= 1
+
+        if i != j:
+            token = state.tokens[j]
+            state.tokens[j] = state.tokens[i]
+            state.tokens[i] = token
+
+
+def postProcess(state: StateInline):
+    """Walk through delimiter list and replace text tokens with tags."""
+    tokens_meta = state.tokens_meta
+    maximum = len(state.tokens_meta)
+    _postProcess(state, state.delimiters)
+
+    curr = 0
+    while curr < maximum:
+        try:
+            curr_meta = tokens_meta[curr]
+        except IndexError:
+            pass
+        else:
+            if curr_meta and "delimiters" in curr_meta:
+                _postProcess(state, curr_meta["delimiters"])
+        curr += 1
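
Strikethrough is not part of the commonmark preset that MarkdownIt() defaults to, so the rule has to be switched on — a sketch, not part of the patch:

    from markdown_it import MarkdownIt

    md = MarkdownIt().enable("strikethrough")
    print(md.renderInline("~~gone~~"))
    # <s>gone</s>
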
diff --git a/markdown_it/rules_inline/text.py b/markdown_it/rules_inline/text.py
new file mode 100644
index 0000000..ec6ee0f
--- /dev/null
+++ b/markdown_it/rules_inline/text.py
@@ -0,0 +1,57 @@
+# Skip text characters for text token, place those to pending buffer
+# and increment current pos
+
+from .state_inline import StateInline
+
+# Rule to skip pure text
+# '{}$%@~+=:' reserved for extensions
+
+# !, ", #, $, %, &, ', (, ), *, +, ,, -, ., /, :, ;, <, =, >, ?, @, [, \, ], ^, _, `, {, |, }, or ~
+
+# !!!! Don't confuse with "Markdown ASCII Punctuation" chars
+# http://spec.commonmark.org/0.15/#ascii-punctuation-character
+
+
+def isTerminatorChar(ch):
+    return ch in {
+        0x0A,  # /* \n */:
+        0x21,  # /* ! */:
+        0x23,  # /* # */:
+        0x24,  # /* $ */:
+        0x25,  # /* % */:
+        0x26,  # /* & */:
+        0x2A,  # /* * */:
+        0x2B,  # /* + */:
+        0x2D,  # /* - */:
+        0x3A,  # /* : */:
+        0x3C,  # /* < */:
+        0x3D,  # /* = */:
+        0x3E,  # /* > */:
+        0x40,  # /* @ */:
+        0x5B,  # /* [ */:
+        0x5C,  # /* \ */:
+        0x5D,  # /* ] */:
+        0x5E,  # /* ^ */:
+        0x5F,  # /* _ */:
+        0x60,  # /* ` */:
+        0x7B,  # /* { */:
+        0x7D,  # /* } */:
+        0x7E,  # /* ~ */:
+    }
+
+
+def text(state: StateInline, silent: bool, **args):
+    pos = state.pos
+    posMax = state.posMax
+    while (pos < posMax) and not isTerminatorChar(state.srcCharCode[pos]):
+        pos += 1
+
+    if pos == state.pos:
+        return False
+
+    if not silent:
+        state.pending += state.src[state.pos : pos]
+
+    state.pos = pos
+
+    return True
diff --git a/markdown_it/rules_inline/text_collapse.py b/markdown_it/rules_inline/text_collapse.py
new file mode 100644
index 0000000..6d0c0ab
--- /dev/null
+++ b/markdown_it/rules_inline/text_collapse.py
@@ -0,0 +1,43 @@
+from .state_inline import StateInline
+
+
+def text_collapse(state: StateInline, *args):
+    """
+    Clean up tokens after emphasis and strikethrough postprocessing:
+    merge adjacent text nodes into one and re-calculate all token levels
+
+    This is necessary because initially emphasis delimiter markers (``*, _, ~``)
+    are treated as their own separate text tokens. Then emphasis rule either
+    leaves them as text (needed to merge with adjacent text) or turns them
+    into opening/closing tags (which messes up levels inside).
+    """
+    level = 0
+    maximum = len(state.tokens)
+
+    curr = last = 0
+    while curr < maximum:
+        # re-calculate levels after emphasis/strikethrough turns some text nodes
+        # into opening/closing tags
+        if state.tokens[curr].nesting < 0:
+            level -= 1  # closing tag
+        state.tokens[curr].level = level
+        if state.tokens[curr].nesting > 0:
+            level += 1  # opening tag
+
+        if (
+            state.tokens[curr].type == "text"
+            and curr + 1 < maximum
+            and state.tokens[curr + 1].type == "text"
+        ):
+            # collapse two adjacent text nodes
+            state.tokens[curr + 1].content = (
+                state.tokens[curr].content + state.tokens[curr + 1].content
+            )
+        else:
+            if curr != last:
+                state.tokens[last] = state.tokens[curr]
+            last += 1
+        curr += 1
+
+    if curr != last:
+        del state.tokens[last:]
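
End-to-end, this collapse pass is what keeps unmatched markers from fragmenting the text — a sketch, not part of the patch:

    from markdown_it import MarkdownIt

    md = MarkdownIt()
    # The "*" cannot open or close emphasis here, so emphasis.tokenize leaves it
    # as a text token and text_collapse merges everything back into one node
    children = md.parseInline("2 * 3 = 6")[0].children
    print([(t.type, t.content) for t in children])
    # [('text', '2 * 3 = 6')]
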