author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-29 04:29:52 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-29 04:29:52 +0000
commit    fcb2f10732db61d216e2105c8154486f66b3e3ff (patch)
tree      efda929db4b1543eecc583e3b7d9c0bad4cd86a6 /mdit_py_plugins/attrs
parent    Initial commit. (diff)
Adding upstream version 0.3.3. (upstream/0.3.3)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'mdit_py_plugins/attrs')
-rw-r--r--  mdit_py_plugins/attrs/__init__.py    1
-rw-r--r--  mdit_py_plugins/attrs/index.py     123
-rw-r--r--  mdit_py_plugins/attrs/parse.py     265
3 files changed, 389 insertions, 0 deletions
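
For context, a minimal usage sketch of the plugin added by this commit (assuming markdown-it-py is installed alongside mdit-py-plugins 0.3.3; the input strings, URL and attribute names below are illustrative only)::

    from markdown_it import MarkdownIt
    from mdit_py_plugins.attrs import attrs_plugin

    # Register the plugin; spans=True additionally enables the [text]{.cls} syntax.
    md = MarkdownIt().use(attrs_plugin, spans=True)

    # A curly-brace block attaches its attributes to the preceding inline element.
    print(md.render("![alt](https://image.com/pic.png){#pic .border width=100}"))

    # With spans enabled, bracketed text is wrapped in a <span> carrying the attributes.
    print(md.render("[some text]{.red}"))
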
diff --git a/mdit_py_plugins/attrs/__init__.py b/mdit_py_plugins/attrs/__init__.py
new file mode 100644
index 0000000..9359cf8
--- /dev/null
+++ b/mdit_py_plugins/attrs/__init__.py
@@ -0,0 +1 @@
+from .index import attrs_plugin # noqa: F401
diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py
new file mode 100644
index 0000000..1adea3f
--- /dev/null
+++ b/mdit_py_plugins/attrs/index.py
@@ -0,0 +1,123 @@
+from typing import List, Optional
+
+from markdown_it import MarkdownIt
+from markdown_it.rules_inline import StateInline
+from markdown_it.token import Token
+
+from .parse import ParseError, parse
+
+
+def attrs_plugin(
+ md: MarkdownIt,
+ *,
+ after=("image", "code_inline", "link_close", "span_close"),
+ spans=False,
+ span_after="link",
+):
+ """Parse inline attributes that immediately follow certain inline elements::
+
+ ![alt](https://image.com){#id .a b=c}
+
+ This syntax is inspired by
+ `Djot spans
+ <https://htmlpreview.github.io/?https://github.com/jgm/djot/blob/master/doc/syntax.html#inline-attributes>`_.
+
+ Inside the curly braces, the following syntax is possible:
+
+ - `.foo` specifies foo as a class.
+ Multiple classes may be given in this way; they will be combined.
+ - `#foo` specifies foo as an identifier.
+ An element may have only one identifier;
+ if multiple identifiers are given, the last one is used.
+ - `key="value"` or `key=value` specifies a key-value attribute.
+ Quotes are not needed when the value consists entirely of
+ ASCII alphanumeric characters or `_` or `:` or `-`.
+ Backslash escapes may be used inside quoted values.
+ - `%` begins a comment, which ends with the next `%` or the end of the attribute (`}`).
+
+ Multiple attribute blocks are merged.
+
+ :param md: The MarkdownIt instance to modify.
+ :param after: The names of inline elements after which attributes may be specified.
+ This plugin does not support attributes after emphasis, strikethrough or text elements,
+ which all require post-parse processing.
+ :param spans: If True, also parse attributes after spans of text, encapsulated by `[]`.
+ Note Markdown link references take precedence over this syntax.
+ :param span_after: The name of an inline rule after which spans may be specified.
+ """
+
+ def _attr_rule(state: StateInline, silent: bool):
+ if state.pending or not state.tokens:
+ return False
+ token = state.tokens[-1]
+ if token.type not in after:
+ return False
+ try:
+ new_pos, attrs = parse(state.src[state.pos :])
+ except ParseError:
+ return False
+ token_index = _find_opening(state.tokens, len(state.tokens) - 1)
+ if token_index is None:
+ return False
+ state.pos += new_pos + 1
+ if not silent:
+ attr_token = state.tokens[token_index]
+ if "class" in attrs and "class" in token.attrs:
+ attrs["class"] = f"{attr_token.attrs['class']} {attrs['class']}"
+ attr_token.attrs.update(attrs)
+ return True
+
+ if spans:
+ md.inline.ruler.after(span_after, "span", _span_rule)
+ md.inline.ruler.push("attr", _attr_rule)
+
+
+def _find_opening(tokens: List[Token], index: int) -> Optional[int]:
+ """Find the opening token index, if the token is closing."""
+ if tokens[index].nesting != -1:
+ return index
+ level = 0
+ while index >= 0:
+ level += tokens[index].nesting
+ if level == 0:
+ return index
+ index -= 1
+ return None
+
+
+def _span_rule(state: StateInline, silent: bool):
+ if state.srcCharCode[state.pos] != 0x5B: # /* [ */
+ return False
+
+ maximum = state.posMax
+ labelStart = state.pos + 1
+ labelEnd = state.md.helpers.parseLinkLabel(state, state.pos, False)
+
+ # parser failed to find ']', so it's not a valid span
+ if labelEnd < 0:
+ return False
+
+ pos = labelEnd + 1
+
+ # check not at end of inline
+ if pos >= maximum:
+ return False
+
+ try:
+ new_pos, attrs = parse(state.src[pos:])
+ except ParseError:
+ return False
+
+ pos += new_pos + 1
+
+ if not silent:
+ state.pos = labelStart
+ state.posMax = labelEnd
+ token = state.push("span_open", "span", 1)
+ token.attrs = attrs
+ state.md.inline.tokenize(state)
+ token = state.push("span_close", "span", -1)
+
+ state.pos = pos
+ state.posMax = maximum
+ return True
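
As index.py above shows, the rule uses _find_opening to locate the opening token of a paired element and attaches the attributes there. A short sketch of that behaviour (the URL and class name are made up)::

    from markdown_it import MarkdownIt
    from mdit_py_plugins.attrs import attrs_plugin

    md = MarkdownIt().use(attrs_plugin)
    tokens = md.parse("[text](https://example.com){.external}")

    # The paragraph's inline token holds the link tokens as children;
    # the class from the trailing {.external} ends up on link_open, not link_close.
    inline = tokens[1]
    link_open = next(t for t in inline.children if t.type == "link_open")
    print(link_open.attrs)  # expected to contain 'href' plus class 'external'
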
diff --git a/mdit_py_plugins/attrs/parse.py b/mdit_py_plugins/attrs/parse.py
new file mode 100644
index 0000000..4a30353
--- /dev/null
+++ b/mdit_py_plugins/attrs/parse.py
@@ -0,0 +1,265 @@
+"""Parser for attributes::
+
+ attributes { id = "foo", class = "bar baz",
+ key1 = "val1", key2 = "val2" }
+
+Adapted from:
+https://github.com/jgm/djot/blob/fae7364b86bfce69bc6d5b5eede1f5196d845fd6/djot/attributes.lua#L1
+
+syntax:
+
+attributes <- '{' whitespace* attribute (whitespace attribute)* whitespace* '}'
+attribute <- identifier | class | keyval
+identifier <- '#' name
+class <- '.' name
+name <- (nonspace, nonpunctuation other than ':', '_', '-')+
+keyval <- key '=' val
+key <- (ASCII_ALPHANUM | ':' | '_' | '-')+
+val <- bareval | quotedval
+bareval <- (ASCII_ALPHANUM | ':' | '_' | '-')+
+quotedval <- '"' ([^"] | '\"') '"'
+"""
+from __future__ import annotations
+
+from enum import Enum
+import re
+from typing import Callable
+
+
+class State(Enum):
+ START = 0
+ SCANNING = 1
+ SCANNING_ID = 2
+ SCANNING_CLASS = 3
+ SCANNING_KEY = 4
+ SCANNING_VALUE = 5
+ SCANNING_BARE_VALUE = 6
+ SCANNING_QUOTED_VALUE = 7
+ SCANNING_COMMENT = 8
+ SCANNING_ESCAPED = 9
+ DONE = 10
+
+
+REGEX_SPACE = re.compile(r"\s")
+REGEX_SPACE_PUNCTUATION = re.compile(r"[\s!\"#$%&'()*+,./;<=>?@[\]^`{|}~]")
+REGEX_KEY_CHARACTERS = re.compile(r"[a-zA-Z\d_:-]")
+
+
+class TokenState:
+ def __init__(self):
+ self._tokens = []
+ self.start: int = 0
+
+ def set_start(self, start: int) -> None:
+ self.start = start
+
+ def append(self, start: int, end: int, ttype: str):
+ self._tokens.append((start, end, ttype))
+
+ def compile(self, string: str) -> dict[str, str]:
+ """compile the tokens into a dictionary"""
+ attributes = {}
+ classes = []
+ idx = 0
+ while idx < len(self._tokens):
+ start, end, ttype = self._tokens[idx]
+ if ttype == "id":
+ attributes["id"] = string[start:end]
+ elif ttype == "class":
+ classes.append(string[start:end])
+ elif ttype == "key":
+ key = string[start:end]
+ if idx + 1 < len(self._tokens):
+ start, end, ttype = self._tokens[idx + 1]
+ if ttype == "value":
+ if key == "class":
+ classes.append(string[start:end])
+ else:
+ attributes[key] = string[start:end]
+ idx += 1
+ idx += 1
+ if classes:
+ attributes["class"] = " ".join(classes)
+ return attributes
+
+ def __str__(self) -> str:
+ return str(self._tokens)
+
+ def __repr__(self) -> str:
+ return repr(self._tokens)
+
+
+class ParseError(Exception):
+ def __init__(self, msg: str, pos: int) -> None:
+ self.pos = pos
+ super().__init__(msg + f" at position {pos}")
+
+
+def parse(string: str) -> tuple[int, dict[str, str]]:
+ """Parse attributes from start of string.
+
+ :returns: (length of parsed string, dict of attributes)
+ """
+ pos = 0
+ state: State = State.START
+ tokens = TokenState()
+ while pos < len(string):
+ state = HANDLERS[state](string[pos], pos, tokens)
+ if state == State.DONE:
+ return pos, tokens.compile(string)
+ pos = pos + 1
+
+ return pos, tokens.compile(string)
+
+
+def handle_start(char: str, pos: int, tokens: TokenState) -> State:
+
+ if char == "{":
+ return State.SCANNING
+ raise ParseError("Attributes must start with '{'", pos)
+
+
+def handle_scanning(char: str, pos: int, tokens: TokenState) -> State:
+
+ if char == " " or char == "\t" or char == "\n" or char == "\r":
+ return State.SCANNING
+ if char == "}":
+ return State.DONE
+ if char == "#":
+ tokens.set_start(pos)
+ return State.SCANNING_ID
+ if char == "%":
+ tokens.set_start(pos)
+ return State.SCANNING_COMMENT
+ if char == ".":
+ tokens.set_start(pos)
+ return State.SCANNING_CLASS
+ if REGEX_KEY_CHARACTERS.fullmatch(char):
+ tokens.set_start(pos)
+ return State.SCANNING_KEY
+
+ raise ParseError(f"Unexpected character whilst scanning: {char}", pos)
+
+
+def handle_scanning_comment(char: str, pos: int, tokens: TokenState) -> State:
+
+ if char == "%":
+ return State.SCANNING
+
+ return State.SCANNING_COMMENT
+
+
+def handle_scanning_id(char: str, pos: int, tokens: TokenState) -> State:
+
+ if not REGEX_SPACE_PUNCTUATION.fullmatch(char):
+ return State.SCANNING_ID
+
+ if char == "}":
+ if (pos - 1) > tokens.start:
+ tokens.append(tokens.start + 1, pos, "id")
+ return State.DONE
+
+ if REGEX_SPACE.fullmatch(char):
+ if (pos - 1) > tokens.start:
+ tokens.append(tokens.start + 1, pos, "id")
+ return State.SCANNING
+
+ raise ParseError(f"Unexpected character whilst scanning id: {char}", pos)
+
+
+def handle_scanning_class(char: str, pos: int, tokens: TokenState) -> State:
+
+ if not REGEX_SPACE_PUNCTUATION.fullmatch(char):
+ return State.SCANNING_CLASS
+
+ if char == "}":
+ if (pos - 1) > tokens.start:
+ tokens.append(tokens.start + 1, pos, "class")
+ return State.DONE
+
+ if REGEX_SPACE.fullmatch(char):
+ if (pos - 1) > tokens.start:
+ tokens.append(tokens.start + 1, pos, "class")
+ return State.SCANNING
+
+ raise ParseError(f"Unexpected character whilst scanning class: {char}", pos)
+
+
+def handle_scanning_key(char: str, pos: int, tokens: TokenState) -> State:
+
+ if char == "=":
+ tokens.append(tokens.start, pos, "key")
+ return State.SCANNING_VALUE
+
+ if REGEX_KEY_CHARACTERS.fullmatch(char):
+ return State.SCANNING_KEY
+
+ raise ParseError(f"Unexpected character whilst scanning key: {char}", pos)
+
+
+def handle_scanning_value(char: str, pos: int, tokens: TokenState) -> State:
+
+ if char == '"':
+ tokens.set_start(pos)
+ return State.SCANNING_QUOTED_VALUE
+
+ if REGEX_KEY_CHARACTERS.fullmatch(char):
+ tokens.set_start(pos)
+ return State.SCANNING_BARE_VALUE
+
+ raise ParseError(f"Unexpected character whilst scanning value: {char}", pos)
+
+
+def handle_scanning_bare_value(char: str, pos: int, tokens: TokenState) -> State:
+
+ if REGEX_KEY_CHARACTERS.fullmatch(char):
+ return State.SCANNING_BARE_VALUE
+
+ if char == "}":
+ tokens.append(tokens.start, pos, "value")
+ return State.DONE
+
+ if REGEX_SPACE.fullmatch(char):
+ tokens.append(tokens.start, pos, "value")
+ return State.SCANNING
+
+ raise ParseError(f"Unexpected character whilst scanning bare value: {char}", pos)
+
+
+def handle_scanning_escaped(char: str, pos: int, tokens: TokenState) -> State:
+ return State.SCANNING_QUOTED_VALUE
+
+
+def handle_scanning_quoted_value(char: str, pos: int, tokens: TokenState) -> State:
+
+ if char == '"':
+ tokens.append(tokens.start + 1, pos, "value")
+ return State.SCANNING
+
+ if char == "\\":
+ return State.SCANNING_ESCAPED
+
+ if char == "{" or char == "}":
+ raise ParseError(
+ f"Unexpected character whilst scanning quoted value: {char}", pos
+ )
+
+ if char == "\n":
+ tokens.append(tokens.start + 1, pos, "value")
+ return State.SCANNING_QUOTED_VALUE
+
+ return State.SCANNING_QUOTED_VALUE
+
+
+HANDLERS: dict[State, Callable[[str, int, TokenState], State]] = {
+ State.START: handle_start,
+ State.SCANNING: handle_scanning,
+ State.SCANNING_COMMENT: handle_scanning_comment,
+ State.SCANNING_ID: handle_scanning_id,
+ State.SCANNING_CLASS: handle_scanning_class,
+ State.SCANNING_KEY: handle_scanning_key,
+ State.SCANNING_VALUE: handle_scanning_value,
+ State.SCANNING_BARE_VALUE: handle_scanning_bare_value,
+ State.SCANNING_QUOTED_VALUE: handle_scanning_quoted_value,
+ State.SCANNING_ESCAPED: handle_scanning_escaped,
+}
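
The state-machine parser in parse.py can also be exercised on its own; a quick sketch of the (offset, attributes) return value described in its docstring (the example string is arbitrary)::

    from mdit_py_plugins.attrs.parse import ParseError, parse

    offset, attrs = parse('{#myid .cls1 .cls2 key="some value" %ignored comment%} trailing text')
    print(offset)  # index of the closing '}' within the input string
    print(attrs)   # roughly {'id': 'myid', 'key': 'some value', 'class': 'cls1 cls2'}

    # Input that does not start with '{' raises ParseError.
    try:
        parse("no attributes here")
    except ParseError as exc:
        print(exc)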