summaryrefslogtreecommitdiffstats
path: root/mdit_py_plugins/attrs
diff options
context:
space:
mode:
Diffstat (limited to 'mdit_py_plugins/attrs')
-rw-r--r--mdit_py_plugins/attrs/__init__.py1
-rw-r--r--mdit_py_plugins/attrs/index.py123
-rw-r--r--mdit_py_plugins/attrs/parse.py265
3 files changed, 389 insertions, 0 deletions
diff --git a/mdit_py_plugins/attrs/__init__.py b/mdit_py_plugins/attrs/__init__.py
new file mode 100644
index 0000000..9359cf8
--- /dev/null
+++ b/mdit_py_plugins/attrs/__init__.py
@@ -0,0 +1 @@
+from .index import attrs_plugin # noqa: F401
diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py
new file mode 100644
index 0000000..1adea3f
--- /dev/null
+++ b/mdit_py_plugins/attrs/index.py
@@ -0,0 +1,123 @@
+from typing import List, Optional
+
+from markdown_it import MarkdownIt
+from markdown_it.rules_inline import StateInline
+from markdown_it.token import Token
+
+from .parse import ParseError, parse
+
+
def attrs_plugin(
    md: MarkdownIt,
    *,
    after=("image", "code_inline", "link_close", "span_close"),
    spans=False,
    span_after="link",
):
    """Parse inline attributes that immediately follow certain inline elements::

        ![alt](https://image.com){#id .a b=c}

    This syntax is inspired by
    `Djot spans
    <https://htmlpreview.github.io/?https://github.com/jgm/djot/blob/master/doc/syntax.html#inline-attributes>`_.

    Inside the curly braces, the following syntax is possible:

    - `.foo` specifies foo as a class.
      Multiple classes may be given in this way; they will be combined.
    - `#foo` specifies foo as an identifier.
      An element may have only one identifier;
      if multiple identifiers are given, the last one is used.
    - `key="value"` or `key=value` specifies a key-value attribute.
      Quotes are not needed when the value consists entirely of
      ASCII alphanumeric characters or `_` or `:` or `-`.
      Backslash escapes may be used inside quoted values.
    - `%` begins a comment, which ends with the next `%` or the end of the attribute (`}`).

    Multiple attribute blocks are merged.

    :param md: The MarkdownIt instance to modify.
    :param after: The names of inline elements after which attributes may be specified.
        This plugin does not support attributes after emphasis, strikethrough or text elements,
        which all require post-parse processing.
    :param spans: If True, also parse attributes after spans of text, encapsulated by `[]`.
        Note Markdown link references take precedence over this syntax.
    :param span_after: The name of an inline rule after which spans may be specified.
    """

    def _attr_rule(state: StateInline, silent: bool):
        """Inline rule: attach a ``{...}`` block to the token that precedes it."""
        # Attributes must directly follow a token: bail out when there is
        # pending content or no token has been emitted yet.
        if state.pending or not state.tokens:
            return False
        token = state.tokens[-1]
        if token.type not in after:
            return False
        try:
            new_pos, attrs = parse(state.src[state.pos :])
        except ParseError:
            return False
        # For a closing token (e.g. ``link_close``) the attributes belong on
        # the matching opening token.
        token_index = _find_opening(state.tokens, len(state.tokens) - 1)
        if token_index is None:
            return False
        # ``new_pos`` is the offset of the closing brace; step past it.
        state.pos += new_pos + 1
        if not silent:
            attr_token = state.tokens[token_index]
            # Merge with the classes already stored on the token that receives
            # the attributes.  This must be checked on ``attr_token`` rather
            # than on the last token: a closing token carries no classes, so
            # consecutive blocks after a link/span would otherwise overwrite
            # the class instead of combining it.
            if "class" in attrs and "class" in attr_token.attrs:
                attrs["class"] = f"{attr_token.attrs['class']} {attrs['class']}"
            attr_token.attrs.update(attrs)
        return True

    if spans:
        md.inline.ruler.after(span_after, "span", _span_rule)
    md.inline.ruler.push("attr", _attr_rule)
+
+
+def _find_opening(tokens: List[Token], index: int) -> Optional[int]:
+ """Find the opening token index, if the token is closing."""
+ if tokens[index].nesting != -1:
+ return index
+ level = 0
+ while index >= 0:
+ level += tokens[index].nesting
+ if level == 0:
+ return index
+ index -= 1
+ return None
+
+
def _span_rule(state: StateInline, silent: bool):
    """Inline rule: turn ``[text]{attrs}`` into a ``span`` element with the attributes.

    :param state: the inline parser state, positioned at a candidate ``[``.
    :param silent: when True, only validate and advance; emit no tokens.
    :returns: True if a span with a valid attribute block was recognised.
    """
    # Compare against the source string directly; ``srcCharCode`` is
    # deprecated in newer markdown-it-py releases.
    if state.src[state.pos] != "[":
        return False

    maximum = state.posMax
    label_start = state.pos + 1
    label_end = state.md.helpers.parseLinkLabel(state, state.pos, False)

    # parser failed to find ']', so it's not a valid span
    if label_end < 0:
        return False

    pos = label_end + 1

    # check not at end of inline
    if pos >= maximum:
        return False

    try:
        new_pos, attrs = parse(state.src[pos:])
    except ParseError:
        return False

    # ``new_pos`` is the offset of the closing brace; step past it.
    pos += new_pos + 1

    if not silent:
        # Restrict the parser to the bracketed text and tokenize it as the
        # children of the span.
        state.pos = label_start
        state.posMax = label_end
        span_open = state.push("span_open", "span", 1)
        span_open.attrs = attrs
        state.md.inline.tokenize(state)
        state.push("span_close", "span", -1)

    # Resume after the attribute block with the original limit restored.
    state.pos = pos
    state.posMax = maximum
    return True
diff --git a/mdit_py_plugins/attrs/parse.py b/mdit_py_plugins/attrs/parse.py
new file mode 100644
index 0000000..4a30353
--- /dev/null
+++ b/mdit_py_plugins/attrs/parse.py
@@ -0,0 +1,265 @@
+"""Parser for attributes::
+
+ attributes { id = "foo", class = "bar baz",
+ key1 = "val1", key2 = "val2" }
+
+Adapted from:
+https://github.com/jgm/djot/blob/fae7364b86bfce69bc6d5b5eede1f5196d845fd6/djot/attributes.lua#L1
+
+syntax:
+
+attributes <- '{' whitespace* attribute (whitespace attribute)* whitespace* '}'
+attribute <- identifier | class | keyval
+identifier <- '#' name
+class <- '.' name
+name <- (nonspace, nonpunctuation other than ':', '_', '-')+
+keyval <- key '=' val
+key <- (ASCII_ALPHANUM | ':' | '_' | '-')+
+val <- bareval | quotedval
+bareval <- (ASCII_ALPHANUM | ':' | '_' | '-')+
+quotedval <- '"' ([^"] | '\"')* '"'
+"""
+from __future__ import annotations
+
+from enum import Enum
+import re
+from typing import Callable
+
+
class State(Enum):
    """States of the attribute-block scanner, one per character handler."""

    # Before the opening brace has been seen.
    START = 0
    # Inside the braces, between attributes.
    SCANNING = 1
    # Inside a ``#identifier``.
    SCANNING_ID = 2
    # Inside a ``.class``.
    SCANNING_CLASS = 3
    # Inside the key of a ``key=value`` pair.
    SCANNING_KEY = 4
    # Just after ``=``, before the first character of the value.
    SCANNING_VALUE = 5
    # Inside an unquoted value.
    SCANNING_BARE_VALUE = 6
    # Inside a double-quoted value.
    SCANNING_QUOTED_VALUE = 7
    # Inside a ``%`` comment.
    SCANNING_COMMENT = 8
    # Just after a backslash inside a quoted value.
    SCANNING_ESCAPED = 9
    # The closing brace has been consumed.
    DONE = 10
+
+
# A single whitespace character.
REGEX_SPACE = re.compile(r"\s")
# A single character that cannot be part of an id/class name: whitespace or
# one of the listed punctuation characters (``:``, ``_`` and ``-`` are absent
# from the set, so they remain valid inside names).
REGEX_SPACE_PUNCTUATION = re.compile(r"[\s!\"#$%&'()*+,./;<=>?@[\]^`{|}~]")
# A single character allowed in keys and bare values.  The digit range is
# spelled out because ``\d`` on a ``str`` pattern also matches non-ASCII
# Unicode digits, while the grammar only allows ASCII alphanumerics.
REGEX_KEY_CHARACTERS = re.compile(r"[a-zA-Z0-9_:-]")
+
+
class TokenState:
    """Collects the ``(start, end, type)`` spans recorded while scanning.

    ``start`` remembers where the item currently being scanned began; the
    recorded spans are turned into an attribute dictionary by :meth:`compile`.
    """

    def __init__(self):
        self.start: int = 0
        self._tokens = []

    def set_start(self, start: int) -> None:
        """Remember ``start`` as the beginning of the item being scanned."""
        self.start = start

    def append(self, start: int, end: int, ttype: str):
        """Record a span ``[start, end)`` of type ``ttype``."""
        self._tokens.append((start, end, ttype))

    def compile(self, string: str) -> dict[str, str]:
        """Build the attribute dictionary by slicing ``string`` with the spans.

        ``id`` spans set ``"id"`` (a later one replaces an earlier one).
        ``class`` spans and ``class=...`` pairs are collected and joined with
        spaces under ``"class"``.  A ``key`` span contributes only when the
        span right after it is a ``value``.
        """
        attributes = {}
        class_names = []
        spans = self._tokens
        last = len(spans) - 1
        stream = enumerate(spans)
        for position, (begin, stop, kind) in stream:
            text = string[begin:stop]
            if kind == "id":
                attributes["id"] = text
            elif kind == "class":
                class_names.append(text)
            elif kind == "key" and position < last:
                v_begin, v_stop, v_kind = spans[position + 1]
                if v_kind == "value":
                    value = string[v_begin:v_stop]
                    if text == "class":
                        class_names.append(value)
                    else:
                        attributes[text] = value
                    # The value span was consumed together with its key.
                    next(stream)
        if class_names:
            attributes["class"] = " ".join(class_names)
        return attributes

    def __str__(self) -> str:
        return str(self._tokens)

    def __repr__(self) -> str:
        return repr(self._tokens)
+
+
class ParseError(Exception):
    """Raised when a string is not a valid attribute block.

    The offending position is kept on ``pos`` and appended to the message.
    """

    def __init__(self, msg: str, pos: int) -> None:
        message = msg + f" at position {pos}"
        super().__init__(message)
        self.pos = pos
+
+
def parse(string: str) -> tuple[int, dict[str, str]]:
    """Parse an attribute block from the start of ``string``.

    The string is fed one character at a time to the handler of the current
    state until the closing ``}`` is reached; anything after it is ignored.

    :returns: (index of the closing ``}`` within ``string``, dict of attributes)
    :raises ParseError: if the string does not start with a valid attribute
        block, or ends before the closing ``}``.
    """
    state: State = State.START
    tokens = TokenState()
    for pos, char in enumerate(string):
        state = HANDLERS[state](char, pos, tokens)
        if state == State.DONE:
            return pos, tokens.compile(string)

    # Running out of input means the block was never closed.  Reporting that
    # as a success would make callers skip past the end of their source.
    raise ParseError("Attributes must end with '}'", len(string))
+
+
def handle_start(char: str, pos: int, tokens: TokenState) -> State:
    """Accept only the opening ``{``; anything else is not an attribute block."""
    if char != "{":
        raise ParseError("Attributes must start with '{'", pos)
    return State.SCANNING
+
+
def handle_scanning(char: str, pos: int, tokens: TokenState) -> State:
    """Handle a character between attributes and decide what starts next.

    Whitespace is skipped and ``}`` finishes the block.  ``#``, ``%``, ``.``
    or a key character begins an id, comment, class or key respectively, and
    ``pos`` is remembered as its start.

    :raises ParseError: for any other character.
    """
    if char in (" ", "\t", "\n", "\r"):
        return State.SCANNING
    if char == "}":
        return State.DONE

    if char == "#":
        following = State.SCANNING_ID
    elif char == "%":
        following = State.SCANNING_COMMENT
    elif char == ".":
        following = State.SCANNING_CLASS
    elif REGEX_KEY_CHARACTERS.fullmatch(char):
        following = State.SCANNING_KEY
    else:
        raise ParseError(f"Unexpected character whilst scanning: {char}", pos)

    tokens.set_start(pos)
    return following
+
+
def handle_scanning_comment(char: str, pos: int, tokens: TokenState) -> State:
    """Handle a character inside a ``%`` comment.

    The comment ends at the next ``%`` (scanning resumes) or at ``}``, which
    also closes the whole attribute block.  Every other character is skipped.
    """
    if char == "%":
        return State.SCANNING

    # An unterminated comment is closed by the end of the attribute block.
    if char == "}":
        return State.DONE

    return State.SCANNING_COMMENT
+
+
def handle_scanning_id(char: str, pos: int, tokens: TokenState) -> State:
    """Handle a character inside a ``#identifier``.

    Name characters extend the identifier.  Whitespace or ``}`` ends it; a
    non-empty name is then recorded as an ``id`` span that excludes the ``#``.

    :raises ParseError: for any other punctuation character.
    """
    if REGEX_SPACE_PUNCTUATION.fullmatch(char) is None:
        return State.SCANNING_ID

    closes_block = char == "}"
    if not closes_block and not REGEX_SPACE.fullmatch(char):
        raise ParseError(f"Unexpected character whilst scanning id: {char}", pos)

    # Record only when at least one character follows the '#'.
    if pos - tokens.start > 1:
        tokens.append(tokens.start + 1, pos, "id")
    return State.DONE if closes_block else State.SCANNING
+
+
def handle_scanning_class(char: str, pos: int, tokens: TokenState) -> State:
    """Handle a character inside a ``.class``.

    Name characters extend the class.  Whitespace or ``}`` ends it; a
    non-empty name is then recorded as a ``class`` span that excludes the
    leading ``.``.

    :raises ParseError: for any other punctuation character.
    """
    if REGEX_SPACE_PUNCTUATION.fullmatch(char) is None:
        return State.SCANNING_CLASS

    closes_block = char == "}"
    if not closes_block and not REGEX_SPACE.fullmatch(char):
        raise ParseError(f"Unexpected character whilst scanning class: {char}", pos)

    # Record only when at least one character follows the '.'.
    if pos - tokens.start > 1:
        tokens.append(tokens.start + 1, pos, "class")
    return State.DONE if closes_block else State.SCANNING
+
+
def handle_scanning_key(char: str, pos: int, tokens: TokenState) -> State:
    """Handle a character inside a key.

    ``=`` ends the key, which is recorded as a ``key`` span; key characters
    extend it.

    :raises ParseError: for any other character.
    """
    if char == "=":
        tokens.append(tokens.start, pos, "key")
        return State.SCANNING_VALUE

    if not REGEX_KEY_CHARACTERS.fullmatch(char):
        raise ParseError(f"Unexpected character whilst scanning key: {char}", pos)
    return State.SCANNING_KEY
+
+
def handle_scanning_value(char: str, pos: int, tokens: TokenState) -> State:
    """Handle the first character after ``=``.

    A double quote starts a quoted value and a key character starts a bare
    value; in both cases ``pos`` is remembered as the start.

    :raises ParseError: for any other character.
    """
    if char == '"':
        following = State.SCANNING_QUOTED_VALUE
    elif REGEX_KEY_CHARACTERS.fullmatch(char):
        following = State.SCANNING_BARE_VALUE
    else:
        raise ParseError(f"Unexpected character whilst scanning value: {char}", pos)

    tokens.set_start(pos)
    return following
+
+
def handle_scanning_bare_value(char: str, pos: int, tokens: TokenState) -> State:
    """Handle a character inside an unquoted value.

    Key characters extend the value.  Whitespace or ``}`` ends it, and the
    value is recorded as a ``value`` span.

    :raises ParseError: for any other character.
    """
    if REGEX_KEY_CHARACTERS.fullmatch(char):
        return State.SCANNING_BARE_VALUE

    if char == "}":
        following = State.DONE
    elif REGEX_SPACE.fullmatch(char):
        following = State.SCANNING
    else:
        raise ParseError(
            f"Unexpected character whilst scanning bare value: {char}", pos
        )

    tokens.append(tokens.start, pos, "value")
    return following
+
+
def handle_scanning_escaped(char: str, pos: int, tokens: TokenState) -> State:
    """Skip the character following a backslash and resume the quoted value."""
    return State.SCANNING_QUOTED_VALUE
+
+
def handle_scanning_quoted_value(char: str, pos: int, tokens: TokenState) -> State:
    """Handle a character inside a double-quoted value.

    A closing quote records the ``value`` span (quotes excluded) and returns
    to scanning between attributes.  A backslash escapes the next character.
    A newline also records the text seen so far as a ``value`` span, but the
    quoted value keeps being scanned.

    :raises ParseError: if ``{`` or ``}`` appears inside the quotes.
    """
    if char == "\\":
        return State.SCANNING_ESCAPED

    if char in ("{", "}"):
        raise ParseError(
            f"Unexpected character whilst scanning quoted value: {char}", pos
        )

    closing_quote = char == '"'
    if closing_quote or char == "\n":
        tokens.append(tokens.start + 1, pos, "value")

    return State.SCANNING if closing_quote else State.SCANNING_QUOTED_VALUE
+
+
# Dispatch table: the handler to call for the next character in each state.
# ``State.DONE`` has no entry because parsing stops once it is reached.
HANDLERS: dict[State, Callable[[str, int, TokenState], State]] = {
    # entry point
    State.START: handle_start,
    # between attributes
    State.SCANNING: handle_scanning,
    State.SCANNING_COMMENT: handle_scanning_comment,
    # ``#id`` and ``.class``
    State.SCANNING_ID: handle_scanning_id,
    State.SCANNING_CLASS: handle_scanning_class,
    # ``key=value``
    State.SCANNING_KEY: handle_scanning_key,
    State.SCANNING_VALUE: handle_scanning_value,
    State.SCANNING_BARE_VALUE: handle_scanning_bare_value,
    State.SCANNING_QUOTED_VALUE: handle_scanning_quoted_value,
    State.SCANNING_ESCAPED: handle_scanning_escaped,
}