author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-29 04:24:24 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-29 04:24:24 +0000
commit    12e8343068b906f8b2afddc5569968a8a91fa5b0 (patch)
tree      75cc5e05a4392ea0292251898f992a15a16b172b /markdown_it/tree.py
parent    Initial commit. (diff)
download  markdown-it-py-12e8343068b906f8b2afddc5569968a8a91fa5b0.tar.xz
          markdown-it-py-12e8343068b906f8b2afddc5569968a8a91fa5b0.zip

Adding upstream version 2.1.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'markdown_it/tree.py')
-rw-r--r--  markdown_it/tree.py  330
1 file changed, 330 insertions, 0 deletions
diff --git a/markdown_it/tree.py b/markdown_it/tree.py
new file mode 100644
index 0000000..09476b2
--- /dev/null
+++ b/markdown_it/tree.py
@@ -0,0 +1,330 @@
+"""A tree representation of a linear markdown-it token stream.
+
+This module is not part of upstream JavaScript markdown-it.
+"""
+from __future__ import annotations
+
+from collections.abc import Generator, Sequence
+import textwrap
+from typing import Any, NamedTuple, TypeVar, overload
+
+from .token import Token
+from .utils import _removesuffix
+
+
+class _NesterTokens(NamedTuple):
+ opening: Token
+ closing: Token
+
+
+_NodeType = TypeVar("_NodeType", bound="SyntaxTreeNode")
+
+
+class SyntaxTreeNode:
+ """A Markdown syntax tree node.
+
+ A class that can be used to construct a tree representation of a linear
+ `markdown-it-py` token stream.
+
+    Each node in the tree represents one of:
+      - the root of the Markdown document
+      - a single unnested `Token`
+      - a `Token` "_open" and "_close" pair, together with the tokens
+        nested in between
+ """
+
+ def __init__(
+ self, tokens: Sequence[Token] = (), *, create_root: bool = True
+ ) -> None:
+ """Initialize a `SyntaxTreeNode` from a token stream.
+
+ If `create_root` is True, create a root node for the document.
+ """
+ # Only nodes representing an unnested token have self.token
+ self.token: Token | None = None
+
+ # Only containers have nester tokens
+ self.nester_tokens: _NesterTokens | None = None
+
+ # Root node does not have self.parent
+ self._parent: Any = None
+
+ # Empty list unless a non-empty container, or unnested token that has
+ # children (i.e. inline or img)
+ self._children: list = []
+
+ if create_root:
+ self._set_children_from_tokens(tokens)
+ return
+
+ if not tokens:
+ raise ValueError(
+ "Can only create root from empty token sequence."
+ " Set `create_root=True`."
+ )
+ elif len(tokens) == 1:
+ inline_token = tokens[0]
+ if inline_token.nesting:
+ raise ValueError(
+ "Unequal nesting level at the start and end of token stream."
+ )
+ self.token = inline_token
+ if inline_token.children:
+ self._set_children_from_tokens(inline_token.children)
+ else:
+ self.nester_tokens = _NesterTokens(tokens[0], tokens[-1])
+ self._set_children_from_tokens(tokens[1:-1])
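+
+    # Illustrative construction sketch (not part of upstream): the typical
+    # entry point is a full token stream from `MarkdownIt.parse`, e.g.
+    #
+    #     from markdown_it import MarkdownIt
+    #     tokens = MarkdownIt().parse("# Heading\n\nSome *text*")
+    #     root = SyntaxTreeNode(tokens)   # create_root=True by default
+    #     root.children[0].type           # -> "heading"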
+
+ def __repr__(self) -> str:
+ return f"{type(self).__name__}({self.type})"
+
+ @overload
+ def __getitem__(self: _NodeType, item: int) -> _NodeType:
+ ...
+
+ @overload
+ def __getitem__(self: _NodeType, item: slice) -> list[_NodeType]:
+ ...
+
+ def __getitem__(self: _NodeType, item: int | slice) -> _NodeType | list[_NodeType]:
+ return self.children[item]
+
+ def to_tokens(self: _NodeType) -> list[Token]:
+ """Recover the linear token stream."""
+
+ def recursive_collect_tokens(node: _NodeType, token_list: list[Token]) -> None:
+ if node.type == "root":
+ for child in node.children:
+ recursive_collect_tokens(child, token_list)
+ elif node.token:
+ token_list.append(node.token)
+ else:
+ assert node.nester_tokens
+ token_list.append(node.nester_tokens.opening)
+ for child in node.children:
+ recursive_collect_tokens(child, token_list)
+ token_list.append(node.nester_tokens.closing)
+
+ tokens: list[Token] = []
+ recursive_collect_tokens(self, tokens)
+ return tokens
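+
+    # Round-trip sketch (illustrative): flattening a tree reproduces the
+    # token sequence it was built from, e.g.
+    #
+    #     tokens = MarkdownIt().parse("- a\n- b")
+    #     assert SyntaxTreeNode(tokens).to_tokens() == tokens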
+
+ @property
+ def children(self: _NodeType) -> list[_NodeType]:
+ return self._children
+
+ @children.setter
+ def children(self: _NodeType, value: list[_NodeType]) -> None:
+ self._children = value
+
+ @property
+ def parent(self: _NodeType) -> _NodeType | None:
+ return self._parent
+
+ @parent.setter
+ def parent(self: _NodeType, value: _NodeType | None) -> None:
+ self._parent = value
+
+ @property
+ def is_root(self) -> bool:
+ """Is the node a special root node?"""
+ return not (self.token or self.nester_tokens)
+
+ @property
+ def is_nested(self) -> bool:
+ """Is this node nested?.
+
+ Returns `True` if the node represents a `Token` pair and tokens in the
+ sequence between them, where `Token.nesting` of the first `Token` in
+ the pair is 1 and nesting of the other `Token` is -1.
+ """
+ return bool(self.nester_tokens)
+
+ @property
+ def siblings(self: _NodeType) -> Sequence[_NodeType]:
+ """Get siblings of the node.
+
+ Gets the whole group of siblings, including self.
+ """
+ if not self.parent:
+ return [self]
+ return self.parent.children
+
+ @property
+ def type(self) -> str:
+ """Get a string type of the represented syntax.
+
+ - "root" for root nodes
+ - `Token.type` if the node represents an unnested token
+ - `Token.type` of the opening token, with "_open" suffix stripped, if
+ the node represents a nester token pair
+ """
+ if self.is_root:
+ return "root"
+ if self.token:
+ return self.token.type
+ assert self.nester_tokens
+ return _removesuffix(self.nester_tokens.opening.type, "_open")
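+
+    # Sketch (illustrative): a node built from a "heading_open"/"heading_close"
+    # pair reports the stripped type, e.g.
+    #
+    #     node = SyntaxTreeNode(MarkdownIt().parse("# Title"))[0]
+    #     node.type                        # -> "heading"
+    #     node.nester_tokens.opening.type  # -> "heading_open"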
+
+ @property
+ def next_sibling(self: _NodeType) -> _NodeType | None:
+ """Get the next node in the sequence of siblings.
+
+ Returns `None` if this is the last sibling.
+ """
+ self_index = self.siblings.index(self)
+ if self_index + 1 < len(self.siblings):
+ return self.siblings[self_index + 1]
+ return None
+
+ @property
+ def previous_sibling(self: _NodeType) -> _NodeType | None:
+ """Get the previous node in the sequence of siblings.
+
+ Returns `None` if this is the first sibling.
+ """
+ self_index = self.siblings.index(self)
+ if self_index - 1 >= 0:
+ return self.siblings[self_index - 1]
+ return None
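+
+    # Sketch (illustrative): the items of a two-item list can reach each
+    # other through the sibling accessors, e.g.
+
+    #     root = SyntaxTreeNode(MarkdownIt().parse("- a\n- b"))
+    #     items = [n for n in root.walk() if n.type == "list_item"]
+    #     assert items[0].next_sibling is items[1]
+    #     assert items[1].previous_sibling is items[0]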
+
+ def _add_child(
+ self,
+ tokens: Sequence[Token],
+ ) -> None:
+ """Make a child node for `self`."""
+ child = type(self)(tokens, create_root=False)
+ child.parent = self
+ self.children.append(child)
+
+ def _set_children_from_tokens(self, tokens: Sequence[Token]) -> None:
+ """Convert the token stream to a tree structure and set the resulting
+ nodes as children of `self`."""
+ reversed_tokens = list(reversed(tokens))
+ while reversed_tokens:
+ token = reversed_tokens.pop()
+
+ if not token.nesting:
+ self._add_child([token])
+ continue
+ if token.nesting != 1:
+ raise ValueError("Invalid token nesting")
+
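+            # Opening token: collect everything up to and including the
+            # matching closing token. `nesting` tracks the depth, rising by 1
+            # on "_open" tokens and falling by 1 on "_close" tokens, so it
+            # returns to zero exactly at the matching close.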
+ nested_tokens = [token]
+ nesting = 1
+ while reversed_tokens and nesting:
+ token = reversed_tokens.pop()
+ nested_tokens.append(token)
+ nesting += token.nesting
+ if nesting:
+ raise ValueError(f"unclosed tokens starting {nested_tokens[0]}")
+
+ self._add_child(nested_tokens)
+
+ def pretty(
+ self, *, indent: int = 2, show_text: bool = False, _current: int = 0
+ ) -> str:
+ """Create an XML style string of the tree."""
+ prefix = " " * _current
+ text = prefix + f"<{self.type}"
+ if not self.is_root and self.attrs:
+ text += " " + " ".join(f"{k}={v!r}" for k, v in self.attrs.items())
+ text += ">"
+ if show_text and not self.is_root and self.type == "text" and self.content:
+ text += "\n" + textwrap.indent(self.content, prefix + " " * indent)
+ for child in self.children:
+ text += "\n" + child.pretty(
+ indent=indent, show_text=show_text, _current=_current + indent
+ )
+ return text
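+
+    # Output sketch (illustrative): for the tree built from "*hi*" the
+    # default rendering looks like
+    #
+    #     <root>
+    #       <paragraph>
+    #         <inline>
+    #           <em>
+    #             <text>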
+
+ def walk(
+ self: _NodeType, *, include_self: bool = True
+ ) -> Generator[_NodeType, None, None]:
+ """Recursively yield all descendant nodes in the tree starting at self.
+
+ The order mimics the order of the underlying linear token
+ stream (i.e. depth first).
+ """
+ if include_self:
+ yield self
+ for child in self.children:
+ yield from child.walk(include_self=True)
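+
+    # Traversal sketch (illustrative): node types come out in the same order
+    # as the flat token stream, e.g.
+    #
+    #     tree = SyntaxTreeNode(MarkdownIt().parse("*hi*"))
+    #     [node.type for node in tree.walk()]
+    #     # -> ["root", "paragraph", "inline", "em", "text"]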
+
+ # NOTE:
+ # The values of the properties defined below directly map to properties
+ # of the underlying `Token`s. A root node does not translate to a `Token`
+ # object, so calling these property getters on a root node will raise an
+ # `AttributeError`.
+ #
+ # There is no mapping for `Token.nesting` because the `is_nested` property
+ # provides that data, and can be called on any node type, including root.
+
+ def _attribute_token(self) -> Token:
+ """Return the `Token` that is used as the data source for the
+ properties defined below."""
+ if self.token:
+ return self.token
+ if self.nester_tokens:
+ return self.nester_tokens.opening
+ raise AttributeError("Root node does not have the accessed attribute")
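+
+    # Sketch (illustrative): the delegated properties below read from the
+    # node's `Token` (the opening token for nester pairs), e.g.
+
+    #     root = SyntaxTreeNode(MarkdownIt().parse("# Title"))
+    #     root[0].tag   # -> "h1", from the "heading_open" token
+    #     root.tag      # raises AttributeError: the root has no Token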
+
+ @property
+ def tag(self) -> str:
+ """html tag name, e.g. \"p\" """
+ return self._attribute_token().tag
+
+ @property
+ def attrs(self) -> dict[str, str | int | float]:
+ """Html attributes."""
+ return self._attribute_token().attrs
+
+ def attrGet(self, name: str) -> None | str | int | float:
+ """Get the value of attribute `name`, or null if it does not exist."""
+ return self._attribute_token().attrGet(name)
+
+ @property
+ def map(self) -> tuple[int, int] | None:
+ """Source map info. Format: `tuple[ line_begin, line_end ]`"""
+ map_ = self._attribute_token().map
+ if map_:
+ # Type ignore because `Token`s attribute types are not perfect
+ return tuple(map_) # type: ignore
+ return None
+
+ @property
+ def level(self) -> int:
+ """nesting level, the same as `state.level`"""
+ return self._attribute_token().level
+
+ @property
+ def content(self) -> str:
+ """In a case of self-closing tag (code, html, fence, etc.), it
+ has contents of this tag."""
+ return self._attribute_token().content
+
+ @property
+ def markup(self) -> str:
+ """'*' or '_' for emphasis, fence string for fence, etc."""
+ return self._attribute_token().markup
+
+ @property
+ def info(self) -> str:
+ """fence infostring"""
+ return self._attribute_token().info
+
+ @property
+ def meta(self) -> dict:
+ """A place for plugins to store an arbitrary data."""
+ return self._attribute_token().meta
+
+ @property
+ def block(self) -> bool:
+ """True for block-level tokens, false for inline tokens."""
+ return self._attribute_token().block
+
+ @property
+ def hidden(self) -> bool:
+ """If it's true, ignore this element when rendering.
+ Used for tight lists to hide paragraphs."""
+ return self._attribute_token().hidden
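+
+
+if __name__ == "__main__":
+    # Minimal usage sketch (illustrative, not part of upstream): parse some
+    # Markdown, build the tree, print it, and check the token round trip.
+    from markdown_it import MarkdownIt
+
+    _tokens = MarkdownIt().parse("# Heading\n\nSome *emphasised* text")
+    _tree = SyntaxTreeNode(_tokens)
+    print(_tree.pretty(indent=2, show_text=True))
+    assert _tree.to_tokens() == _tokens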