Diffstat (limited to 'src/prompt_toolkit/contrib/regular_languages/lexer.py')
-rw-r--r--  src/prompt_toolkit/contrib/regular_languages/lexer.py | 93
1 file changed, 93 insertions(+), 0 deletions(-)
diff --git a/src/prompt_toolkit/contrib/regular_languages/lexer.py b/src/prompt_toolkit/contrib/regular_languages/lexer.py
new file mode 100644
index 0000000..b0a4deb
--- /dev/null
+++ b/src/prompt_toolkit/contrib/regular_languages/lexer.py
@@ -0,0 +1,93 @@
+"""
+`GrammarLexer` is compatible with other lexers and can be used to highlight
+the input using a regular grammar with annotations.
+"""
+from __future__ import annotations
+
+from typing import Callable
+
+from prompt_toolkit.document import Document
+from prompt_toolkit.formatted_text.base import StyleAndTextTuples
+from prompt_toolkit.formatted_text.utils import split_lines
+from prompt_toolkit.lexers import Lexer
+
+from .compiler import _CompiledGrammar
+
+__all__ = [
+ "GrammarLexer",
+]
+
+
+class GrammarLexer(Lexer):
+ """
+    Lexer which can be used for highlighting fragments of the input,
+    according to the variables in the grammar.
+
+    (It does not do any actual lexing of the string itself, but it exposes
+    an API compatible with the Pygments lexer class.)
+
+    :param compiled_grammar: Grammar as returned by the `compile()` function.
+    :param default_style: Style to apply to all input that is not covered by
+        one of the lexers in `lexers`.
+ :param lexers: Dictionary mapping variable names of the regular grammar to
+ the lexers that should be used for this part. (This can
+ call other lexers recursively.) If you wish a part of the
+ grammar to just get one fragment, use a
+ `prompt_toolkit.lexers.SimpleLexer`.
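+
+    A minimal usage sketch (the grammar expression and the style class names
+    below are illustrative only, not part of this module)::
+
+        from prompt_toolkit.contrib.regular_languages.compiler import compile
+        from prompt_toolkit.lexers import SimpleLexer
+
+        g = compile(r"add \s+ (?P<var1>[0-9]+) \s+ (?P<var2>[0-9]+)")
+        lexer = GrammarLexer(
+            g,
+            lexers={
+                "var1": SimpleLexer("class:number"),
+                "var2": SimpleLexer("class:number"),
+            },
+        )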
+ """
+
+ def __init__(
+ self,
+ compiled_grammar: _CompiledGrammar,
+ default_style: str = "",
+ lexers: dict[str, Lexer] | None = None,
+ ) -> None:
+ self.compiled_grammar = compiled_grammar
+ self.default_style = default_style
+ self.lexers = lexers or {}
+
+ def _get_text_fragments(self, text: str) -> StyleAndTextTuples:
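+        # Match the text as (possibly incomplete) input for the grammar.
+        # Input beyond what the grammar accepts is exposed as trailing input.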
+ m = self.compiled_grammar.match_prefix(text)
+
+ if m:
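+            # Start with one (style, character) fragment per character, all
+            # carrying the default style; the variable lexers below overwrite
+            # these in place.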
+ characters: StyleAndTextTuples = [(self.default_style, c) for c in text]
+
+ for v in m.variables():
+                # If we have a `Lexer` instance for this part of the input,
+                # tokenize it recursively and apply the resulting fragments.
+ lexer = self.lexers.get(v.varname)
+
+ if lexer:
+ document = Document(text[v.start : v.stop])
+ lexer_tokens_for_line = lexer.lex_document(document)
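+                    # Re-join the per-line fragments, putting the newline
+                    # characters back and dropping the final extra newline.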
+ text_fragments: StyleAndTextTuples = []
+ for i in range(len(document.lines)):
+ text_fragments.extend(lexer_tokens_for_line(i))
+ text_fragments.append(("", "\n"))
+ if text_fragments:
+ text_fragments.pop()
+
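+                    # Copy the sub-lexer's styles onto the matched region,
+                    # but only where the character still has the default
+                    # style, so that earlier variables take precedence.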
+ i = v.start
+ for t, s, *_ in text_fragments:
+ for c in s:
+ if characters[i][0] == self.default_style:
+ characters[i] = (t, characters[i][1])
+ i += 1
+
+ # Highlight trailing input.
+ trailing_input = m.trailing_input()
+ if trailing_input:
+ for i in range(trailing_input.start, trailing_input.stop):
+ characters[i] = ("class:trailing-input", characters[i][1])
+
+ return characters
+ else:
+ return [("", text)]
+
+ def lex_document(self, document: Document) -> Callable[[int], StyleAndTextTuples]:
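+        # Lex the whole document at once and split the resulting fragments
+        # into lines; `get_line` then returns the fragments for one line.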
+ lines = list(split_lines(self._get_text_fragments(document.text)))
+
+ def get_line(lineno: int) -> StyleAndTextTuples:
+ try:
+ return lines[lineno]
+ except IndexError:
+ return []
+
+ return get_line