diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
commit | 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch) | |
tree | 173a775858bd501c378080a10dca74132f05bc50 /vendor/ungrammar/src/lib.rs | |
parent | Initial commit. (diff) | |
download | rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip |
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/ungrammar/src/lib.rs')
-rw-r--r-- | vendor/ungrammar/src/lib.rs | 137 |
1 files changed, 137 insertions, 0 deletions
diff --git a/vendor/ungrammar/src/lib.rs b/vendor/ungrammar/src/lib.rs new file mode 100644 index 000000000..7aa0ce9c8 --- /dev/null +++ b/vendor/ungrammar/src/lib.rs @@ -0,0 +1,137 @@ +//! Ungrammar -- a DSL for specifying concrete syntax tree grammar. +//! +//! Producing a parser is an explicit non-goal -- it's ok for this grammar to be +//! ambiguous, non LL, non LR, etc. +//! +//! See this +//! [introductory post](https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html) +//! for details. + +#![deny(missing_debug_implementations)] +#![deny(missing_docs)] +#![deny(rust_2018_idioms)] + +mod error; +mod lexer; +mod parser; + +use std::{ops, str::FromStr}; + +pub use error::{Error, Result}; + +/// Returns a Rust grammar. +pub fn rust_grammar() -> Grammar { + let src = include_str!("../rust.ungram"); + src.parse().unwrap() +} + +/// A node, like `A = 'b' | 'c'`. +/// +/// Indexing into a [`Grammar`] with a [`Node`] returns a reference to a +/// [`NodeData`]. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Node(usize); + +/// A token, denoted with single quotes, like `'+'` or `'struct'`. +/// +/// Indexing into a [`Grammar`] with a [`Token`] returns a reference to a +/// [`TokenData`]. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Token(usize); + +/// An Ungrammar grammar. +#[derive(Default, Debug)] +pub struct Grammar { + nodes: Vec<NodeData>, + tokens: Vec<TokenData>, +} + +impl FromStr for Grammar { + type Err = Error; + fn from_str(s: &str) -> Result<Self> { + let tokens = lexer::tokenize(s)?; + parser::parse(tokens) + } +} + +impl Grammar { + /// Returns an iterator over all nodes in the grammar. + pub fn iter(&self) -> impl Iterator<Item = Node> + '_ { + (0..self.nodes.len()).map(Node) + } + + /// Returns an iterator over all tokens in the grammar. + pub fn tokens(&self) -> impl Iterator<Item = Token> + '_ { + (0..self.tokens.len()).map(Token) + } +} + +impl ops::Index<Node> for Grammar { + type Output = NodeData; + fn index(&self, Node(index): Node) -> &NodeData { + &self.nodes[index] + } +} + +impl ops::Index<Token> for Grammar { + type Output = TokenData; + fn index(&self, Token(index): Token) -> &TokenData { + &self.tokens[index] + } +} + +/// Data about a node. +#[derive(Debug)] +pub struct NodeData { + /// The name of the node. + /// + /// In the rule `A = 'b' | 'c'`, this is `"A"`. + pub name: String, + /// The rule for this node. + /// + /// In the rule `A = 'b' | 'c'`, this represents `'b' | 'c'`. + pub rule: Rule, +} + +/// Data about a token. +#[derive(Debug)] +pub struct TokenData { + /// The name of the token. + pub name: String, +} + +/// A production rule. +#[derive(Debug, Eq, PartialEq)] +pub enum Rule { + /// A labeled rule, like `a:B` (`"a"` is the label, `B` is the rule). + Labeled { + /// The label. + label: String, + /// The rule. + rule: Box<Rule>, + }, + /// A node, like `A`. + Node(Node), + /// A token, like `'struct'`. + Token(Token), + /// A sequence of rules, like `'while' '(' Expr ')' Stmt`. + Seq(Vec<Rule>), + /// An alternative between many rules, like `'+' | '-' | '*' | '/'`. + Alt(Vec<Rule>), + /// An optional rule, like `A?`. + Opt(Box<Rule>), + /// A repeated rule, like `A*`. + Rep(Box<Rule>), +} + +#[test] +fn smoke() { + let grammar = include_str!("../ungrammar.ungram"); + let grammar = grammar.parse::<Grammar>().unwrap(); + drop(grammar) +} + +#[test] +fn test_rust_grammar() { + let _ = rust_grammar(); +} |