diff options
Diffstat (limited to 'vendor/ungrammar')
-rw-r--r-- | vendor/ungrammar/.cargo-checksum.json | 1 | ||||
-rw-r--r-- | vendor/ungrammar/Cargo.toml | 21 | ||||
-rw-r--r-- | vendor/ungrammar/README.md | 21 | ||||
-rw-r--r-- | vendor/ungrammar/rust.ungram | 664 | ||||
-rw-r--r-- | vendor/ungrammar/src/error.rs | 50 | ||||
-rw-r--r-- | vendor/ungrammar/src/lexer.rs | 129 | ||||
-rw-r--r-- | vendor/ungrammar/src/lib.rs | 137 | ||||
-rw-r--r-- | vendor/ungrammar/src/parser.rs | 225 | ||||
-rw-r--r-- | vendor/ungrammar/ungrammar.ungram | 16 |
9 files changed, 1264 insertions, 0 deletions
diff --git a/vendor/ungrammar/.cargo-checksum.json b/vendor/ungrammar/.cargo-checksum.json new file mode 100644 index 000000000..33fb13faa --- /dev/null +++ b/vendor/ungrammar/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"5a03d1af06cbbb65472b00b1ef082893b4e5264af1532a05473dc2a3a902d4c8","README.md":"76d59396d14617d22223c9227cf1a8d65d326250b5d7dd211af4c4f2c9ddad70","rust.ungram":"3b52e6eca2bc2bf56482658b3af8d165b4fb70d0e735e2bb03e60172113c8987","src/error.rs":"b6d62f266dc336e3cc2ffdc9c0494c9b9381ca70136796f50a79afe0cb35bc6a","src/lexer.rs":"09f6cd46bd9ee98bf2b4204dc5dc76bd25b70fbe88e734519efc4184e8c4d074","src/lib.rs":"309ddf513434d91e22921f902fcf39404e13bd2d586e4f73a29b0657bba01e1f","src/parser.rs":"8b3f278a297e9f2b7a5925d1844ff2b91c2559db1dcdb3f3e26eaa5b09bdadfe","ungrammar.ungram":"7db0838f2b02c9e093d321c38bb5c864ee4c799c336f966e10af2fb80109010b"},"package":"a3e5df347f0bf3ec1d670aad6ca5c6a1859cd9ea61d2113125794654ccced68f"}
\ No newline at end of file diff --git a/vendor/ungrammar/Cargo.toml b/vendor/ungrammar/Cargo.toml new file mode 100644 index 000000000..1dca38caf --- /dev/null +++ b/vendor/ungrammar/Cargo.toml @@ -0,0 +1,21 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2018" +name = "ungrammar" +version = "1.16.1" +exclude = ["/bors.toml", "/.github"] +description = "A DSL for describing concrete syntax trees" +license = "MIT OR Apache-2.0" +repository = "https://github.com/matklad/ungrammar" + +[dependencies] diff --git a/vendor/ungrammar/README.md b/vendor/ungrammar/README.md new file mode 100644 index 000000000..a5e130fed --- /dev/null +++ b/vendor/ungrammar/README.md @@ -0,0 +1,21 @@ +# ungrammar + +A DSL for specifying concrete syntax trees. + +See the [blog post][post] for an introduction. + +See [./rust.ungram](./rust.ungram) for an example. + +## Editor support + +- Vim + - [vim-ungrammar][] + - [ungrammar.vim][] +- VSCode + - [ungrammar-tools][] + +[post]: + https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html +[vim-ungrammar]: https://github.com/Iron-E/vim-ungrammar +[ungrammar.vim]: https://github.com/drtychai/ungrammar.vim +[ungrammar-tools]: https://github.com/azdavis/ungrammar-tools diff --git a/vendor/ungrammar/rust.ungram b/vendor/ungrammar/rust.ungram new file mode 100644 index 000000000..cb58486ef --- /dev/null +++ b/vendor/ungrammar/rust.ungram @@ -0,0 +1,664 @@ +// Rust Un-Grammar. +// +// This grammar specifies the structure of Rust's concrete syntax tree. +// It does not specify parsing rules (ambiguities, precedence, etc are out of scope). +// Tokens are processed -- contextual keywords are recognised, compound operators glued. +// +// Legend: +// +// // -- comment +// Name = -- non-terminal definition +// 'ident' -- token (terminal) +// A B -- sequence +// A | B -- alternation +// A* -- zero or more repetition +// A? -- zero or one repetition +// (A) -- same as A +// label:A -- suggested name for field of AST node + +//*************************// +// Names, Paths and Macros // +//*************************// + +Name = + 'ident' | 'self' + +NameRef = + 'ident' | 'int_number' | 'self' | 'super' | 'crate' | 'Self' + +Lifetime = + 'lifetime_ident' + +Path = + (qualifier:Path '::')? segment:PathSegment + +PathSegment = + '::'? NameRef +| NameRef GenericArgList? +| NameRef ParamList RetType? +| '<' PathType ('as' PathType)? '>' + +GenericArgList = + '::'? '<' (GenericArg (',' GenericArg)* ','?)? '>' + +GenericArg = + TypeArg +| AssocTypeArg +| LifetimeArg +| ConstArg + +TypeArg = + Type + +AssocTypeArg = + NameRef GenericParamList? (':' TypeBoundList | '=' Type) + +LifetimeArg = + Lifetime + +ConstArg = + Expr + +MacroCall = + Attr* Path '!' TokenTree ';'? + +TokenTree = + '(' ')' +| '{' '}' +| '[' ']' + +MacroItems = + Item* + +MacroStmts = + statements:Stmt* + Expr? + +//*************************// +// Items // +//*************************// + +SourceFile = + 'shebang'? + Attr* + Item* + +Item = + Const +| Enum +| ExternBlock +| ExternCrate +| Fn +| Impl +| MacroCall +| MacroRules +| MacroDef +| Module +| Static +| Struct +| Trait +| TypeAlias +| Union +| Use + +MacroRules = + Attr* Visibility? + 'macro_rules' '!' Name + TokenTree + +MacroDef = + Attr* Visibility? + 'macro' Name args:TokenTree? + body:TokenTree + +Module = + Attr* Visibility? + 'mod' Name + (ItemList | ';') + +ItemList = + '{' Attr* Item* '}' + +ExternCrate = + Attr* Visibility? + 'extern' 'crate' NameRef Rename? ';' + +Rename = + 'as' (Name | '_') + +Use = + Attr* Visibility? + 'use' UseTree ';' + +UseTree = + (Path? '::')? ('*' | UseTreeList) +| Path Rename? + +UseTreeList = + '{' (UseTree (',' UseTree)* ','?)? '}' + +Fn = + Attr* Visibility? + 'default'? 'const'? 'async'? 'unsafe'? Abi? + 'fn' Name GenericParamList? ParamList RetType? WhereClause? + (body:BlockExpr | ';') + +Abi = + 'extern' 'string'? + +ParamList = + '('( + SelfParam + | (SelfParam ',')? (Param (',' Param)* ','?)? + )')' +| '|' (Param (',' Param)* ','?)? '|' + +SelfParam = + Attr* ( + ('&' Lifetime?)? 'mut'? Name + | 'mut'? Name ':' Type + ) + +Param = + Attr* ( + Pat (':' Type)? + | Type + | '...' + ) + +RetType = + '->' Type + +TypeAlias = + Attr* Visibility? + 'default'? + 'type' Name GenericParamList? (':' TypeBoundList?)? WhereClause? + ('=' Type)? ';' + +Struct = + Attr* Visibility? + 'struct' Name GenericParamList? ( + WhereClause? (RecordFieldList | ';') + | TupleFieldList WhereClause? ';' + ) + +RecordFieldList = + '{' fields:(RecordField (',' RecordField)* ','?)? '}' + +RecordField = + Attr* Visibility? + Name ':' Type + +TupleFieldList = + '(' fields:(TupleField (',' TupleField)* ','?)? ')' + +TupleField = + Attr* Visibility? + Type + +FieldList = + RecordFieldList +| TupleFieldList + +Enum = + Attr* Visibility? + 'enum' Name GenericParamList? WhereClause? + VariantList + +VariantList = + '{' (Variant (',' Variant)* ','?)? '}' + +Variant = + Attr* Visibility? + Name FieldList? ('=' Expr)? + +Union = + Attr* Visibility? + 'union' Name GenericParamList? WhereClause? + RecordFieldList + +// A Data Type. +// +// Not used directly in the grammar, but handy to have anyway. +Adt = + Enum +| Struct +| Union + +Const = + Attr* Visibility? + 'default'? + 'const' (Name | '_') ':' Type + ('=' body:Expr)? ';' + +Static = + Attr* Visibility? + 'static' 'mut'? Name ':' Type + ('=' body:Expr)? ';' + +Trait = + Attr* Visibility? + 'unsafe'? 'auto'? + 'trait' Name GenericParamList? (':' TypeBoundList?)? WhereClause? + AssocItemList + +AssocItemList = + '{' Attr* AssocItem* '}' + +AssocItem = + Const +| Fn +| MacroCall +| TypeAlias + +Impl = + Attr* Visibility? + 'default'? 'unsafe'? + 'impl' GenericParamList? ('const'? '!'? trait:Type 'for')? self_ty:Type WhereClause? + AssocItemList + +ExternBlock = + Attr* 'unsafe'? Abi ExternItemList + +ExternItemList = + '{' Attr* ExternItem* '}' + +ExternItem = + Fn +| MacroCall +| Static +| TypeAlias + +GenericParamList = + '<' (GenericParam (',' GenericParam)* ','?)? '>' + +GenericParam = + ConstParam +| LifetimeParam +| TypeParam + +TypeParam = + Attr* Name (':' TypeBoundList?)? + ('=' default_type:Type)? + +ConstParam = + Attr* 'const' Name ':' Type + ('=' default_val:Expr)? + +LifetimeParam = + Attr* Lifetime (':' TypeBoundList?)? + +WhereClause = + 'where' predicates:(WherePred (',' WherePred)* ','?) + +WherePred = + ('for' GenericParamList)? (Lifetime | Type) ':' TypeBoundList? + +Visibility = + 'pub' ('(' 'in'? Path ')')? + +Attr = + '#' '!'? '[' Meta ']' + +Meta = + Path ('=' Expr | TokenTree)? + +//****************************// +// Statements and Expressions // +//****************************// + +Stmt = + ';' +| ExprStmt +| Item +| LetStmt + +LetStmt = + Attr* 'let' Pat (':' Type)? + '=' initializer:Expr + LetElse? + ';' + +LetElse = + 'else' BlockExpr + +ExprStmt = + Expr ';'? + +Expr = + ArrayExpr +| AwaitExpr +| BinExpr +| BlockExpr +| BoxExpr +| BreakExpr +| CallExpr +| CastExpr +| ClosureExpr +| ContinueExpr +| FieldExpr +| ForExpr +| IfExpr +| IndexExpr +| Literal +| LoopExpr +| MacroCall +| MacroStmts +| MatchExpr +| MethodCallExpr +| ParenExpr +| PathExpr +| PrefixExpr +| RangeExpr +| RecordExpr +| RefExpr +| ReturnExpr +| TryExpr +| TupleExpr +| WhileExpr +| YieldExpr +| LetExpr +| UnderscoreExpr + +Literal = + Attr* value:( + 'int_number' | 'float_number' + | 'string' | 'raw_string' + | 'byte_string' | 'raw_byte_string' + | 'true' | 'false' + | 'char' | 'byte' + ) + +PathExpr = + Attr* Path + +StmtList = + '{' + Attr* + statements:Stmt* + tail_expr:Expr? + '}' + +RefExpr = + Attr* '&' ('raw' | 'mut' | 'const') Expr + +TryExpr = + Attr* Expr '?' + +BlockExpr = + Attr* Label? ('try' | 'unsafe' | 'async' | 'const') StmtList + +PrefixExpr = + Attr* op:('-' | '!' | '*') Expr + +BinExpr = + Attr* + lhs:Expr + op:( + '||' | '&&' + | '==' | '!=' | '<=' | '>=' | '<' | '>' + | '+' | '*' | '-' | '/' | '%' | '<<' | '>>' | '^' | '|' | '&' + | '=' | '+=' | '/=' | '*=' | '%=' | '>>=' | '<<=' | '-=' | '|=' | '&=' | '^=' + ) + rhs:Expr + +CastExpr = + Attr* Expr 'as' Type + +ParenExpr = + Attr* '(' Attr* Expr ')' + +ArrayExpr = + Attr* '[' Attr* ( + (Expr (',' Expr)* ','?)? + | Expr ';' Expr + ) ']' + +IndexExpr = + Attr* base:Expr '[' index:Expr ']' + +TupleExpr = + Attr* '(' Attr* fields:(Expr (',' Expr)* ','?)? ')' + +RecordExpr = + Path RecordExprFieldList + +RecordExprFieldList = + '{' + Attr* + fields:(RecordExprField (',' RecordExprField)* ','?)? + ('..' spread:Expr?)? + '}' + +RecordExprField = + Attr* (NameRef ':')? Expr + +CallExpr = + Attr* Expr ArgList + +ArgList = + '(' args:(Expr (',' Expr)* ','?)? ')' + +MethodCallExpr = + Attr* receiver:Expr '.' NameRef GenericArgList? ArgList + +FieldExpr = + Attr* Expr '.' NameRef + +ClosureExpr = + Attr* 'static'? 'async'? 'move'? ParamList RetType? + body:Expr + +IfExpr = + Attr* 'if' condition:Expr then_branch:BlockExpr + ('else' else_branch:(IfExpr | BlockExpr))? + +LoopExpr = + Attr* Label? 'loop' + loop_body:BlockExpr + +ForExpr = + Attr* Label? 'for' Pat 'in' iterable:Expr + loop_body:BlockExpr + +WhileExpr = + Attr* Label? 'while' condition:Expr + loop_body:BlockExpr + +Label = + Lifetime ':' + +BreakExpr = + Attr* 'break' Lifetime? Expr? + +ContinueExpr = + Attr* 'continue' Lifetime? + +RangeExpr = + Attr* start:Expr? op:('..' | '..=') end:Expr? + +MatchExpr = + Attr* 'match' Expr MatchArmList + +MatchArmList = + '{' + Attr* + arms:MatchArm* + '}' + +MatchArm = + Attr* Pat guard:MatchGuard? '=>' Expr ','? + +MatchGuard = + 'if' condition:Expr + +ReturnExpr = + Attr* 'return' Expr? + +YieldExpr = + Attr* 'yield' Expr? + +LetExpr = + Attr* 'let' Pat '=' Expr + +UnderscoreExpr = + Attr* '_' + +AwaitExpr = + Attr* Expr '.' 'await' + +BoxExpr = + Attr* 'box' Expr + +//*************************// +// Types // +//*************************// + +Type = + ArrayType +| DynTraitType +| FnPtrType +| ForType +| ImplTraitType +| InferType +| MacroType +| NeverType +| ParenType +| PathType +| PtrType +| RefType +| SliceType +| TupleType + +ParenType = + '(' Type ')' + +NeverType = + '!' + +MacroType = + MacroCall + +PathType = + Path + +TupleType = + '(' fields:(Type (',' Type)* ','?)? ')' + +PtrType = + '*' ('const' | 'mut') Type + +RefType = + '&' Lifetime? 'mut'? Type + +ArrayType = + '[' Type ';' Expr ']' + +SliceType = + '[' Type ']' + +InferType = + '_' + +FnPtrType = + 'const'? 'async'? 'unsafe'? Abi? 'fn' ParamList RetType? + +ForType = + 'for' GenericParamList Type + +ImplTraitType = + 'impl' TypeBoundList + +DynTraitType = + 'dyn' TypeBoundList + +TypeBoundList = + bounds:(TypeBound ('+' TypeBound)* '+'?) + +TypeBound = + Lifetime +| ('?' | '~' 'const')? Type + +//************************// +// Patterns // +//************************// + +Pat = + IdentPat +| BoxPat +| RestPat +| LiteralPat +| MacroPat +| OrPat +| ParenPat +| PathPat +| WildcardPat +| RangePat +| RecordPat +| RefPat +| SlicePat +| TuplePat +| TupleStructPat +| ConstBlockPat + +LiteralPat = + Literal + +IdentPat = + Attr* 'ref'? 'mut'? Name ('@' Pat)? + +WildcardPat = + '_' + +RangePat = + // 1.. + start:Pat op:('..' | '..=') + // 1..2 + | start:Pat op:('..' | '..=') end:Pat + // ..2 + | op:('..' | '..=') end:Pat + +RefPat = + '&' 'mut'? Pat + +RecordPat = + Path RecordPatFieldList + +RecordPatFieldList = + '{' + fields:(RecordPatField (',' RecordPatField)* ','?)? + RestPat? + '}' + +RecordPatField = + Attr* (NameRef ':')? Pat + +TupleStructPat = + Path '(' fields:(Pat (',' Pat)* ','?)? ')' + +TuplePat = + '(' fields:(Pat (',' Pat)* ','?)? ')' + +ParenPat = + '(' Pat ')' + +SlicePat = + '[' (Pat (',' Pat)* ','?)? ']' + +PathPat = + Path + +OrPat = + (Pat ('|' Pat)* '|'?) + +BoxPat = + 'box' Pat + +RestPat = + Attr* '..' + +MacroPat = + MacroCall + +ConstBlockPat = + 'const' BlockExpr diff --git a/vendor/ungrammar/src/error.rs b/vendor/ungrammar/src/error.rs new file mode 100644 index 000000000..355e0b7eb --- /dev/null +++ b/vendor/ungrammar/src/error.rs @@ -0,0 +1,50 @@ +//! Boilerplate error definitions. +use std::fmt; + +use crate::lexer::Location; + +/// A type alias for std's Result with the Error as our error type. +pub type Result<T, E = Error> = std::result::Result<T, E>; + +/// An error encountered when parsing a Grammar. +#[derive(Debug)] +pub struct Error { + pub(crate) message: String, + pub(crate) location: Option<Location>, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(loc) = self.location { + // Report 1-based indices, to match text editors + write!(f, "{}:{}: ", loc.line + 1, loc.column + 1)? + } + write!(f, "{}", self.message) + } +} + +impl std::error::Error for Error {} + +impl Error { + pub(crate) fn with_location(self, location: Location) -> Error { + Error { + location: Some(location), + ..self + } + } +} + +macro_rules! _format_err { + ($($tt:tt)*) => { + $crate::error::Error { + message: format!($($tt)*), + location: None, + } + }; +} +pub(crate) use _format_err as format_err; + +macro_rules! _bail { + ($($tt:tt)*) => { return Err($crate::error::format_err!($($tt)*)) }; +} +pub(crate) use _bail as bail; diff --git a/vendor/ungrammar/src/lexer.rs b/vendor/ungrammar/src/lexer.rs new file mode 100644 index 000000000..f4c979b5b --- /dev/null +++ b/vendor/ungrammar/src/lexer.rs @@ -0,0 +1,129 @@ +//! Simple hand-written ungrammar lexer +use crate::error::{bail, Result}; + +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum TokenKind { + Node(String), + Token(String), + Eq, + Star, + Pipe, + QMark, + Colon, + LParen, + RParen, +} + +#[derive(Debug)] +pub(crate) struct Token { + pub(crate) kind: TokenKind, + pub(crate) loc: Location, +} + +#[derive(Copy, Clone, Default, Debug)] +pub(crate) struct Location { + pub(crate) line: usize, + pub(crate) column: usize, +} + +impl Location { + fn advance(&mut self, text: &str) { + match text.rfind('\n') { + Some(idx) => { + self.line += text.chars().filter(|&it| it == '\n').count(); + self.column = text[idx + 1..].chars().count(); + } + None => self.column += text.chars().count(), + } + } +} + +pub(crate) fn tokenize(mut input: &str) -> Result<Vec<Token>> { + let mut res = Vec::new(); + let mut loc = Location::default(); + while !input.is_empty() { + let old_input = input; + skip_ws(&mut input); + skip_comment(&mut input); + if old_input.len() == input.len() { + match advance(&mut input) { + Ok(kind) => { + res.push(Token { kind, loc }); + } + Err(err) => return Err(err.with_location(loc)), + } + } + let consumed = old_input.len() - input.len(); + loc.advance(&old_input[..consumed]); + } + + Ok(res) +} + +fn skip_ws(input: &mut &str) { + *input = input.trim_start_matches(is_whitespace) +} +fn skip_comment(input: &mut &str) { + if input.starts_with("//") { + let idx = input.find('\n').map_or(input.len(), |it| it + 1); + *input = &input[idx..] + } +} + +fn advance(input: &mut &str) -> Result<TokenKind> { + let mut chars = input.chars(); + let c = chars.next().unwrap(); + let res = match c { + '=' => TokenKind::Eq, + '*' => TokenKind::Star, + '?' => TokenKind::QMark, + '(' => TokenKind::LParen, + ')' => TokenKind::RParen, + '|' => TokenKind::Pipe, + ':' => TokenKind::Colon, + '\'' => { + let mut buf = String::new(); + loop { + match chars.next() { + None => bail!("unclosed token literal"), + Some('\\') => match chars.next() { + Some(c) if is_escapable(c) => buf.push(c), + _ => bail!("invalid escape in token literal"), + }, + Some('\'') => break, + Some(c) => buf.push(c), + } + } + TokenKind::Token(buf) + } + c if is_ident_char(c) => { + let mut buf = String::new(); + buf.push(c); + loop { + match chars.clone().next() { + Some(c) if is_ident_char(c) => { + chars.next(); + buf.push(c); + } + _ => break, + } + } + TokenKind::Node(buf) + } + '\r' => bail!("unexpected `\\r`, only Unix-style line endings allowed"), + c => bail!("unexpected character: `{}`", c), + }; + + *input = chars.as_str(); + Ok(res) +} + +fn is_escapable(c: char) -> bool { + matches!(c, '\\' | '\'') +} +fn is_whitespace(c: char) -> bool { + matches!(c, ' ' | '\t' | '\n') +} +fn is_ident_char(c: char) -> bool { + matches!(c, 'a'..='z' | 'A'..='Z' | '_') +} diff --git a/vendor/ungrammar/src/lib.rs b/vendor/ungrammar/src/lib.rs new file mode 100644 index 000000000..7aa0ce9c8 --- /dev/null +++ b/vendor/ungrammar/src/lib.rs @@ -0,0 +1,137 @@ +//! Ungrammar -- a DSL for specifying concrete syntax tree grammar. +//! +//! Producing a parser is an explicit non-goal -- it's ok for this grammar to be +//! ambiguous, non LL, non LR, etc. +//! +//! See this +//! [introductory post](https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html) +//! for details. + +#![deny(missing_debug_implementations)] +#![deny(missing_docs)] +#![deny(rust_2018_idioms)] + +mod error; +mod lexer; +mod parser; + +use std::{ops, str::FromStr}; + +pub use error::{Error, Result}; + +/// Returns a Rust grammar. +pub fn rust_grammar() -> Grammar { + let src = include_str!("../rust.ungram"); + src.parse().unwrap() +} + +/// A node, like `A = 'b' | 'c'`. +/// +/// Indexing into a [`Grammar`] with a [`Node`] returns a reference to a +/// [`NodeData`]. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Node(usize); + +/// A token, denoted with single quotes, like `'+'` or `'struct'`. +/// +/// Indexing into a [`Grammar`] with a [`Token`] returns a reference to a +/// [`TokenData`]. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Token(usize); + +/// An Ungrammar grammar. +#[derive(Default, Debug)] +pub struct Grammar { + nodes: Vec<NodeData>, + tokens: Vec<TokenData>, +} + +impl FromStr for Grammar { + type Err = Error; + fn from_str(s: &str) -> Result<Self> { + let tokens = lexer::tokenize(s)?; + parser::parse(tokens) + } +} + +impl Grammar { + /// Returns an iterator over all nodes in the grammar. + pub fn iter(&self) -> impl Iterator<Item = Node> + '_ { + (0..self.nodes.len()).map(Node) + } + + /// Returns an iterator over all tokens in the grammar. + pub fn tokens(&self) -> impl Iterator<Item = Token> + '_ { + (0..self.tokens.len()).map(Token) + } +} + +impl ops::Index<Node> for Grammar { + type Output = NodeData; + fn index(&self, Node(index): Node) -> &NodeData { + &self.nodes[index] + } +} + +impl ops::Index<Token> for Grammar { + type Output = TokenData; + fn index(&self, Token(index): Token) -> &TokenData { + &self.tokens[index] + } +} + +/// Data about a node. +#[derive(Debug)] +pub struct NodeData { + /// The name of the node. + /// + /// In the rule `A = 'b' | 'c'`, this is `"A"`. + pub name: String, + /// The rule for this node. + /// + /// In the rule `A = 'b' | 'c'`, this represents `'b' | 'c'`. + pub rule: Rule, +} + +/// Data about a token. +#[derive(Debug)] +pub struct TokenData { + /// The name of the token. + pub name: String, +} + +/// A production rule. +#[derive(Debug, Eq, PartialEq)] +pub enum Rule { + /// A labeled rule, like `a:B` (`"a"` is the label, `B` is the rule). + Labeled { + /// The label. + label: String, + /// The rule. + rule: Box<Rule>, + }, + /// A node, like `A`. + Node(Node), + /// A token, like `'struct'`. + Token(Token), + /// A sequence of rules, like `'while' '(' Expr ')' Stmt`. + Seq(Vec<Rule>), + /// An alternative between many rules, like `'+' | '-' | '*' | '/'`. + Alt(Vec<Rule>), + /// An optional rule, like `A?`. + Opt(Box<Rule>), + /// A repeated rule, like `A*`. + Rep(Box<Rule>), +} + +#[test] +fn smoke() { + let grammar = include_str!("../ungrammar.ungram"); + let grammar = grammar.parse::<Grammar>().unwrap(); + drop(grammar) +} + +#[test] +fn test_rust_grammar() { + let _ = rust_grammar(); +} diff --git a/vendor/ungrammar/src/parser.rs b/vendor/ungrammar/src/parser.rs new file mode 100644 index 000000000..a4ce9c120 --- /dev/null +++ b/vendor/ungrammar/src/parser.rs @@ -0,0 +1,225 @@ +//! Simple hand-written ungrammar parser. +use std::collections::HashMap; + +use crate::{ + error::{bail, format_err, Result}, + lexer::{self, TokenKind}, + Grammar, Node, NodeData, Rule, Token, TokenData, +}; + +macro_rules! bail { + ($loc:expr, $($tt:tt)*) => {{ + let err = $crate::error::format_err!($($tt)*) + .with_location($loc); + return Err(err); + }}; +} + +pub(crate) fn parse(tokens: Vec<lexer::Token>) -> Result<Grammar> { + let mut p = Parser::new(tokens); + while !p.is_eof() { + node(&mut p)?; + } + p.finish() +} + +#[derive(Default)] +struct Parser { + grammar: Grammar, + tokens: Vec<lexer::Token>, + node_table: HashMap<String, Node>, + token_table: HashMap<String, Token>, +} + +const DUMMY_RULE: Rule = Rule::Node(Node(!0)); + +impl Parser { + fn new(mut tokens: Vec<lexer::Token>) -> Parser { + tokens.reverse(); + Parser { + tokens, + ..Parser::default() + } + } + + fn peek(&self) -> Option<&lexer::Token> { + self.peek_n(0) + } + fn peek_n(&self, n: usize) -> Option<&lexer::Token> { + self.tokens.iter().nth_back(n) + } + fn bump(&mut self) -> Result<lexer::Token> { + self.tokens + .pop() + .ok_or_else(|| format_err!("unexpected EOF")) + } + fn expect(&mut self, kind: TokenKind, what: &str) -> Result<()> { + let token = self.bump()?; + if token.kind != kind { + bail!(token.loc, "unexpected token, expected `{}`", what); + } + Ok(()) + } + fn is_eof(&self) -> bool { + self.tokens.is_empty() + } + fn finish(self) -> Result<Grammar> { + for node_data in &self.grammar.nodes { + if matches!(node_data.rule, DUMMY_RULE) { + crate::error::bail!("Undefined node: {}", node_data.name) + } + } + Ok(self.grammar) + } + fn intern_node(&mut self, name: String) -> Node { + let len = self.node_table.len(); + let grammar = &mut self.grammar; + *self.node_table.entry(name.clone()).or_insert_with(|| { + grammar.nodes.push(NodeData { + name, + rule: DUMMY_RULE, + }); + Node(len) + }) + } + fn intern_token(&mut self, name: String) -> Token { + let len = self.token_table.len(); + let grammar = &mut self.grammar; + *self.token_table.entry(name.clone()).or_insert_with(|| { + grammar.tokens.push(TokenData { name }); + Token(len) + }) + } +} + +fn node(p: &mut Parser) -> Result<()> { + let token = p.bump()?; + let node = match token.kind { + TokenKind::Node(it) => p.intern_node(it), + _ => bail!(token.loc, "expected ident"), + }; + p.expect(TokenKind::Eq, "=")?; + if !matches!(p.grammar[node].rule, DUMMY_RULE) { + bail!(token.loc, "duplicate rule: `{}`", p.grammar[node].name) + } + + let rule = rule(p)?; + p.grammar.nodes[node.0].rule = rule; + Ok(()) +} + +fn rule(p: &mut Parser) -> Result<Rule> { + if let Some(lexer::Token { kind: TokenKind::Pipe, loc }) = p.peek() { + bail!( + *loc, + "The first element in a sequence of productions or alternatives \ + must not have a leading pipe (`|`)" + ); + } + + let lhs = seq_rule(p)?; + let mut alt = vec![lhs]; + while let Some(token) = p.peek() { + if token.kind != TokenKind::Pipe { + break; + } + p.bump()?; + let rule = seq_rule(p)?; + alt.push(rule) + } + let res = if alt.len() == 1 { + alt.pop().unwrap() + } else { + Rule::Alt(alt) + }; + Ok(res) +} + +fn seq_rule(p: &mut Parser) -> Result<Rule> { + let lhs = atom_rule(p)?; + + let mut seq = vec![lhs]; + while let Some(rule) = opt_atom_rule(p)? { + seq.push(rule) + } + let res = if seq.len() == 1 { + seq.pop().unwrap() + } else { + Rule::Seq(seq) + }; + Ok(res) +} + +fn atom_rule(p: &mut Parser) -> Result<Rule> { + match opt_atom_rule(p)? { + Some(it) => Ok(it), + None => { + let token = p.bump()?; + bail!(token.loc, "unexpected token") + } + } +} + +fn opt_atom_rule(p: &mut Parser) -> Result<Option<Rule>> { + let token = match p.peek() { + Some(it) => it, + None => return Ok(None), + }; + let mut res = match &token.kind { + TokenKind::Node(name) => { + if let Some(lookahead) = p.peek_n(1) { + match lookahead.kind { + TokenKind::Eq => return Ok(None), + TokenKind::Colon => { + let label = name.clone(); + p.bump()?; + p.bump()?; + let rule = atom_rule(p)?; + let res = Rule::Labeled { + label, + rule: Box::new(rule), + }; + return Ok(Some(res)); + } + _ => (), + } + } + match p.peek_n(1) { + Some(token) if token.kind == TokenKind::Eq => return Ok(None), + _ => (), + } + let name = name.clone(); + p.bump()?; + let node = p.intern_node(name); + Rule::Node(node) + } + TokenKind::Token(name) => { + let name = name.clone(); + p.bump()?; + let token = p.intern_token(name); + Rule::Token(token) + } + TokenKind::LParen => { + p.bump()?; + let rule = rule(p)?; + p.expect(TokenKind::RParen, ")")?; + rule + } + _ => return Ok(None), + }; + + if let Some(token) = p.peek() { + match &token.kind { + TokenKind::QMark => { + p.bump()?; + res = Rule::Opt(Box::new(res)); + } + TokenKind::Star => { + p.bump()?; + res = Rule::Rep(Box::new(res)); + } + _ => (), + } + } + Ok(Some(res)) +} diff --git a/vendor/ungrammar/ungrammar.ungram b/vendor/ungrammar/ungrammar.ungram new file mode 100644 index 000000000..6cb4e10fb --- /dev/null +++ b/vendor/ungrammar/ungrammar.ungram @@ -0,0 +1,16 @@ +/// ungrammar for ungrammar +Grammar = + Node * + +Node = + name:'ident' '=' Rule + +Rule = + 'ident' +| 'token_ident' +| Rule * +| Rule ( '|' Rule) * +| Rule '?' +| Rule '*' +| '(' Rule ')' +| label:'ident' ':' |