//! Simple hand-written ungrammar parser. use std::collections::HashMap; use crate::{ error::{bail, format_err, Result}, lexer::{self, TokenKind}, Grammar, Node, NodeData, Rule, Token, TokenData, }; macro_rules! bail { ($loc:expr, $($tt:tt)*) => {{ let err = $crate::error::format_err!($($tt)*) .with_location($loc); return Err(err); }}; } pub(crate) fn parse(tokens: Vec) -> Result { let mut p = Parser::new(tokens); while !p.is_eof() { node(&mut p)?; } p.finish() } #[derive(Default)] struct Parser { grammar: Grammar, tokens: Vec, node_table: HashMap, token_table: HashMap, } const DUMMY_RULE: Rule = Rule::Node(Node(!0)); impl Parser { fn new(mut tokens: Vec) -> Parser { tokens.reverse(); Parser { tokens, ..Parser::default() } } fn peek(&self) -> Option<&lexer::Token> { self.peek_n(0) } fn peek_n(&self, n: usize) -> Option<&lexer::Token> { self.tokens.iter().nth_back(n) } fn bump(&mut self) -> Result { self.tokens .pop() .ok_or_else(|| format_err!("unexpected EOF")) } fn expect(&mut self, kind: TokenKind, what: &str) -> Result<()> { let token = self.bump()?; if token.kind != kind { bail!(token.loc, "unexpected token, expected `{}`", what); } Ok(()) } fn is_eof(&self) -> bool { self.tokens.is_empty() } fn finish(self) -> Result { for node_data in &self.grammar.nodes { if matches!(node_data.rule, DUMMY_RULE) { crate::error::bail!("Undefined node: {}", node_data.name) } } Ok(self.grammar) } fn intern_node(&mut self, name: String) -> Node { let len = self.node_table.len(); let grammar = &mut self.grammar; *self.node_table.entry(name.clone()).or_insert_with(|| { grammar.nodes.push(NodeData { name, rule: DUMMY_RULE, }); Node(len) }) } fn intern_token(&mut self, name: String) -> Token { let len = self.token_table.len(); let grammar = &mut self.grammar; *self.token_table.entry(name.clone()).or_insert_with(|| { grammar.tokens.push(TokenData { name }); Token(len) }) } } fn node(p: &mut Parser) -> Result<()> { let token = p.bump()?; let node = match token.kind { TokenKind::Node(it) => p.intern_node(it), _ => bail!(token.loc, "expected ident"), }; p.expect(TokenKind::Eq, "=")?; if !matches!(p.grammar[node].rule, DUMMY_RULE) { bail!(token.loc, "duplicate rule: `{}`", p.grammar[node].name) } let rule = rule(p)?; p.grammar.nodes[node.0].rule = rule; Ok(()) } fn rule(p: &mut Parser) -> Result { if let Some(lexer::Token { kind: TokenKind::Pipe, loc }) = p.peek() { bail!( *loc, "The first element in a sequence of productions or alternatives \ must not have a leading pipe (`|`)" ); } let lhs = seq_rule(p)?; let mut alt = vec![lhs]; while let Some(token) = p.peek() { if token.kind != TokenKind::Pipe { break; } p.bump()?; let rule = seq_rule(p)?; alt.push(rule) } let res = if alt.len() == 1 { alt.pop().unwrap() } else { Rule::Alt(alt) }; Ok(res) } fn seq_rule(p: &mut Parser) -> Result { let lhs = atom_rule(p)?; let mut seq = vec![lhs]; while let Some(rule) = opt_atom_rule(p)? { seq.push(rule) } let res = if seq.len() == 1 { seq.pop().unwrap() } else { Rule::Seq(seq) }; Ok(res) } fn atom_rule(p: &mut Parser) -> Result { match opt_atom_rule(p)? { Some(it) => Ok(it), None => { let token = p.bump()?; bail!(token.loc, "unexpected token") } } } fn opt_atom_rule(p: &mut Parser) -> Result> { let token = match p.peek() { Some(it) => it, None => return Ok(None), }; let mut res = match &token.kind { TokenKind::Node(name) => { if let Some(lookahead) = p.peek_n(1) { match lookahead.kind { TokenKind::Eq => return Ok(None), TokenKind::Colon => { let label = name.clone(); p.bump()?; p.bump()?; let rule = atom_rule(p)?; let res = Rule::Labeled { label, rule: Box::new(rule), }; return Ok(Some(res)); } _ => (), } } match p.peek_n(1) { Some(token) if token.kind == TokenKind::Eq => return Ok(None), _ => (), } let name = name.clone(); p.bump()?; let node = p.intern_node(name); Rule::Node(node) } TokenKind::Token(name) => { let name = name.clone(); p.bump()?; let token = p.intern_token(name); Rule::Token(token) } TokenKind::LParen => { p.bump()?; let rule = rule(p)?; p.expect(TokenKind::RParen, ")")?; rule } _ => return Ok(None), }; if let Some(token) = p.peek() { match &token.kind { TokenKind::QMark => { p.bump()?; res = Rule::Opt(Box::new(res)); } TokenKind::Star => { p.bump()?; res = Rule::Rep(Box::new(res)); } _ => (), } } Ok(Some(res)) }