// pest. The Elegant Parser // Copyright (c) 2018 Dragoș Tiselice // // Licensed under the Apache License, Version 2.0 // or the MIT // license , at your // option. All files in the project carrying such notice may not be copied, // modified, or distributed except according to those terms. //! Types for the pest's abstract syntax tree. /// A grammar rule #[derive(Clone, Debug, Eq, PartialEq)] pub struct Rule { /// The name of the rule pub name: String, /// The rule's type (silent, atomic, ...) pub ty: RuleType, /// The rule's expression pub expr: Expr, } /// All possible rule types #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum RuleType { /// The normal rule type Normal, /// Silent rules are just like normal rules /// — when run, they function the same way — /// except they do not produce pairs or tokens. /// If a rule is silent, it will never appear in a parse result. /// (their syntax is `_{ ... }`) Silent, /// atomic rule prevent implicit whitespace: inside an atomic rule, /// the tilde ~ means "immediately followed by", /// and repetition operators (asterisk * and plus sign +) /// have no implicit separation. In addition, all other rules /// called from an atomic rule are also treated as atomic. /// In an atomic rule, interior matching rules are silent. /// (their syntax is `@{ ... }`) Atomic, /// Compound atomic rules are similar to atomic rules, /// but they produce inner tokens as normal. /// (their syntax is `${ ... }`) CompoundAtomic, /// Non-atomic rules cancel the effect of atomic rules. /// (their syntax is `!{ ... }`) NonAtomic, } /// All possible rule expressions /// /// # Warning: Semantic Versioning /// There may be non-breaking changes to the meta-grammar /// between minor versions. Those non-breaking changes, however, /// may translate into semver-breaking changes due to the additional variants /// propaged from the `Rule` enum. This is a known issue and will be fixed in the /// future (e.g. by increasing MSRV and non_exhaustive annotations). #[derive(Clone, Debug, Eq, PartialEq)] pub enum Expr { /// Matches an exact string, e.g. `"a"` Str(String), /// Matches an exact string, case insensitively (ASCII only), e.g. `^"a"` Insens(String), /// Matches one character in the range, e.g. `'a'..'z'` Range(String, String), /// Matches the rule with the given name, e.g. `a` Ident(String), /// Matches a custom part of the stack, e.g. `PEEK[..]` PeekSlice(i32, Option), /// Positive lookahead; matches expression without making progress, e.g. `&e` PosPred(Box), /// Negative lookahead; matches if expression doesn't match, without making progress, e.g. `!e` NegPred(Box), /// Matches a sequence of two expressions, e.g. `e1 ~ e2` Seq(Box, Box), /// Matches either of two expressions, e.g. `e1 | e2` Choice(Box, Box), /// Optionally matches an expression, e.g. `e?` Opt(Box), /// Matches an expression zero or more times, e.g. `e*` Rep(Box), /// Matches an expression one or more times, e.g. `e+` RepOnce(Box), /// Matches an expression an exact number of times, e.g. `e{n}` RepExact(Box, u32), /// Matches an expression at least a number of times, e.g. `e{n,}` RepMin(Box, u32), /// Matches an expression at most a number of times, e.g. `e{,n}` RepMax(Box, u32), /// Matches an expression a number of times within a range, e.g. `e{m, n}` RepMinMax(Box, u32, u32), /// Continues to match expressions until one of the strings in the `Vec` is found Skip(Vec), /// Matches an expression and pushes it to the stack, e.g. `push(e)` Push(Box), /// Matches an expression and assigns a label to it, e.g. #label = exp #[cfg(feature = "grammar-extras")] NodeTag(Box, String), } impl Expr { /// Returns the iterator that steps the expression from top to bottom. pub fn iter_top_down(&self) -> ExprTopDownIterator { ExprTopDownIterator::new(self) } /// Applies `f` to the expression and all its children (top to bottom). pub fn map_top_down(self, mut f: F) -> Expr where F: FnMut(Expr) -> Expr, { fn map_internal(expr: Expr, f: &mut F) -> Expr where F: FnMut(Expr) -> Expr, { let expr = f(expr); match expr { Expr::PosPred(expr) => { let mapped = Box::new(map_internal(*expr, f)); Expr::PosPred(mapped) } Expr::NegPred(expr) => { let mapped = Box::new(map_internal(*expr, f)); Expr::NegPred(mapped) } Expr::Seq(lhs, rhs) => { let mapped_lhs = Box::new(map_internal(*lhs, f)); let mapped_rhs = Box::new(map_internal(*rhs, f)); Expr::Seq(mapped_lhs, mapped_rhs) } Expr::Choice(lhs, rhs) => { let mapped_lhs = Box::new(map_internal(*lhs, f)); let mapped_rhs = Box::new(map_internal(*rhs, f)); Expr::Choice(mapped_lhs, mapped_rhs) } Expr::Rep(expr) => { let mapped = Box::new(map_internal(*expr, f)); Expr::Rep(mapped) } Expr::RepOnce(expr) => { let mapped = Box::new(map_internal(*expr, f)); Expr::RepOnce(mapped) } Expr::RepExact(expr, max) => { let mapped = Box::new(map_internal(*expr, f)); Expr::RepExact(mapped, max) } Expr::RepMin(expr, num) => { let mapped = Box::new(map_internal(*expr, f)); Expr::RepMin(mapped, num) } Expr::RepMax(expr, num) => { let mapped = Box::new(map_internal(*expr, f)); Expr::RepMax(mapped, num) } Expr::RepMinMax(expr, min, max) => { let mapped = Box::new(map_internal(*expr, f)); Expr::RepMinMax(mapped, min, max) } Expr::Opt(expr) => { let mapped = Box::new(map_internal(*expr, f)); Expr::Opt(mapped) } Expr::Push(expr) => { let mapped = Box::new(map_internal(*expr, f)); Expr::Push(mapped) } #[cfg(feature = "grammar-extras")] Expr::NodeTag(expr, tag) => { let mapped = Box::new(map_internal(*expr, f)); Expr::NodeTag(mapped, tag) } expr => expr, } } map_internal(self, &mut f) } /// Applies `f` to the expression and all its children (bottom up). pub fn map_bottom_up(self, mut f: F) -> Expr where F: FnMut(Expr) -> Expr, { fn map_internal(expr: Expr, f: &mut F) -> Expr where F: FnMut(Expr) -> Expr, { let mapped = match expr { Expr::PosPred(expr) => { let mapped = Box::new(map_internal(*expr, f)); Expr::PosPred(mapped) } Expr::NegPred(expr) => { let mapped = Box::new(map_internal(*expr, f)); Expr::NegPred(mapped) } Expr::Seq(lhs, rhs) => { let mapped_lhs = Box::new(map_internal(*lhs, f)); let mapped_rhs = Box::new(map_internal(*rhs, f)); Expr::Seq(mapped_lhs, mapped_rhs) } Expr::Choice(lhs, rhs) => { let mapped_lhs = Box::new(map_internal(*lhs, f)); let mapped_rhs = Box::new(map_internal(*rhs, f)); Expr::Choice(mapped_lhs, mapped_rhs) } Expr::Rep(expr) => { let mapped = Box::new(map_internal(*expr, f)); Expr::Rep(mapped) } Expr::RepOnce(expr) => { let mapped = Box::new(map_internal(*expr, f)); Expr::RepOnce(mapped) } Expr::RepExact(expr, num) => { let mapped = Box::new(map_internal(*expr, f)); Expr::RepExact(mapped, num) } Expr::RepMin(expr, max) => { let mapped = Box::new(map_internal(*expr, f)); Expr::RepMin(mapped, max) } Expr::RepMax(expr, max) => { let mapped = Box::new(map_internal(*expr, f)); Expr::RepMax(mapped, max) } Expr::RepMinMax(expr, min, max) => { let mapped = Box::new(map_internal(*expr, f)); Expr::RepMinMax(mapped, min, max) } Expr::Opt(expr) => { let mapped = Box::new(map_internal(*expr, f)); Expr::Opt(mapped) } Expr::Push(expr) => { let mapped = Box::new(map_internal(*expr, f)); Expr::Push(mapped) } #[cfg(feature = "grammar-extras")] Expr::NodeTag(expr, tag) => { let mapped = Box::new(map_internal(*expr, f)); Expr::NodeTag(mapped, tag) } expr => expr, }; f(mapped) } map_internal(self, &mut f) } } impl core::fmt::Display for Expr { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match self { Expr::Str(s) => write!(f, "{:?}", s), Expr::Insens(s) => write!(f, "^{:?}", s), Expr::Range(start, end) => { let start = start.chars().next().expect("Empty range start."); let end = end.chars().next().expect("Empty range end."); write!(f, "({:?}..{:?})", start, end) } Expr::Ident(id) => write!(f, "{}", id), Expr::PeekSlice(start, end) => match end { Some(end) => write!(f, "PEEK[{}..{}]", start, end), None => write!(f, "PEEK[{}..]", start), }, Expr::PosPred(expr) => write!(f, "&{}", expr.as_ref()), Expr::NegPred(expr) => write!(f, "!{}", expr.as_ref()), Expr::Seq(lhs, rhs) => { let mut nodes = Vec::new(); nodes.push(lhs); let mut current = rhs; while let Expr::Seq(lhs, rhs) = current.as_ref() { nodes.push(lhs); current = rhs; } nodes.push(current); let sequence = nodes .iter() .map(|node| format!("{}", node)) .collect::>() .join(" ~ "); write!(f, "({})", sequence) } Expr::Choice(lhs, rhs) => { let mut nodes = Vec::new(); nodes.push(lhs); let mut current = rhs; while let Expr::Choice(lhs, rhs) = current.as_ref() { nodes.push(lhs); current = rhs; } nodes.push(current); let sequence = nodes .iter() .map(|node| format!("{}", node)) .collect::>() .join(" | "); write!(f, "({})", sequence) } Expr::Opt(expr) => write!(f, "{}?", expr), Expr::Rep(expr) => write!(f, "{}*", expr), Expr::RepOnce(expr) => write!(f, "{}+", expr), Expr::RepExact(expr, n) => write!(f, "{}{{{}}}", expr, n), Expr::RepMin(expr, min) => write!(f, "{}{{{},}}", expr, min), Expr::RepMax(expr, max) => write!(f, "{}{{,{}}}", expr, max), Expr::RepMinMax(expr, min, max) => write!(f, "{}{{{}, {}}}", expr, min, max), Expr::Skip(strings) => { let strings = strings .iter() .map(|s| format!("{:?}", s)) .collect::>() .join(" | "); write!(f, "(!({}) ~ ANY)*", strings) } Expr::Push(expr) => write!(f, "PUSH({})", expr), #[cfg(feature = "grammar-extras")] Expr::NodeTag(expr, tag) => { write!(f, "(#{} = {})", tag, expr) } } } } /// The top down iterator for an expression. pub struct ExprTopDownIterator { current: Option, next: Option, right_branches: Vec, } impl ExprTopDownIterator { /// Constructs a top-down iterator from the expression. pub fn new(expr: &Expr) -> Self { let mut iter = ExprTopDownIterator { current: None, next: None, right_branches: vec![], }; iter.iterate_expr(expr.clone()); iter } fn iterate_expr(&mut self, expr: Expr) { self.current = Some(expr.clone()); match expr { Expr::Seq(lhs, rhs) => { self.right_branches.push(*rhs); self.next = Some(*lhs); } Expr::Choice(lhs, rhs) => { self.right_branches.push(*rhs); self.next = Some(*lhs); } Expr::PosPred(expr) | Expr::NegPred(expr) | Expr::Rep(expr) | Expr::RepOnce(expr) | Expr::RepExact(expr, _) | Expr::RepMin(expr, _) | Expr::RepMax(expr, _) | Expr::RepMinMax(expr, ..) | Expr::Opt(expr) | Expr::Push(expr) => { self.next = Some(*expr); } #[cfg(feature = "grammar-extras")] Expr::NodeTag(expr, _) => { self.next = Some(*expr); } _ => { self.next = None; } } } } impl Iterator for ExprTopDownIterator { type Item = Expr; fn next(&mut self) -> Option { let result = self.current.take(); if let Some(expr) = self.next.take() { self.iterate_expr(expr); } else if let Some(expr) = self.right_branches.pop() { self.iterate_expr(expr); } result } } #[cfg(test)] mod tests { use super::*; #[test] fn top_down_iterator() { let expr = Expr::Choice( Box::new(Expr::Str(String::from("a"))), Box::new(Expr::Str(String::from("b"))), ); let mut top_down = expr.iter_top_down(); assert_eq!(top_down.next(), Some(expr)); assert_eq!(top_down.next(), Some(Expr::Str(String::from("a")))); assert_eq!(top_down.next(), Some(Expr::Str(String::from("b")))); assert_eq!(top_down.next(), None); } #[test] fn identity() { let expr = Expr::Choice( Box::new(Expr::Seq( Box::new(Expr::Ident("a".to_owned())), Box::new(Expr::Str("b".to_owned())), )), Box::new(Expr::PosPred(Box::new(Expr::NegPred(Box::new(Expr::Rep( Box::new(Expr::RepOnce(Box::new(Expr::Opt(Box::new(Expr::Choice( Box::new(Expr::Insens("c".to_owned())), Box::new(Expr::Push(Box::new(Expr::Range( "'d'".to_owned(), "'e'".to_owned(), )))), )))))), )))))), ); assert_eq!( expr.clone() .map_bottom_up(|expr| expr) .map_top_down(|expr| expr), expr, ); } mod display { use super::super::*; #[test] fn string() { assert_eq!(Expr::Str("a".to_owned()).to_string(), r#""a""#); } #[test] fn insens() { assert_eq!(Expr::Insens("a".to_owned()).to_string(), r#"^"a""#); } #[test] fn range() { assert_eq!( Expr::Range("a".to_owned(), "z".to_owned()).to_string(), r#"('a'..'z')"#, ); } #[test] fn ident() { assert_eq!(Expr::Ident("a".to_owned()).to_string(), "a"); } #[test] fn peek_slice() { assert_eq!(Expr::PeekSlice(0, None).to_string(), "PEEK[0..]"); assert_eq!(Expr::PeekSlice(0, Some(-1)).to_string(), "PEEK[0..-1]"); } #[test] fn pos_pred() { assert_eq!( Expr::PosPred(Box::new(Expr::Ident("e".to_owned()))).to_string(), "&e", ); } #[test] fn neg_pred() { assert_eq!( Expr::NegPred(Box::new(Expr::Ident("e".to_owned()))).to_string(), "!e", ); } #[test] fn seq() { assert_eq!( Expr::Seq( Box::new(Expr::Ident("e1".to_owned())), Box::new(Expr::Ident("e2".to_owned())), ) .to_string(), "(e1 ~ e2)", ); assert_eq!( Expr::Seq( Box::new(Expr::Ident("e1".to_owned())), Box::new(Expr::Seq( Box::new(Expr::Ident("e2".to_owned())), Box::new(Expr::Ident("e3".to_owned())), )), ) .to_string(), "(e1 ~ e2 ~ e3)", ); assert_eq!( Expr::Seq( Box::new(Expr::Ident("e1".to_owned())), Box::new(Expr::Seq( Box::new(Expr::Ident("e2".to_owned())), Box::new(Expr::Seq( Box::new(Expr::Ident("e3".to_owned())), Box::new(Expr::Ident("e4".to_owned())), )), )), ) .to_string(), "(e1 ~ e2 ~ e3 ~ e4)", ); assert_eq!( Expr::Seq( Box::new(Expr::Ident("e1".to_owned())), Box::new(Expr::Choice( Box::new(Expr::Ident("e2".to_owned())), Box::new(Expr::Seq( Box::new(Expr::Ident("e3".to_owned())), Box::new(Expr::Ident("e4".to_owned())), )), )), ) .to_string(), "(e1 ~ (e2 | (e3 ~ e4)))", ); assert_eq!( Expr::Seq( Box::new(Expr::Ident("e1".to_owned())), Box::new(Expr::Seq( Box::new(Expr::Ident("e2".to_owned())), Box::new(Expr::Choice( Box::new(Expr::Ident("e3".to_owned())), Box::new(Expr::Ident("e4".to_owned())), )), )), ) .to_string(), "(e1 ~ e2 ~ (e3 | e4))", ); } #[test] fn choice() { assert_eq!( Expr::Choice( Box::new(Expr::Ident("e1".to_owned())), Box::new(Expr::Ident("e2".to_owned())), ) .to_string(), "(e1 | e2)", ); assert_eq!( Expr::Choice( Box::new(Expr::Ident("e1".to_owned())), Box::new(Expr::Choice( Box::new(Expr::Ident("e2".to_owned())), Box::new(Expr::Ident("e3".to_owned())), )), ) .to_string(), "(e1 | e2 | e3)", ); assert_eq!( Expr::Choice( Box::new(Expr::Ident("e1".to_owned())), Box::new(Expr::Choice( Box::new(Expr::Ident("e2".to_owned())), Box::new(Expr::Choice( Box::new(Expr::Ident("e3".to_owned())), Box::new(Expr::Ident("e4".to_owned())), )), )), ) .to_string(), "(e1 | e2 | e3 | e4)", ); assert_eq!( Expr::Choice( Box::new(Expr::Ident("e1".to_owned())), Box::new(Expr::Seq( Box::new(Expr::Ident("e2".to_owned())), Box::new(Expr::Choice( Box::new(Expr::Ident("e3".to_owned())), Box::new(Expr::Ident("e4".to_owned())), )), )), ) .to_string(), "(e1 | (e2 ~ (e3 | e4)))", ); } #[test] fn opt() { assert_eq!( Expr::Opt(Box::new(Expr::Ident("e".to_owned()))).to_string(), "e?", ); } #[test] fn rep() { assert_eq!( Expr::Rep(Box::new(Expr::Ident("e".to_owned()))).to_string(), "e*", ); } #[test] fn rep_once() { assert_eq!( Expr::RepOnce(Box::new(Expr::Ident("e".to_owned()))).to_string(), "e+", ); } #[test] fn rep_exact() { assert_eq!( Expr::RepExact(Box::new(Expr::Ident("e".to_owned())), 1).to_string(), "e{1}", ); } #[test] fn rep_min() { assert_eq!( Expr::RepMin(Box::new(Expr::Ident("e".to_owned())), 1).to_string(), "e{1,}", ); } #[test] fn rep_max() { assert_eq!( Expr::RepMax(Box::new(Expr::Ident("e".to_owned())), 1).to_string(), "e{,1}", ); } #[test] fn rep_min_max() { assert_eq!( Expr::RepMinMax(Box::new(Expr::Ident("e".to_owned())), 1, 2).to_string(), "e{1, 2}", ); } #[test] fn skip() { assert_eq!( Expr::Skip( ["a", "bc"] .into_iter() .map(|s| s.to_owned()) .collect::>(), ) .to_string(), r#"(!("a" | "bc") ~ ANY)*"#, ); } #[test] fn push() { assert_eq!( Expr::Push(Box::new(Expr::Ident("e".to_owned()))).to_string(), "PUSH(e)", ); } #[test] #[cfg(feature = "grammar-extras")] fn node_tag() { assert_eq!( Expr::NodeTag(Box::new(Expr::Ident("expr".to_owned())), "label".to_owned()) .to_string(), "(#label = expr)", ); } } }