// pest. The Elegant Parser // Copyright (c) 2018 DragoČ™ Tiselice // // Licensed under the Apache License, Version 2.0 // or the MIT // license , at your // option. All files in the project carrying such notice may not be copied, // modified, or distributed except according to those terms. use ast::*; use std::collections::HashMap; #[cfg(test)] macro_rules! box_tree { ( $node:ident( $( $child:ident( $($args:tt)* ) ),+ ) ) => ( $node ( $( Box::new( box_tree!( $child ( $($args )* ) ) ) ),+ ) ); ($expr:expr) => ($expr); } mod concatenator; mod factorizer; mod restorer; mod rotater; mod skipper; mod unroller; pub fn optimize(rules: Vec) -> Vec { let optimized: Vec = rules .into_iter() .map(rotater::rotate) .map(skipper::skip) .map(unroller::unroll) .map(concatenator::concatenate) .map(factorizer::factor) .map(rule_to_optimized_rule) .collect(); let rules = to_hash_map(&optimized); optimized .into_iter() .map(|rule| restorer::restore_on_err(rule, &rules)) .collect() } fn rule_to_optimized_rule(rule: Rule) -> OptimizedRule { fn to_optimized(expr: Expr) -> OptimizedExpr { match expr { Expr::Str(string) => OptimizedExpr::Str(string), Expr::Insens(string) => OptimizedExpr::Insens(string), Expr::Range(start, end) => OptimizedExpr::Range(start, end), Expr::Ident(ident) => OptimizedExpr::Ident(ident), Expr::PeekSlice(start, end) => OptimizedExpr::PeekSlice(start, end), Expr::PosPred(expr) => OptimizedExpr::PosPred(Box::new(to_optimized(*expr))), Expr::NegPred(expr) => OptimizedExpr::NegPred(Box::new(to_optimized(*expr))), Expr::Seq(lhs, rhs) => { OptimizedExpr::Seq(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs))) } Expr::Choice(lhs, rhs) => { OptimizedExpr::Choice(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs))) } Expr::Opt(expr) => OptimizedExpr::Opt(Box::new(to_optimized(*expr))), Expr::Rep(expr) => OptimizedExpr::Rep(Box::new(to_optimized(*expr))), Expr::Skip(strings) => OptimizedExpr::Skip(strings), Expr::Push(expr) => OptimizedExpr::Push(Box::new(to_optimized(*expr))), Expr::RepOnce(_) | Expr::RepExact(..) | Expr::RepMin(..) | Expr::RepMax(..) | Expr::RepMinMax(..) => unreachable!("No valid transformation to OptimizedRule"), } } OptimizedRule { name: rule.name, ty: rule.ty, expr: to_optimized(rule.expr), } } fn to_hash_map(rules: &[OptimizedRule]) -> HashMap { rules .iter() .map(|r| (r.name.clone(), r.expr.clone())) .collect() } #[derive(Clone, Debug, Eq, PartialEq)] pub struct OptimizedRule { pub name: String, pub ty: RuleType, pub expr: OptimizedExpr, } #[derive(Clone, Debug, Eq, PartialEq)] pub enum OptimizedExpr { Str(String), Insens(String), Range(String, String), Ident(String), PeekSlice(i32, Option), PosPred(Box), NegPred(Box), Seq(Box, Box), Choice(Box, Box), Opt(Box), Rep(Box), Skip(Vec), Push(Box), RestoreOnErr(Box), } impl OptimizedExpr { pub fn iter_top_down(&self) -> OptimizedExprTopDownIterator { OptimizedExprTopDownIterator::new(self) } pub fn map_top_down(self, mut f: F) -> OptimizedExpr where F: FnMut(OptimizedExpr) -> OptimizedExpr, { fn map_internal(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr where F: FnMut(OptimizedExpr) -> OptimizedExpr, { let expr = f(expr); match expr { // TODO: Use box syntax when it gets stabilized. OptimizedExpr::PosPred(expr) => { let mapped = Box::new(map_internal(*expr, f)); OptimizedExpr::PosPred(mapped) } OptimizedExpr::NegPred(expr) => { let mapped = Box::new(map_internal(*expr, f)); OptimizedExpr::NegPred(mapped) } OptimizedExpr::Seq(lhs, rhs) => { let mapped_lhs = Box::new(map_internal(*lhs, f)); let mapped_rhs = Box::new(map_internal(*rhs, f)); OptimizedExpr::Seq(mapped_lhs, mapped_rhs) } OptimizedExpr::Choice(lhs, rhs) => { let mapped_lhs = Box::new(map_internal(*lhs, f)); let mapped_rhs = Box::new(map_internal(*rhs, f)); OptimizedExpr::Choice(mapped_lhs, mapped_rhs) } OptimizedExpr::Rep(expr) => { let mapped = Box::new(map_internal(*expr, f)); OptimizedExpr::Rep(mapped) } OptimizedExpr::Opt(expr) => { let mapped = Box::new(map_internal(*expr, f)); OptimizedExpr::Opt(mapped) } OptimizedExpr::Push(expr) => { let mapped = Box::new(map_internal(*expr, f)); OptimizedExpr::Push(mapped) } expr => expr, } } map_internal(self, &mut f) } pub fn map_bottom_up(self, mut f: F) -> OptimizedExpr where F: FnMut(OptimizedExpr) -> OptimizedExpr, { fn map_internal(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr where F: FnMut(OptimizedExpr) -> OptimizedExpr, { let mapped = match expr { OptimizedExpr::PosPred(expr) => { // TODO: Use box syntax when it gets stabilized. let mapped = Box::new(map_internal(*expr, f)); OptimizedExpr::PosPred(mapped) } OptimizedExpr::NegPred(expr) => { let mapped = Box::new(map_internal(*expr, f)); OptimizedExpr::NegPred(mapped) } OptimizedExpr::Seq(lhs, rhs) => { let mapped_lhs = Box::new(map_internal(*lhs, f)); let mapped_rhs = Box::new(map_internal(*rhs, f)); OptimizedExpr::Seq(mapped_lhs, mapped_rhs) } OptimizedExpr::Choice(lhs, rhs) => { let mapped_lhs = Box::new(map_internal(*lhs, f)); let mapped_rhs = Box::new(map_internal(*rhs, f)); OptimizedExpr::Choice(mapped_lhs, mapped_rhs) } OptimizedExpr::Rep(expr) => { let mapped = Box::new(map_internal(*expr, f)); OptimizedExpr::Rep(mapped) } OptimizedExpr::Opt(expr) => { let mapped = Box::new(map_internal(*expr, f)); OptimizedExpr::Opt(mapped) } OptimizedExpr::Push(expr) => { let mapped = Box::new(map_internal(*expr, f)); OptimizedExpr::Push(mapped) } expr => expr, }; f(mapped) } map_internal(self, &mut f) } } pub struct OptimizedExprTopDownIterator { current: Option, next: Option, right_branches: Vec, } impl OptimizedExprTopDownIterator { pub fn new(expr: &OptimizedExpr) -> Self { let mut iter = OptimizedExprTopDownIterator { current: None, next: None, right_branches: vec![], }; iter.iterate_expr(expr.clone()); iter } fn iterate_expr(&mut self, expr: OptimizedExpr) { self.current = Some(expr.clone()); match expr { OptimizedExpr::Seq(lhs, rhs) => { self.right_branches.push(*rhs); self.next = Some(*lhs); } OptimizedExpr::Choice(lhs, rhs) => { self.right_branches.push(*rhs); self.next = Some(*lhs); } OptimizedExpr::PosPred(expr) | OptimizedExpr::NegPred(expr) | OptimizedExpr::Rep(expr) | OptimizedExpr::Opt(expr) | OptimizedExpr::Push(expr) => { self.next = Some(*expr); } _ => { self.next = None; } } } } impl Iterator for OptimizedExprTopDownIterator { type Item = OptimizedExpr; fn next(&mut self) -> Option { let result = self.current.take(); if let Some(expr) = self.next.take() { self.iterate_expr(expr); } else if let Some(expr) = self.right_branches.pop() { self.iterate_expr(expr); } result } } #[cfg(test)] mod tests { use super::*; #[test] fn rotate() { let rules = { use ast::Expr::*; vec![Rule { name: "rule".to_owned(), ty: RuleType::Normal, expr: box_tree!(Choice( Choice( Choice(Str(String::from("a")), Str(String::from("b"))), Str(String::from("c")) ), Str(String::from("d")) )), }] }; let rotated = { use optimizer::OptimizedExpr::*; vec![OptimizedRule { name: "rule".to_owned(), ty: RuleType::Normal, expr: box_tree!(Choice( Str(String::from("a")), Choice( Str(String::from("b")), Choice(Str(String::from("c")), Str(String::from("d"))) ) )), }] }; assert_eq!(optimize(rules), rotated); } #[test] fn skip() { let rules = { use ast::Expr::*; vec![Rule { name: "rule".to_owned(), ty: RuleType::Atomic, expr: box_tree!(Rep(Seq( NegPred(Choice(Str(String::from("a")), Str(String::from("b")))), Ident(String::from("ANY")) ))), }] }; let skipped = vec![OptimizedRule { name: "rule".to_owned(), ty: RuleType::Atomic, expr: OptimizedExpr::Skip(vec![String::from("a"), String::from("b")]), }]; assert_eq!(optimize(rules), skipped); } #[test] fn concat_strings() { let rules = { use ast::Expr::*; vec![Rule { name: "rule".to_owned(), ty: RuleType::Atomic, expr: box_tree!(Seq( Seq(Str(String::from("a")), Str(String::from("b"))), Seq(Str(String::from("c")), Str(String::from("d"))) )), }] }; let concatenated = vec![OptimizedRule { name: "rule".to_owned(), ty: RuleType::Atomic, expr: OptimizedExpr::Str(String::from("abcd")), }]; assert_eq!(optimize(rules), concatenated); } #[test] fn unroll_loop_exact() { let rules = vec![Rule { name: "rule".to_owned(), ty: RuleType::Atomic, expr: Expr::RepExact(Box::new(Expr::Ident(String::from("a"))), 3), }]; let unrolled = { use optimizer::OptimizedExpr::*; vec![OptimizedRule { name: "rule".to_owned(), ty: RuleType::Atomic, expr: box_tree!(Seq( Ident(String::from("a")), Seq(Ident(String::from("a")), Ident(String::from("a"))) )), }] }; assert_eq!(optimize(rules), unrolled); } #[test] fn unroll_loop_max() { let rules = vec![Rule { name: "rule".to_owned(), ty: RuleType::Atomic, expr: Expr::RepMax(Box::new(Expr::Str("a".to_owned())), 3), }]; let unrolled = { use optimizer::OptimizedExpr::*; vec![OptimizedRule { name: "rule".to_owned(), ty: RuleType::Atomic, expr: box_tree!(Seq( Opt(Str(String::from("a"))), Seq(Opt(Str(String::from("a"))), Opt(Str(String::from("a")))) )), }] }; assert_eq!(optimize(rules), unrolled); } #[test] fn unroll_loop_min() { let rules = vec![Rule { name: "rule".to_owned(), ty: RuleType::Atomic, expr: Expr::RepMin(Box::new(Expr::Str("a".to_owned())), 2), }]; let unrolled = { use optimizer::OptimizedExpr::*; vec![OptimizedRule { name: "rule".to_owned(), ty: RuleType::Atomic, expr: box_tree!(Seq( Str(String::from("a")), Seq(Str(String::from("a")), Rep(Str(String::from("a")))) )), }] }; assert_eq!(optimize(rules), unrolled); } #[test] fn unroll_loop_min_max() { let rules = vec![Rule { name: "rule".to_owned(), ty: RuleType::Atomic, expr: Expr::RepMinMax(Box::new(Expr::Str("a".to_owned())), 2, 3), }]; let unrolled = { use optimizer::OptimizedExpr::*; vec![OptimizedRule { name: "rule".to_owned(), ty: RuleType::Atomic, /* TODO possible room for improvement here: * if the sequences were rolled out in the opposite * order, we could further optimize the strings * in cases like this. Str(String::from(("aa")), Opt(Str(String::from("a"))) */ expr: box_tree!(Seq( Str(String::from("a")), Seq(Str(String::from("a")), Opt(Str(String::from("a")))) )), }] }; assert_eq!(optimize(rules), unrolled); } #[test] fn concat_insensitive_strings() { let rules = { use ast::Expr::*; vec![Rule { name: "rule".to_owned(), ty: RuleType::Atomic, expr: box_tree!(Seq( Seq(Insens(String::from("a")), Insens(String::from("b"))), Seq(Insens(String::from("c")), Insens(String::from("d"))) )), }] }; let concatenated = vec![OptimizedRule { name: "rule".to_owned(), ty: RuleType::Atomic, expr: OptimizedExpr::Insens(String::from("abcd")), }]; assert_eq!(optimize(rules), concatenated); } #[test] fn long_common_sequence() { let rules = { use ast::Expr::*; vec![Rule { name: "rule".to_owned(), ty: RuleType::Silent, expr: box_tree!(Choice( Seq( Ident(String::from("a")), Seq(Ident(String::from("b")), Ident(String::from("c"))) ), Seq( Seq(Ident(String::from("a")), Ident(String::from("b"))), Ident(String::from("d")) ) )), }] }; let optimized = { use optimizer::OptimizedExpr::*; vec![OptimizedRule { name: "rule".to_owned(), ty: RuleType::Silent, expr: box_tree!(Seq( Ident(String::from("a")), Seq( Ident(String::from("b")), Choice(Ident(String::from("c")), Ident(String::from("d"))) ) )), }] }; assert_eq!(optimize(rules), optimized); } }