diff options
Diffstat (limited to 'vendor/pest_meta/src/optimizer/mod.rs')
-rw-r--r-- | vendor/pest_meta/src/optimizer/mod.rs | 510 |
1 files changed, 510 insertions, 0 deletions
diff --git a/vendor/pest_meta/src/optimizer/mod.rs b/vendor/pest_meta/src/optimizer/mod.rs new file mode 100644 index 000000000..7013c43f9 --- /dev/null +++ b/vendor/pest_meta/src/optimizer/mod.rs @@ -0,0 +1,510 @@ +// pest. The Elegant Parser +// Copyright (c) 2018 DragoČ™ Tiselice +// +// Licensed under the Apache License, Version 2.0 +// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT +// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. All files in the project carrying such notice may not be copied, +// modified, or distributed except according to those terms. + +use ast::*; +use std::collections::HashMap; + +#[cfg(test)] +macro_rules! box_tree { + ( $node:ident( $( $child:ident( $($args:tt)* ) ),+ ) ) => ( + $node ( $( Box::new( box_tree!( $child ( $($args )* ) ) ) ),+ ) + ); + ($expr:expr) => ($expr); +} + +mod concatenator; +mod factorizer; +mod restorer; +mod rotater; +mod skipper; +mod unroller; + +pub fn optimize(rules: Vec<Rule>) -> Vec<OptimizedRule> { + let optimized: Vec<OptimizedRule> = rules + .into_iter() + .map(rotater::rotate) + .map(skipper::skip) + .map(unroller::unroll) + .map(concatenator::concatenate) + .map(factorizer::factor) + .map(rule_to_optimized_rule) + .collect(); + + let rules = to_hash_map(&optimized); + optimized + .into_iter() + .map(|rule| restorer::restore_on_err(rule, &rules)) + .collect() +} + +fn rule_to_optimized_rule(rule: Rule) -> OptimizedRule { + fn to_optimized(expr: Expr) -> OptimizedExpr { + match expr { + Expr::Str(string) => OptimizedExpr::Str(string), + Expr::Insens(string) => OptimizedExpr::Insens(string), + Expr::Range(start, end) => OptimizedExpr::Range(start, end), + Expr::Ident(ident) => OptimizedExpr::Ident(ident), + Expr::PeekSlice(start, end) => OptimizedExpr::PeekSlice(start, end), + Expr::PosPred(expr) => OptimizedExpr::PosPred(Box::new(to_optimized(*expr))), + Expr::NegPred(expr) => OptimizedExpr::NegPred(Box::new(to_optimized(*expr))), + Expr::Seq(lhs, rhs) => { + OptimizedExpr::Seq(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs))) + } + Expr::Choice(lhs, rhs) => { + OptimizedExpr::Choice(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs))) + } + Expr::Opt(expr) => OptimizedExpr::Opt(Box::new(to_optimized(*expr))), + Expr::Rep(expr) => OptimizedExpr::Rep(Box::new(to_optimized(*expr))), + Expr::Skip(strings) => OptimizedExpr::Skip(strings), + Expr::Push(expr) => OptimizedExpr::Push(Box::new(to_optimized(*expr))), + Expr::RepOnce(_) + | Expr::RepExact(..) + | Expr::RepMin(..) + | Expr::RepMax(..) + | Expr::RepMinMax(..) => unreachable!("No valid transformation to OptimizedRule"), + } + } + + OptimizedRule { + name: rule.name, + ty: rule.ty, + expr: to_optimized(rule.expr), + } +} + +fn to_hash_map(rules: &[OptimizedRule]) -> HashMap<String, OptimizedExpr> { + rules + .iter() + .map(|r| (r.name.clone(), r.expr.clone())) + .collect() +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct OptimizedRule { + pub name: String, + pub ty: RuleType, + pub expr: OptimizedExpr, +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum OptimizedExpr { + Str(String), + Insens(String), + Range(String, String), + Ident(String), + PeekSlice(i32, Option<i32>), + PosPred(Box<OptimizedExpr>), + NegPred(Box<OptimizedExpr>), + Seq(Box<OptimizedExpr>, Box<OptimizedExpr>), + Choice(Box<OptimizedExpr>, Box<OptimizedExpr>), + Opt(Box<OptimizedExpr>), + Rep(Box<OptimizedExpr>), + Skip(Vec<String>), + Push(Box<OptimizedExpr>), + RestoreOnErr(Box<OptimizedExpr>), +} + +impl OptimizedExpr { + pub fn iter_top_down(&self) -> OptimizedExprTopDownIterator { + OptimizedExprTopDownIterator::new(self) + } + + pub fn map_top_down<F>(self, mut f: F) -> OptimizedExpr + where + F: FnMut(OptimizedExpr) -> OptimizedExpr, + { + fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr + where + F: FnMut(OptimizedExpr) -> OptimizedExpr, + { + let expr = f(expr); + + match expr { + // TODO: Use box syntax when it gets stabilized. + OptimizedExpr::PosPred(expr) => { + let mapped = Box::new(map_internal(*expr, f)); + OptimizedExpr::PosPred(mapped) + } + OptimizedExpr::NegPred(expr) => { + let mapped = Box::new(map_internal(*expr, f)); + OptimizedExpr::NegPred(mapped) + } + OptimizedExpr::Seq(lhs, rhs) => { + let mapped_lhs = Box::new(map_internal(*lhs, f)); + let mapped_rhs = Box::new(map_internal(*rhs, f)); + OptimizedExpr::Seq(mapped_lhs, mapped_rhs) + } + OptimizedExpr::Choice(lhs, rhs) => { + let mapped_lhs = Box::new(map_internal(*lhs, f)); + let mapped_rhs = Box::new(map_internal(*rhs, f)); + OptimizedExpr::Choice(mapped_lhs, mapped_rhs) + } + OptimizedExpr::Rep(expr) => { + let mapped = Box::new(map_internal(*expr, f)); + OptimizedExpr::Rep(mapped) + } + OptimizedExpr::Opt(expr) => { + let mapped = Box::new(map_internal(*expr, f)); + OptimizedExpr::Opt(mapped) + } + OptimizedExpr::Push(expr) => { + let mapped = Box::new(map_internal(*expr, f)); + OptimizedExpr::Push(mapped) + } + expr => expr, + } + } + + map_internal(self, &mut f) + } + + pub fn map_bottom_up<F>(self, mut f: F) -> OptimizedExpr + where + F: FnMut(OptimizedExpr) -> OptimizedExpr, + { + fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr + where + F: FnMut(OptimizedExpr) -> OptimizedExpr, + { + let mapped = match expr { + OptimizedExpr::PosPred(expr) => { + // TODO: Use box syntax when it gets stabilized. + let mapped = Box::new(map_internal(*expr, f)); + OptimizedExpr::PosPred(mapped) + } + OptimizedExpr::NegPred(expr) => { + let mapped = Box::new(map_internal(*expr, f)); + OptimizedExpr::NegPred(mapped) + } + OptimizedExpr::Seq(lhs, rhs) => { + let mapped_lhs = Box::new(map_internal(*lhs, f)); + let mapped_rhs = Box::new(map_internal(*rhs, f)); + OptimizedExpr::Seq(mapped_lhs, mapped_rhs) + } + OptimizedExpr::Choice(lhs, rhs) => { + let mapped_lhs = Box::new(map_internal(*lhs, f)); + let mapped_rhs = Box::new(map_internal(*rhs, f)); + OptimizedExpr::Choice(mapped_lhs, mapped_rhs) + } + OptimizedExpr::Rep(expr) => { + let mapped = Box::new(map_internal(*expr, f)); + OptimizedExpr::Rep(mapped) + } + OptimizedExpr::Opt(expr) => { + let mapped = Box::new(map_internal(*expr, f)); + OptimizedExpr::Opt(mapped) + } + OptimizedExpr::Push(expr) => { + let mapped = Box::new(map_internal(*expr, f)); + OptimizedExpr::Push(mapped) + } + expr => expr, + }; + + f(mapped) + } + + map_internal(self, &mut f) + } +} + +pub struct OptimizedExprTopDownIterator { + current: Option<OptimizedExpr>, + next: Option<OptimizedExpr>, + right_branches: Vec<OptimizedExpr>, +} + +impl OptimizedExprTopDownIterator { + pub fn new(expr: &OptimizedExpr) -> Self { + let mut iter = OptimizedExprTopDownIterator { + current: None, + next: None, + right_branches: vec![], + }; + iter.iterate_expr(expr.clone()); + iter + } + + fn iterate_expr(&mut self, expr: OptimizedExpr) { + self.current = Some(expr.clone()); + match expr { + OptimizedExpr::Seq(lhs, rhs) => { + self.right_branches.push(*rhs); + self.next = Some(*lhs); + } + OptimizedExpr::Choice(lhs, rhs) => { + self.right_branches.push(*rhs); + self.next = Some(*lhs); + } + OptimizedExpr::PosPred(expr) + | OptimizedExpr::NegPred(expr) + | OptimizedExpr::Rep(expr) + | OptimizedExpr::Opt(expr) + | OptimizedExpr::Push(expr) => { + self.next = Some(*expr); + } + _ => { + self.next = None; + } + } + } +} + +impl Iterator for OptimizedExprTopDownIterator { + type Item = OptimizedExpr; + + fn next(&mut self) -> Option<Self::Item> { + let result = self.current.take(); + + if let Some(expr) = self.next.take() { + self.iterate_expr(expr); + } else if let Some(expr) = self.right_branches.pop() { + self.iterate_expr(expr); + } + + result + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn rotate() { + let rules = { + use ast::Expr::*; + vec![Rule { + name: "rule".to_owned(), + ty: RuleType::Normal, + expr: box_tree!(Choice( + Choice( + Choice(Str(String::from("a")), Str(String::from("b"))), + Str(String::from("c")) + ), + Str(String::from("d")) + )), + }] + }; + let rotated = { + use optimizer::OptimizedExpr::*; + vec![OptimizedRule { + name: "rule".to_owned(), + ty: RuleType::Normal, + expr: box_tree!(Choice( + Str(String::from("a")), + Choice( + Str(String::from("b")), + Choice(Str(String::from("c")), Str(String::from("d"))) + ) + )), + }] + }; + + assert_eq!(optimize(rules), rotated); + } + + #[test] + fn skip() { + let rules = { + use ast::Expr::*; + vec![Rule { + name: "rule".to_owned(), + ty: RuleType::Atomic, + expr: box_tree!(Rep(Seq( + NegPred(Choice(Str(String::from("a")), Str(String::from("b")))), + Ident(String::from("ANY")) + ))), + }] + }; + let skipped = vec![OptimizedRule { + name: "rule".to_owned(), + ty: RuleType::Atomic, + expr: OptimizedExpr::Skip(vec![String::from("a"), String::from("b")]), + }]; + + assert_eq!(optimize(rules), skipped); + } + + #[test] + fn concat_strings() { + let rules = { + use ast::Expr::*; + vec![Rule { + name: "rule".to_owned(), + ty: RuleType::Atomic, + expr: box_tree!(Seq( + Seq(Str(String::from("a")), Str(String::from("b"))), + Seq(Str(String::from("c")), Str(String::from("d"))) + )), + }] + }; + let concatenated = vec![OptimizedRule { + name: "rule".to_owned(), + ty: RuleType::Atomic, + expr: OptimizedExpr::Str(String::from("abcd")), + }]; + + assert_eq!(optimize(rules), concatenated); + } + + #[test] + fn unroll_loop_exact() { + let rules = vec![Rule { + name: "rule".to_owned(), + ty: RuleType::Atomic, + expr: Expr::RepExact(Box::new(Expr::Ident(String::from("a"))), 3), + }]; + let unrolled = { + use optimizer::OptimizedExpr::*; + vec![OptimizedRule { + name: "rule".to_owned(), + ty: RuleType::Atomic, + expr: box_tree!(Seq( + Ident(String::from("a")), + Seq(Ident(String::from("a")), Ident(String::from("a"))) + )), + }] + }; + + assert_eq!(optimize(rules), unrolled); + } + + #[test] + fn unroll_loop_max() { + let rules = vec![Rule { + name: "rule".to_owned(), + ty: RuleType::Atomic, + expr: Expr::RepMax(Box::new(Expr::Str("a".to_owned())), 3), + }]; + let unrolled = { + use optimizer::OptimizedExpr::*; + vec![OptimizedRule { + name: "rule".to_owned(), + ty: RuleType::Atomic, + expr: box_tree!(Seq( + Opt(Str(String::from("a"))), + Seq(Opt(Str(String::from("a"))), Opt(Str(String::from("a")))) + )), + }] + }; + + assert_eq!(optimize(rules), unrolled); + } + + #[test] + fn unroll_loop_min() { + let rules = vec![Rule { + name: "rule".to_owned(), + ty: RuleType::Atomic, + expr: Expr::RepMin(Box::new(Expr::Str("a".to_owned())), 2), + }]; + let unrolled = { + use optimizer::OptimizedExpr::*; + vec![OptimizedRule { + name: "rule".to_owned(), + ty: RuleType::Atomic, + expr: box_tree!(Seq( + Str(String::from("a")), + Seq(Str(String::from("a")), Rep(Str(String::from("a")))) + )), + }] + }; + + assert_eq!(optimize(rules), unrolled); + } + + #[test] + fn unroll_loop_min_max() { + let rules = vec![Rule { + name: "rule".to_owned(), + ty: RuleType::Atomic, + expr: Expr::RepMinMax(Box::new(Expr::Str("a".to_owned())), 2, 3), + }]; + let unrolled = { + use optimizer::OptimizedExpr::*; + vec![OptimizedRule { + name: "rule".to_owned(), + ty: RuleType::Atomic, + /* TODO possible room for improvement here: + * if the sequences were rolled out in the opposite + * order, we could further optimize the strings + * in cases like this. + Str(String::from(("aa")), + Opt(Str(String::from("a"))) + */ + expr: box_tree!(Seq( + Str(String::from("a")), + Seq(Str(String::from("a")), Opt(Str(String::from("a")))) + )), + }] + }; + + assert_eq!(optimize(rules), unrolled); + } + + #[test] + fn concat_insensitive_strings() { + let rules = { + use ast::Expr::*; + vec![Rule { + name: "rule".to_owned(), + ty: RuleType::Atomic, + expr: box_tree!(Seq( + Seq(Insens(String::from("a")), Insens(String::from("b"))), + Seq(Insens(String::from("c")), Insens(String::from("d"))) + )), + }] + }; + let concatenated = vec![OptimizedRule { + name: "rule".to_owned(), + ty: RuleType::Atomic, + expr: OptimizedExpr::Insens(String::from("abcd")), + }]; + + assert_eq!(optimize(rules), concatenated); + } + + #[test] + fn long_common_sequence() { + let rules = { + use ast::Expr::*; + vec![Rule { + name: "rule".to_owned(), + ty: RuleType::Silent, + expr: box_tree!(Choice( + Seq( + Ident(String::from("a")), + Seq(Ident(String::from("b")), Ident(String::from("c"))) + ), + Seq( + Seq(Ident(String::from("a")), Ident(String::from("b"))), + Ident(String::from("d")) + ) + )), + }] + }; + let optimized = { + use optimizer::OptimizedExpr::*; + vec![OptimizedRule { + name: "rule".to_owned(), + ty: RuleType::Silent, + expr: box_tree!(Seq( + Ident(String::from("a")), + Seq( + Ident(String::from("b")), + Choice(Ident(String::from("c")), Ident(String::from("d"))) + ) + )), + }] + }; + + assert_eq!(optimize(rules), optimized); + } +} |