use std::fmt; use crate::ast::{self, Ast}; /// A trait for visiting an abstract syntax tree (AST) in depth first order. /// /// The principle aim of this trait is to enable callers to perform case /// analysis on an abstract syntax tree without necessarily using recursion. /// In particular, this permits callers to do case analysis with constant stack /// usage, which can be important since the size of an abstract syntax tree /// may be proportional to end user input. /// /// Typical usage of this trait involves providing an implementation and then /// running it using the [`visit`](fn.visit.html) function. /// /// Note that the abstract syntax tree for a regular expression is quite /// complex. Unless you specifically need it, you might be able to use the /// much simpler /// [high-level intermediate representation](../hir/struct.Hir.html) /// and its /// [corresponding `Visitor` trait](../hir/trait.Visitor.html) /// instead. pub trait Visitor { /// The result of visiting an AST. type Output; /// An error that visiting an AST might return. type Err; /// All implementors of `Visitor` must provide a `finish` method, which /// yields the result of visiting the AST or an error. fn finish(self) -> Result; /// This method is called before beginning traversal of the AST. fn start(&mut self) {} /// This method is called on an `Ast` before descending into child `Ast` /// nodes. fn visit_pre(&mut self, _ast: &Ast) -> Result<(), Self::Err> { Ok(()) } /// This method is called on an `Ast` after descending all of its child /// `Ast` nodes. fn visit_post(&mut self, _ast: &Ast) -> Result<(), Self::Err> { Ok(()) } /// This method is called between child nodes of an /// [`Alternation`](struct.Alternation.html). fn visit_alternation_in(&mut self) -> Result<(), Self::Err> { Ok(()) } /// This method is called on every /// [`ClassSetItem`](enum.ClassSetItem.html) /// before descending into child nodes. fn visit_class_set_item_pre( &mut self, _ast: &ast::ClassSetItem, ) -> Result<(), Self::Err> { Ok(()) } /// This method is called on every /// [`ClassSetItem`](enum.ClassSetItem.html) /// after descending into child nodes. fn visit_class_set_item_post( &mut self, _ast: &ast::ClassSetItem, ) -> Result<(), Self::Err> { Ok(()) } /// This method is called on every /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html) /// before descending into child nodes. fn visit_class_set_binary_op_pre( &mut self, _ast: &ast::ClassSetBinaryOp, ) -> Result<(), Self::Err> { Ok(()) } /// This method is called on every /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html) /// after descending into child nodes. fn visit_class_set_binary_op_post( &mut self, _ast: &ast::ClassSetBinaryOp, ) -> Result<(), Self::Err> { Ok(()) } /// This method is called between the left hand and right hand child nodes /// of a [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html). fn visit_class_set_binary_op_in( &mut self, _ast: &ast::ClassSetBinaryOp, ) -> Result<(), Self::Err> { Ok(()) } } /// Executes an implementation of `Visitor` in constant stack space. /// /// This function will visit every node in the given `Ast` while calling the /// appropriate methods provided by the /// [`Visitor`](trait.Visitor.html) trait. /// /// The primary use case for this method is when one wants to perform case /// analysis over an `Ast` without using a stack size proportional to the depth /// of the `Ast`. Namely, this method will instead use constant stack size, but /// will use heap space proportional to the size of the `Ast`. This may be /// desirable in cases where the size of `Ast` is proportional to end user /// input. /// /// If the visitor returns an error at any point, then visiting is stopped and /// the error is returned. pub fn visit(ast: &Ast, visitor: V) -> Result { HeapVisitor::new().visit(ast, visitor) } /// HeapVisitor visits every item in an `Ast` recursively using constant stack /// size and a heap size proportional to the size of the `Ast`. struct HeapVisitor<'a> { /// A stack of `Ast` nodes. This is roughly analogous to the call stack /// used in a typical recursive visitor. stack: Vec<(&'a Ast, Frame<'a>)>, /// Similar to the `Ast` stack above, but is used only for character /// classes. In particular, character classes embed their own mini /// recursive syntax. stack_class: Vec<(ClassInduct<'a>, ClassFrame<'a>)>, } /// Represents a single stack frame while performing structural induction over /// an `Ast`. enum Frame<'a> { /// A stack frame allocated just before descending into a repetition /// operator's child node. Repetition(&'a ast::Repetition), /// A stack frame allocated just before descending into a group's child /// node. Group(&'a ast::Group), /// The stack frame used while visiting every child node of a concatenation /// of expressions. Concat { /// The child node we are currently visiting. head: &'a Ast, /// The remaining child nodes to visit (which may be empty). tail: &'a [Ast], }, /// The stack frame used while visiting every child node of an alternation /// of expressions. Alternation { /// The child node we are currently visiting. head: &'a Ast, /// The remaining child nodes to visit (which may be empty). tail: &'a [Ast], }, } /// Represents a single stack frame while performing structural induction over /// a character class. enum ClassFrame<'a> { /// The stack frame used while visiting every child node of a union of /// character class items. Union { /// The child node we are currently visiting. head: &'a ast::ClassSetItem, /// The remaining child nodes to visit (which may be empty). tail: &'a [ast::ClassSetItem], }, /// The stack frame used while a binary class operation. Binary { op: &'a ast::ClassSetBinaryOp }, /// A stack frame allocated just before descending into a binary operator's /// left hand child node. BinaryLHS { op: &'a ast::ClassSetBinaryOp, lhs: &'a ast::ClassSet, rhs: &'a ast::ClassSet, }, /// A stack frame allocated just before descending into a binary operator's /// right hand child node. BinaryRHS { op: &'a ast::ClassSetBinaryOp, rhs: &'a ast::ClassSet }, } /// A representation of the inductive step when performing structural induction /// over a character class. /// /// Note that there is no analogous explicit type for the inductive step for /// `Ast` nodes because the inductive step is just an `Ast`. For character /// classes, the inductive step can produce one of two possible child nodes: /// an item or a binary operation. (An item cannot be a binary operation /// because that would imply binary operations can be unioned in the concrete /// syntax, which is not possible.) enum ClassInduct<'a> { Item(&'a ast::ClassSetItem), BinaryOp(&'a ast::ClassSetBinaryOp), } impl<'a> HeapVisitor<'a> { fn new() -> HeapVisitor<'a> { HeapVisitor { stack: vec![], stack_class: vec![] } } fn visit( &mut self, mut ast: &'a Ast, mut visitor: V, ) -> Result { self.stack.clear(); self.stack_class.clear(); visitor.start(); loop { visitor.visit_pre(ast)?; if let Some(x) = self.induct(ast, &mut visitor)? { let child = x.child(); self.stack.push((ast, x)); ast = child; continue; } // No induction means we have a base case, so we can post visit // it now. visitor.visit_post(ast)?; // At this point, we now try to pop our call stack until it is // either empty or we hit another inductive case. loop { let (post_ast, frame) = match self.stack.pop() { None => return visitor.finish(), Some((post_ast, frame)) => (post_ast, frame), }; // If this is a concat/alternate, then we might have additional // inductive steps to process. if let Some(x) = self.pop(frame) { if let Frame::Alternation { .. } = x { visitor.visit_alternation_in()?; } ast = x.child(); self.stack.push((post_ast, x)); break; } // Otherwise, we've finished visiting all the child nodes for // this AST, so we can post visit it now. visitor.visit_post(post_ast)?; } } } /// Build a stack frame for the given AST if one is needed (which occurs if /// and only if there are child nodes in the AST). Otherwise, return None. /// /// If this visits a class, then the underlying visitor implementation may /// return an error which will be passed on here. fn induct( &mut self, ast: &'a Ast, visitor: &mut V, ) -> Result>, V::Err> { Ok(match *ast { Ast::Class(ast::Class::Bracketed(ref x)) => { self.visit_class(x, visitor)?; None } Ast::Repetition(ref x) => Some(Frame::Repetition(x)), Ast::Group(ref x) => Some(Frame::Group(x)), Ast::Concat(ref x) if x.asts.is_empty() => None, Ast::Concat(ref x) => { Some(Frame::Concat { head: &x.asts[0], tail: &x.asts[1..] }) } Ast::Alternation(ref x) if x.asts.is_empty() => None, Ast::Alternation(ref x) => Some(Frame::Alternation { head: &x.asts[0], tail: &x.asts[1..], }), _ => None, }) } /// Pops the given frame. If the frame has an additional inductive step, /// then return it, otherwise return `None`. fn pop(&self, induct: Frame<'a>) -> Option> { match induct { Frame::Repetition(_) => None, Frame::Group(_) => None, Frame::Concat { tail, .. } => { if tail.is_empty() { None } else { Some(Frame::Concat { head: &tail[0], tail: &tail[1..] }) } } Frame::Alternation { tail, .. } => { if tail.is_empty() { None } else { Some(Frame::Alternation { head: &tail[0], tail: &tail[1..], }) } } } } fn visit_class( &mut self, ast: &'a ast::ClassBracketed, visitor: &mut V, ) -> Result<(), V::Err> { let mut ast = ClassInduct::from_bracketed(ast); loop { self.visit_class_pre(&ast, visitor)?; if let Some(x) = self.induct_class(&ast) { let child = x.child(); self.stack_class.push((ast, x)); ast = child; continue; } self.visit_class_post(&ast, visitor)?; // At this point, we now try to pop our call stack until it is // either empty or we hit another inductive case. loop { let (post_ast, frame) = match self.stack_class.pop() { None => return Ok(()), Some((post_ast, frame)) => (post_ast, frame), }; // If this is a union or a binary op, then we might have // additional inductive steps to process. if let Some(x) = self.pop_class(frame) { if let ClassFrame::BinaryRHS { ref op, .. } = x { visitor.visit_class_set_binary_op_in(op)?; } ast = x.child(); self.stack_class.push((post_ast, x)); break; } // Otherwise, we've finished visiting all the child nodes for // this class node, so we can post visit it now. self.visit_class_post(&post_ast, visitor)?; } } } /// Call the appropriate `Visitor` methods given an inductive step. fn visit_class_pre( &self, ast: &ClassInduct<'a>, visitor: &mut V, ) -> Result<(), V::Err> { match *ast { ClassInduct::Item(item) => { visitor.visit_class_set_item_pre(item)?; } ClassInduct::BinaryOp(op) => { visitor.visit_class_set_binary_op_pre(op)?; } } Ok(()) } /// Call the appropriate `Visitor` methods given an inductive step. fn visit_class_post( &self, ast: &ClassInduct<'a>, visitor: &mut V, ) -> Result<(), V::Err> { match *ast { ClassInduct::Item(item) => { visitor.visit_class_set_item_post(item)?; } ClassInduct::BinaryOp(op) => { visitor.visit_class_set_binary_op_post(op)?; } } Ok(()) } /// Build a stack frame for the given class node if one is needed (which /// occurs if and only if there are child nodes). Otherwise, return None. fn induct_class(&self, ast: &ClassInduct<'a>) -> Option> { match *ast { ClassInduct::Item(&ast::ClassSetItem::Bracketed(ref x)) => { match x.kind { ast::ClassSet::Item(ref item) => { Some(ClassFrame::Union { head: item, tail: &[] }) } ast::ClassSet::BinaryOp(ref op) => { Some(ClassFrame::Binary { op }) } } } ClassInduct::Item(&ast::ClassSetItem::Union(ref x)) => { if x.items.is_empty() { None } else { Some(ClassFrame::Union { head: &x.items[0], tail: &x.items[1..], }) } } ClassInduct::BinaryOp(op) => { Some(ClassFrame::BinaryLHS { op, lhs: &op.lhs, rhs: &op.rhs }) } _ => None, } } /// Pops the given frame. If the frame has an additional inductive step, /// then return it, otherwise return `None`. fn pop_class(&self, induct: ClassFrame<'a>) -> Option> { match induct { ClassFrame::Union { tail, .. } => { if tail.is_empty() { None } else { Some(ClassFrame::Union { head: &tail[0], tail: &tail[1..], }) } } ClassFrame::Binary { .. } => None, ClassFrame::BinaryLHS { op, rhs, .. } => { Some(ClassFrame::BinaryRHS { op, rhs }) } ClassFrame::BinaryRHS { .. } => None, } } } impl<'a> Frame<'a> { /// Perform the next inductive step on this frame and return the next /// child AST node to visit. fn child(&self) -> &'a Ast { match *self { Frame::Repetition(rep) => &rep.ast, Frame::Group(group) => &group.ast, Frame::Concat { head, .. } => head, Frame::Alternation { head, .. } => head, } } } impl<'a> ClassFrame<'a> { /// Perform the next inductive step on this frame and return the next /// child class node to visit. fn child(&self) -> ClassInduct<'a> { match *self { ClassFrame::Union { head, .. } => ClassInduct::Item(head), ClassFrame::Binary { op, .. } => ClassInduct::BinaryOp(op), ClassFrame::BinaryLHS { ref lhs, .. } => { ClassInduct::from_set(lhs) } ClassFrame::BinaryRHS { ref rhs, .. } => { ClassInduct::from_set(rhs) } } } } impl<'a> ClassInduct<'a> { fn from_bracketed(ast: &'a ast::ClassBracketed) -> ClassInduct<'a> { ClassInduct::from_set(&ast.kind) } fn from_set(ast: &'a ast::ClassSet) -> ClassInduct<'a> { match *ast { ast::ClassSet::Item(ref item) => ClassInduct::Item(item), ast::ClassSet::BinaryOp(ref op) => ClassInduct::BinaryOp(op), } } } impl<'a> fmt::Debug for ClassFrame<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let x = match *self { ClassFrame::Union { .. } => "Union", ClassFrame::Binary { .. } => "Binary", ClassFrame::BinaryLHS { .. } => "BinaryLHS", ClassFrame::BinaryRHS { .. } => "BinaryRHS", }; write!(f, "{}", x) } } impl<'a> fmt::Debug for ClassInduct<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let x = match *self { ClassInduct::Item(it) => match *it { ast::ClassSetItem::Empty(_) => "Item(Empty)", ast::ClassSetItem::Literal(_) => "Item(Literal)", ast::ClassSetItem::Range(_) => "Item(Range)", ast::ClassSetItem::Ascii(_) => "Item(Ascii)", ast::ClassSetItem::Perl(_) => "Item(Perl)", ast::ClassSetItem::Unicode(_) => "Item(Unicode)", ast::ClassSetItem::Bracketed(_) => "Item(Bracketed)", ast::ClassSetItem::Union(_) => "Item(Union)", }, ClassInduct::BinaryOp(it) => match it.kind { ast::ClassSetBinaryOpKind::Intersection => { "BinaryOp(Intersection)" } ast::ClassSetBinaryOpKind::Difference => { "BinaryOp(Difference)" } ast::ClassSetBinaryOpKind::SymmetricDifference => { "BinaryOp(SymmetricDifference)" } }, }; write!(f, "{}", x) } }