summaryrefslogtreecommitdiffstats
path: root/third_party/rust/regex-syntax/src/ast/visitor.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/regex-syntax/src/ast/visitor.rs')
-rw-r--r--third_party/rust/regex-syntax/src/ast/visitor.rs517
1 files changed, 517 insertions, 0 deletions
diff --git a/third_party/rust/regex-syntax/src/ast/visitor.rs b/third_party/rust/regex-syntax/src/ast/visitor.rs
new file mode 100644
index 0000000000..78ee487cff
--- /dev/null
+++ b/third_party/rust/regex-syntax/src/ast/visitor.rs
@@ -0,0 +1,517 @@
+use std::fmt;
+
+use crate::ast::{self, Ast};
+
+/// A trait for visiting an abstract syntax tree (AST) in depth first order.
+///
+/// The principle aim of this trait is to enable callers to perform case
+/// analysis on an abstract syntax tree without necessarily using recursion.
+/// In particular, this permits callers to do case analysis with constant stack
+/// usage, which can be important since the size of an abstract syntax tree
+/// may be proportional to end user input.
+///
+/// Typical usage of this trait involves providing an implementation and then
+/// running it using the [`visit`](fn.visit.html) function.
+///
+/// Note that the abstract syntax tree for a regular expression is quite
+/// complex. Unless you specifically need it, you might be able to use the
+/// much simpler
+/// [high-level intermediate representation](../hir/struct.Hir.html)
+/// and its
+/// [corresponding `Visitor` trait](../hir/trait.Visitor.html)
+/// instead.
+pub trait Visitor {
+ /// The result of visiting an AST.
+ type Output;
+ /// An error that visiting an AST might return.
+ type Err;
+
+ /// All implementors of `Visitor` must provide a `finish` method, which
+ /// yields the result of visiting the AST or an error.
+ fn finish(self) -> Result<Self::Output, Self::Err>;
+
+ /// This method is called before beginning traversal of the AST.
+ fn start(&mut self) {}
+
+ /// This method is called on an `Ast` before descending into child `Ast`
+ /// nodes.
+ fn visit_pre(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
+ Ok(())
+ }
+
+ /// This method is called on an `Ast` after descending all of its child
+ /// `Ast` nodes.
+ fn visit_post(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
+ Ok(())
+ }
+
+ /// This method is called between child nodes of an
+ /// [`Alternation`](struct.Alternation.html).
+ fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
+ Ok(())
+ }
+
+ /// This method is called on every
+ /// [`ClassSetItem`](enum.ClassSetItem.html)
+ /// before descending into child nodes.
+ fn visit_class_set_item_pre(
+ &mut self,
+ _ast: &ast::ClassSetItem,
+ ) -> Result<(), Self::Err> {
+ Ok(())
+ }
+
+ /// This method is called on every
+ /// [`ClassSetItem`](enum.ClassSetItem.html)
+ /// after descending into child nodes.
+ fn visit_class_set_item_post(
+ &mut self,
+ _ast: &ast::ClassSetItem,
+ ) -> Result<(), Self::Err> {
+ Ok(())
+ }
+
+ /// This method is called on every
+ /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html)
+ /// before descending into child nodes.
+ fn visit_class_set_binary_op_pre(
+ &mut self,
+ _ast: &ast::ClassSetBinaryOp,
+ ) -> Result<(), Self::Err> {
+ Ok(())
+ }
+
+ /// This method is called on every
+ /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html)
+ /// after descending into child nodes.
+ fn visit_class_set_binary_op_post(
+ &mut self,
+ _ast: &ast::ClassSetBinaryOp,
+ ) -> Result<(), Self::Err> {
+ Ok(())
+ }
+
+ /// This method is called between the left hand and right hand child nodes
+ /// of a [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html).
+ fn visit_class_set_binary_op_in(
+ &mut self,
+ _ast: &ast::ClassSetBinaryOp,
+ ) -> Result<(), Self::Err> {
+ Ok(())
+ }
+}
+
+/// Executes an implementation of `Visitor` in constant stack space.
+///
+/// This function will visit every node in the given `Ast` while calling the
+/// appropriate methods provided by the
+/// [`Visitor`](trait.Visitor.html) trait.
+///
+/// The primary use case for this method is when one wants to perform case
+/// analysis over an `Ast` without using a stack size proportional to the depth
+/// of the `Ast`. Namely, this method will instead use constant stack size, but
+/// will use heap space proportional to the size of the `Ast`. This may be
+/// desirable in cases where the size of `Ast` is proportional to end user
+/// input.
+///
+/// If the visitor returns an error at any point, then visiting is stopped and
+/// the error is returned.
+pub fn visit<V: Visitor>(ast: &Ast, visitor: V) -> Result<V::Output, V::Err> {
+ HeapVisitor::new().visit(ast, visitor)
+}
+
+/// HeapVisitor visits every item in an `Ast` recursively using constant stack
+/// size and a heap size proportional to the size of the `Ast`.
+struct HeapVisitor<'a> {
+ /// A stack of `Ast` nodes. This is roughly analogous to the call stack
+ /// used in a typical recursive visitor.
+ stack: Vec<(&'a Ast, Frame<'a>)>,
+ /// Similar to the `Ast` stack above, but is used only for character
+ /// classes. In particular, character classes embed their own mini
+ /// recursive syntax.
+ stack_class: Vec<(ClassInduct<'a>, ClassFrame<'a>)>,
+}
+
+/// Represents a single stack frame while performing structural induction over
+/// an `Ast`.
+enum Frame<'a> {
+ /// A stack frame allocated just before descending into a repetition
+ /// operator's child node.
+ Repetition(&'a ast::Repetition),
+ /// A stack frame allocated just before descending into a group's child
+ /// node.
+ Group(&'a ast::Group),
+ /// The stack frame used while visiting every child node of a concatenation
+ /// of expressions.
+ Concat {
+ /// The child node we are currently visiting.
+ head: &'a Ast,
+ /// The remaining child nodes to visit (which may be empty).
+ tail: &'a [Ast],
+ },
+ /// The stack frame used while visiting every child node of an alternation
+ /// of expressions.
+ Alternation {
+ /// The child node we are currently visiting.
+ head: &'a Ast,
+ /// The remaining child nodes to visit (which may be empty).
+ tail: &'a [Ast],
+ },
+}
+
+/// Represents a single stack frame while performing structural induction over
+/// a character class.
+enum ClassFrame<'a> {
+ /// The stack frame used while visiting every child node of a union of
+ /// character class items.
+ Union {
+ /// The child node we are currently visiting.
+ head: &'a ast::ClassSetItem,
+ /// The remaining child nodes to visit (which may be empty).
+ tail: &'a [ast::ClassSetItem],
+ },
+ /// The stack frame used while a binary class operation.
+ Binary { op: &'a ast::ClassSetBinaryOp },
+ /// A stack frame allocated just before descending into a binary operator's
+ /// left hand child node.
+ BinaryLHS {
+ op: &'a ast::ClassSetBinaryOp,
+ lhs: &'a ast::ClassSet,
+ rhs: &'a ast::ClassSet,
+ },
+ /// A stack frame allocated just before descending into a binary operator's
+ /// right hand child node.
+ BinaryRHS { op: &'a ast::ClassSetBinaryOp, rhs: &'a ast::ClassSet },
+}
+
+/// A representation of the inductive step when performing structural induction
+/// over a character class.
+///
+/// Note that there is no analogous explicit type for the inductive step for
+/// `Ast` nodes because the inductive step is just an `Ast`. For character
+/// classes, the inductive step can produce one of two possible child nodes:
+/// an item or a binary operation. (An item cannot be a binary operation
+/// because that would imply binary operations can be unioned in the concrete
+/// syntax, which is not possible.)
+enum ClassInduct<'a> {
+ Item(&'a ast::ClassSetItem),
+ BinaryOp(&'a ast::ClassSetBinaryOp),
+}
+
+impl<'a> HeapVisitor<'a> {
+ fn new() -> HeapVisitor<'a> {
+ HeapVisitor { stack: vec![], stack_class: vec![] }
+ }
+
+ fn visit<V: Visitor>(
+ &mut self,
+ mut ast: &'a Ast,
+ mut visitor: V,
+ ) -> Result<V::Output, V::Err> {
+ self.stack.clear();
+ self.stack_class.clear();
+
+ visitor.start();
+ loop {
+ visitor.visit_pre(ast)?;
+ if let Some(x) = self.induct(ast, &mut visitor)? {
+ let child = x.child();
+ self.stack.push((ast, x));
+ ast = child;
+ continue;
+ }
+ // No induction means we have a base case, so we can post visit
+ // it now.
+ visitor.visit_post(ast)?;
+
+ // At this point, we now try to pop our call stack until it is
+ // either empty or we hit another inductive case.
+ loop {
+ let (post_ast, frame) = match self.stack.pop() {
+ None => return visitor.finish(),
+ Some((post_ast, frame)) => (post_ast, frame),
+ };
+ // If this is a concat/alternate, then we might have additional
+ // inductive steps to process.
+ if let Some(x) = self.pop(frame) {
+ if let Frame::Alternation { .. } = x {
+ visitor.visit_alternation_in()?;
+ }
+ ast = x.child();
+ self.stack.push((post_ast, x));
+ break;
+ }
+ // Otherwise, we've finished visiting all the child nodes for
+ // this AST, so we can post visit it now.
+ visitor.visit_post(post_ast)?;
+ }
+ }
+ }
+
+ /// Build a stack frame for the given AST if one is needed (which occurs if
+ /// and only if there are child nodes in the AST). Otherwise, return None.
+ ///
+ /// If this visits a class, then the underlying visitor implementation may
+ /// return an error which will be passed on here.
+ fn induct<V: Visitor>(
+ &mut self,
+ ast: &'a Ast,
+ visitor: &mut V,
+ ) -> Result<Option<Frame<'a>>, V::Err> {
+ Ok(match *ast {
+ Ast::Class(ast::Class::Bracketed(ref x)) => {
+ self.visit_class(x, visitor)?;
+ None
+ }
+ Ast::Repetition(ref x) => Some(Frame::Repetition(x)),
+ Ast::Group(ref x) => Some(Frame::Group(x)),
+ Ast::Concat(ref x) if x.asts.is_empty() => None,
+ Ast::Concat(ref x) => {
+ Some(Frame::Concat { head: &x.asts[0], tail: &x.asts[1..] })
+ }
+ Ast::Alternation(ref x) if x.asts.is_empty() => None,
+ Ast::Alternation(ref x) => Some(Frame::Alternation {
+ head: &x.asts[0],
+ tail: &x.asts[1..],
+ }),
+ _ => None,
+ })
+ }
+
+ /// Pops the given frame. If the frame has an additional inductive step,
+ /// then return it, otherwise return `None`.
+ fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
+ match induct {
+ Frame::Repetition(_) => None,
+ Frame::Group(_) => None,
+ Frame::Concat { tail, .. } => {
+ if tail.is_empty() {
+ None
+ } else {
+ Some(Frame::Concat { head: &tail[0], tail: &tail[1..] })
+ }
+ }
+ Frame::Alternation { tail, .. } => {
+ if tail.is_empty() {
+ None
+ } else {
+ Some(Frame::Alternation {
+ head: &tail[0],
+ tail: &tail[1..],
+ })
+ }
+ }
+ }
+ }
+
+ fn visit_class<V: Visitor>(
+ &mut self,
+ ast: &'a ast::ClassBracketed,
+ visitor: &mut V,
+ ) -> Result<(), V::Err> {
+ let mut ast = ClassInduct::from_bracketed(ast);
+ loop {
+ self.visit_class_pre(&ast, visitor)?;
+ if let Some(x) = self.induct_class(&ast) {
+ let child = x.child();
+ self.stack_class.push((ast, x));
+ ast = child;
+ continue;
+ }
+ self.visit_class_post(&ast, visitor)?;
+
+ // At this point, we now try to pop our call stack until it is
+ // either empty or we hit another inductive case.
+ loop {
+ let (post_ast, frame) = match self.stack_class.pop() {
+ None => return Ok(()),
+ Some((post_ast, frame)) => (post_ast, frame),
+ };
+ // If this is a union or a binary op, then we might have
+ // additional inductive steps to process.
+ if let Some(x) = self.pop_class(frame) {
+ if let ClassFrame::BinaryRHS { ref op, .. } = x {
+ visitor.visit_class_set_binary_op_in(op)?;
+ }
+ ast = x.child();
+ self.stack_class.push((post_ast, x));
+ break;
+ }
+ // Otherwise, we've finished visiting all the child nodes for
+ // this class node, so we can post visit it now.
+ self.visit_class_post(&post_ast, visitor)?;
+ }
+ }
+ }
+
+ /// Call the appropriate `Visitor` methods given an inductive step.
+ fn visit_class_pre<V: Visitor>(
+ &self,
+ ast: &ClassInduct<'a>,
+ visitor: &mut V,
+ ) -> Result<(), V::Err> {
+ match *ast {
+ ClassInduct::Item(item) => {
+ visitor.visit_class_set_item_pre(item)?;
+ }
+ ClassInduct::BinaryOp(op) => {
+ visitor.visit_class_set_binary_op_pre(op)?;
+ }
+ }
+ Ok(())
+ }
+
+ /// Call the appropriate `Visitor` methods given an inductive step.
+ fn visit_class_post<V: Visitor>(
+ &self,
+ ast: &ClassInduct<'a>,
+ visitor: &mut V,
+ ) -> Result<(), V::Err> {
+ match *ast {
+ ClassInduct::Item(item) => {
+ visitor.visit_class_set_item_post(item)?;
+ }
+ ClassInduct::BinaryOp(op) => {
+ visitor.visit_class_set_binary_op_post(op)?;
+ }
+ }
+ Ok(())
+ }
+
+ /// Build a stack frame for the given class node if one is needed (which
+ /// occurs if and only if there are child nodes). Otherwise, return None.
+ fn induct_class(&self, ast: &ClassInduct<'a>) -> Option<ClassFrame<'a>> {
+ match *ast {
+ ClassInduct::Item(&ast::ClassSetItem::Bracketed(ref x)) => {
+ match x.kind {
+ ast::ClassSet::Item(ref item) => {
+ Some(ClassFrame::Union { head: item, tail: &[] })
+ }
+ ast::ClassSet::BinaryOp(ref op) => {
+ Some(ClassFrame::Binary { op })
+ }
+ }
+ }
+ ClassInduct::Item(&ast::ClassSetItem::Union(ref x)) => {
+ if x.items.is_empty() {
+ None
+ } else {
+ Some(ClassFrame::Union {
+ head: &x.items[0],
+ tail: &x.items[1..],
+ })
+ }
+ }
+ ClassInduct::BinaryOp(op) => {
+ Some(ClassFrame::BinaryLHS { op, lhs: &op.lhs, rhs: &op.rhs })
+ }
+ _ => None,
+ }
+ }
+
+ /// Pops the given frame. If the frame has an additional inductive step,
+ /// then return it, otherwise return `None`.
+ fn pop_class(&self, induct: ClassFrame<'a>) -> Option<ClassFrame<'a>> {
+ match induct {
+ ClassFrame::Union { tail, .. } => {
+ if tail.is_empty() {
+ None
+ } else {
+ Some(ClassFrame::Union {
+ head: &tail[0],
+ tail: &tail[1..],
+ })
+ }
+ }
+ ClassFrame::Binary { .. } => None,
+ ClassFrame::BinaryLHS { op, rhs, .. } => {
+ Some(ClassFrame::BinaryRHS { op, rhs })
+ }
+ ClassFrame::BinaryRHS { .. } => None,
+ }
+ }
+}
+
+impl<'a> Frame<'a> {
+ /// Perform the next inductive step on this frame and return the next
+ /// child AST node to visit.
+ fn child(&self) -> &'a Ast {
+ match *self {
+ Frame::Repetition(rep) => &rep.ast,
+ Frame::Group(group) => &group.ast,
+ Frame::Concat { head, .. } => head,
+ Frame::Alternation { head, .. } => head,
+ }
+ }
+}
+
+impl<'a> ClassFrame<'a> {
+ /// Perform the next inductive step on this frame and return the next
+ /// child class node to visit.
+ fn child(&self) -> ClassInduct<'a> {
+ match *self {
+ ClassFrame::Union { head, .. } => ClassInduct::Item(head),
+ ClassFrame::Binary { op, .. } => ClassInduct::BinaryOp(op),
+ ClassFrame::BinaryLHS { ref lhs, .. } => {
+ ClassInduct::from_set(lhs)
+ }
+ ClassFrame::BinaryRHS { ref rhs, .. } => {
+ ClassInduct::from_set(rhs)
+ }
+ }
+ }
+}
+
+impl<'a> ClassInduct<'a> {
+ fn from_bracketed(ast: &'a ast::ClassBracketed) -> ClassInduct<'a> {
+ ClassInduct::from_set(&ast.kind)
+ }
+
+ fn from_set(ast: &'a ast::ClassSet) -> ClassInduct<'a> {
+ match *ast {
+ ast::ClassSet::Item(ref item) => ClassInduct::Item(item),
+ ast::ClassSet::BinaryOp(ref op) => ClassInduct::BinaryOp(op),
+ }
+ }
+}
+
+impl<'a> fmt::Debug for ClassFrame<'a> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ let x = match *self {
+ ClassFrame::Union { .. } => "Union",
+ ClassFrame::Binary { .. } => "Binary",
+ ClassFrame::BinaryLHS { .. } => "BinaryLHS",
+ ClassFrame::BinaryRHS { .. } => "BinaryRHS",
+ };
+ write!(f, "{}", x)
+ }
+}
+
+impl<'a> fmt::Debug for ClassInduct<'a> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ let x = match *self {
+ ClassInduct::Item(it) => match *it {
+ ast::ClassSetItem::Empty(_) => "Item(Empty)",
+ ast::ClassSetItem::Literal(_) => "Item(Literal)",
+ ast::ClassSetItem::Range(_) => "Item(Range)",
+ ast::ClassSetItem::Ascii(_) => "Item(Ascii)",
+ ast::ClassSetItem::Perl(_) => "Item(Perl)",
+ ast::ClassSetItem::Unicode(_) => "Item(Unicode)",
+ ast::ClassSetItem::Bracketed(_) => "Item(Bracketed)",
+ ast::ClassSetItem::Union(_) => "Item(Union)",
+ },
+ ClassInduct::BinaryOp(it) => match it.kind {
+ ast::ClassSetBinaryOpKind::Intersection => {
+ "BinaryOp(Intersection)"
+ }
+ ast::ClassSetBinaryOpKind::Difference => {
+ "BinaryOp(Difference)"
+ }
+ ast::ClassSetBinaryOpKind::SymmetricDifference => {
+ "BinaryOp(SymmetricDifference)"
+ }
+ },
+ };
+ write!(f, "{}", x)
+ }
+}