1 files changed, 3724 insertions, 0 deletions
diff --git a/vendor/regex-syntax/src/hir/translate.rs b/vendor/regex-syntax/src/hir/translate.rs
new file mode 100644
index 0000000..313a1e9
--- /dev/null
+++ b/vendor/regex-syntax/src/hir/translate.rs
@@ -0,0 +1,3724 @@
+/*!
+Defines a translator that converts an `Ast` to an `Hir`.
+*/
+
+use core::cell::{Cell, RefCell};
+
+use alloc::{boxed::Box, string::ToString, vec, vec::Vec};
+
+use crate::{
+    ast::{self, Ast, Span, Visitor},
+    either::Either,
+    hir::{self, Error, ErrorKind, Hir, HirKind},
+    unicode::{self, ClassQuery},
+};
+
+type Result<T> = core::result::Result<T, Error>;
+
+/// A builder for constructing an AST->HIR translator.
+#[derive(Clone, Debug)]
+pub struct TranslatorBuilder {
+    utf8: bool,
+    line_terminator: u8,
+    flags: Flags,
+}
+
+impl Default for TranslatorBuilder {
+    fn default() -> TranslatorBuilder {
+        TranslatorBuilder::new()
+    }
+}
+
+impl TranslatorBuilder {
+    /// Create a new translator builder with a default c onfiguration.
+    pub fn new() -> TranslatorBuilder {
+        TranslatorBuilder {
+            utf8: true,
+            line_terminator: b'\n',
+            flags: Flags::default(),
+        }
+    }
+
+    /// Build a translator using the current configuration.
+    pub fn build(&self) -> Translator {
+        Translator {
+            stack: RefCell::new(vec![]),
+            flags: Cell::new(self.flags),
+            utf8: self.utf8,
+            line_terminator: self.line_terminator,
+        }
+    }
+
+    /// When disabled, translation will permit the construction of a regular
+    /// expression that may match invalid UTF-8.
+    ///
+    /// When enabled (the default), the translator is guaranteed to produce an
+    /// expression that, for non-empty matches, will only ever produce spans
+    /// that are entirely valid UTF-8 (otherwise, the translator will return an
+    /// error).
+    ///
+    /// Perhaps surprisingly, when UTF-8 is enabled, an empty regex or even
+    /// a negated ASCII word boundary (uttered as `(?-u:\B)` in the concrete
+    /// syntax) will be allowed even though they can produce matches that split
+    /// a UTF-8 encoded codepoint. This only applies to zero-width or "empty"
+    /// matches, and it is expected that the regex engine itself must handle
+    /// these cases if necessary (perhaps by suppressing any zero-width matches
+    /// that split a codepoint).
+    pub fn utf8(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.utf8 = yes;
+        self
+    }
+
+    /// Sets the line terminator for use with `(?u-s:.)` and `(?-us:.)`.
+    ///
+    /// Namely, instead of `.` (by default) matching everything except for `\n`,
+    /// this will cause `.` to match everything except for the byte given.
+    ///
+    /// If `.` is used in a context where Unicode mode is enabled and this byte
+    /// isn't ASCII, then an error will be returned. When Unicode mode is
+    /// disabled, then any byte is permitted, but will return an error if UTF-8
+    /// mode is enabled and it is a non-ASCII byte.
+    ///
+    /// In short, any ASCII value for a line terminator is always okay. But a
+    /// non-ASCII byte might result in an error depending on whether Unicode
+    /// mode or UTF-8 mode are enabled.
+    ///
+    /// Note that if `R` mode is enabled then it always takes precedence and
+    /// the line terminator will be treated as `\r` and `\n` simultaneously.
+    ///
+    /// Note also that this *doesn't* impact the look-around assertions
+    /// `(?m:^)` and `(?m:$)`. That's usually controlled by additional
+    /// configuration in the regex engine itself.
+    pub fn line_terminator(&mut self, byte: u8) -> &mut TranslatorBuilder {
+        self.line_terminator = byte;
+        self
+    }
+
+    /// Enable or disable the case insensitive flag (`i`) by default.
+    pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.flags.case_insensitive = if yes { Some(true) } else { None };
+        self
+    }
+
+    /// Enable or disable the multi-line matching flag (`m`) by default.
+    pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.flags.multi_line = if yes { Some(true) } else { None };
+        self
+    }
+
+    /// Enable or disable the "dot matches any character" flag (`s`) by
+    /// default.
+    pub fn dot_matches_new_line(
+        &mut self,
+        yes: bool,
+    ) -> &mut TranslatorBuilder {
+        self.flags.dot_matches_new_line = if yes { Some(true) } else { None };
+        self
+    }
+
+    /// Enable or disable the CRLF mode flag (`R`) by default.
+    pub fn crlf(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.flags.crlf = if yes { Some(true) } else { None };
+        self
+    }
+
+    /// Enable or disable the "swap greed" flag (`U`) by default.
+    pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.flags.swap_greed = if yes { Some(true) } else { None };
+        self
+    }
+
+    /// Enable or disable the Unicode flag (`u`) by default.
+    pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.flags.unicode = if yes { None } else { Some(false) };
+        self
+    }
+}
+
+/// A translator maps abstract syntax to a high level intermediate
+/// representation.
+///
+/// A translator may be benefit from reuse. That is, a translator can translate
+/// many abstract syntax trees.
+///
+/// A `Translator` can be configured in more detail via a
+/// [`TranslatorBuilder`].
+#[derive(Clone, Debug)]
+pub struct Translator {
+    /// Our call stack, but on the heap.
+    stack: RefCell<Vec<HirFrame>>,
+    /// The current flag settings.
+    flags: Cell<Flags>,
+    /// Whether we're allowed to produce HIR that can match arbitrary bytes.
+    utf8: bool,
+    /// The line terminator to use for `.`.
+    line_terminator: u8,
+}
+
+impl Translator {
+    /// Create a new translator using the default configuration.
+    pub fn new() -> Translator {
+        TranslatorBuilder::new().build()
+    }
+
+    /// Translate the given abstract syntax tree (AST) into a high level
+    /// intermediate representation (HIR).
+    ///
+    /// If there was a problem doing the translation, then an HIR-specific
+    /// error is returned.
+    ///
+    /// The original pattern string used to produce the `Ast` *must* also be
+    /// provided. The translator does not use the pattern string during any
+    /// correct translation, but is used for error reporting.
+    pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
+        ast::visit(ast, TranslatorI::new(self, pattern))
+    }
+}
+
+/// An HirFrame is a single stack frame, represented explicitly, which is
+/// created for each item in the Ast that we traverse.
+///
+/// Note that technically, this type doesn't represent our entire stack
+/// frame. In particular, the Ast visitor represents any state associated with
+/// traversing the Ast itself.
+#[derive(Clone, Debug)]
+enum HirFrame {
+    /// An arbitrary HIR expression. These get pushed whenever we hit a base
+    /// case in the Ast. They get popped after an inductive (i.e., recursive)
+    /// step is complete.
+    Expr(Hir),
+    /// A literal that is being constructed, character by character, from the
+    /// AST. We need this because the AST gives each individual character its
+    /// own node. So as we see characters, we peek at the top-most HirFrame.
+    /// If it's a literal, then we add to it. Otherwise, we push a new literal.
+    /// When it comes time to pop it, we convert it to an Hir via Hir::literal.
+    Literal(Vec<u8>),
+    /// A Unicode character class. This frame is mutated as we descend into
+    /// the Ast of a character class (which is itself its own mini recursive
+    /// structure).
+    ClassUnicode(hir::ClassUnicode),
+    /// A byte-oriented character class. This frame is mutated as we descend
+    /// into the Ast of a character class (which is itself its own mini
+    /// recursive structure).
+    ///
+    /// Byte character classes are created when Unicode mode (`u`) is disabled.
+    /// If `utf8` is enabled (the default), then a byte character is only
+    /// permitted to match ASCII text.
+    ClassBytes(hir::ClassBytes),
+    /// This is pushed whenever a repetition is observed. After visiting every
+    /// sub-expression in the repetition, the translator's stack is expected to
+    /// have this sentinel at the top.
+    ///
+    /// This sentinel only exists to stop other things (like flattening
+    /// literals) from reaching across repetition operators.
+    Repetition,
+    /// This is pushed on to the stack upon first seeing any kind of capture,
+    /// indicated by parentheses (including non-capturing groups). It is popped
+    /// upon leaving a group.
+    Group {
+        /// The old active flags when this group was opened.
+        ///
+        /// If this group sets flags, then the new active flags are set to the
+        /// result of merging the old flags with the flags introduced by this
+        /// group. If the group doesn't set any flags, then this is simply
+        /// equivalent to whatever flags were set when the group was opened.
+        ///
+        /// When this group is popped, the active flags should be restored to
+        /// the flags set here.
+        ///
+        /// The "active" flags correspond to whatever flags are set in the
+        /// Translator.
+        old_flags: Flags,
+    },
+    /// This is pushed whenever a concatenation is observed. After visiting
+    /// every sub-expression in the concatenation, the translator's stack is
+    /// popped until it sees a Concat frame.
+    Concat,
+    /// This is pushed whenever an alternation is observed. After visiting
+    /// every sub-expression in the alternation, the translator's stack is
+    /// popped until it sees an Alternation frame.
+    Alternation,
+    /// This is pushed immediately before each sub-expression in an
+    /// alternation. This separates the branches of an alternation on the
+    /// stack and prevents literal flattening from reaching across alternation
+    /// branches.
+    ///
+    /// It is popped after each expression in a branch until an 'Alternation'
+    /// frame is observed when doing a post visit on an alternation.
+    AlternationBranch,
+}
+
+impl HirFrame {
+    /// Assert that the current stack frame is an Hir expression and return it.
+    fn unwrap_expr(self) -> Hir {
+        match self {
+            HirFrame::Expr(expr) => expr,
+            HirFrame::Literal(lit) => Hir::literal(lit),
+            _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self),
+        }
+    }
+
+    /// Assert that the current stack frame is a Unicode class expression and
+    /// return it.
+    fn unwrap_class_unicode(self) -> hir::ClassUnicode {
+        match self {
+            HirFrame::ClassUnicode(cls) => cls,
+            _ => panic!(
+                "tried to unwrap Unicode class \
+                 from HirFrame, got: {:?}",
+                self
+            ),
+        }
+    }
+
+    /// Assert that the current stack frame is a byte class expression and
+    /// return it.
+    fn unwrap_class_bytes(self) -> hir::ClassBytes {
+        match self {
+            HirFrame::ClassBytes(cls) => cls,
+            _ => panic!(
+                "tried to unwrap byte class \
+                 from HirFrame, got: {:?}",
+                self
+            ),
+        }
+    }
+
+    /// Assert that the current stack frame is a repetition sentinel. If it
+    /// isn't, then panic.
+    fn unwrap_repetition(self) {
+        match self {
+            HirFrame::Repetition => {}
+            _ => {
+                panic!(
+                    "tried to unwrap repetition from HirFrame, got: {:?}",
+                    self
+                )
+            }
+        }
+    }
+
+    /// Assert that the current stack frame is a group indicator and return
+    /// its corresponding flags (the flags that were active at the time the
+    /// group was entered).
+    fn unwrap_group(self) -> Flags {
+        match self {
+            HirFrame::Group { old_flags } => old_flags,
+            _ => {
+                panic!("tried to unwrap group from HirFrame, got: {:?}", self)
+            }
+        }
+    }
+
+    /// Assert that the current stack frame is an alternation pipe sentinel. If
+    /// it isn't, then panic.
+    fn unwrap_alternation_pipe(self) {
+        match self {
+            HirFrame::AlternationBranch => {}
+            _ => {
+                panic!(
+                    "tried to unwrap alt pipe from HirFrame, got: {:?}",
+                    self
+                )
+            }
+        }
+    }
+}
+
+impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
+    type Output = Hir;
+    type Err = Error;
+
+    fn finish(self) -> Result<Hir> {
+        // ... otherwise, we should have exactly one HIR on the stack.
+        assert_eq!(self.trans().stack.borrow().len(), 1);
+        Ok(self.pop().unwrap().unwrap_expr())
+    }
+
+    fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
+        match *ast {
+            Ast::ClassBracketed(_) => {
+                if self.flags().unicode() {
+                    let cls = hir::ClassUnicode::empty();
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let cls = hir::ClassBytes::empty();
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            Ast::Repetition(_) => self.push(HirFrame::Repetition),
+            Ast::Group(ref x) => {
+                let old_flags = x
+                    .flags()
+                    .map(|ast| self.set_flags(ast))
+                    .unwrap_or_else(|| self.flags());
+                self.push(HirFrame::Group { old_flags });
+            }
+            Ast::Concat(_) => {
+                self.push(HirFrame::Concat);
+            }
+            Ast::Alternation(ref x) => {
+                self.push(HirFrame::Alternation);
+                if !x.asts.is_empty() {
+                    self.push(HirFrame::AlternationBranch);
+                }
+            }
+            _ => {}
+        }
+        Ok(())
+    }
+
+    fn visit_post(&mut self, ast: &Ast) -> Result<()> {
+        match *ast {
+            Ast::Empty(_) => {
+                self.push(HirFrame::Expr(Hir::empty()));
+            }
+            Ast::Flags(ref x) => {
+                self.set_flags(&x.flags);
+                // Flags in the AST are generally considered directives and
+                // not actual sub-expressions. However, they can be used in
+                // the concrete syntax like `((?i))`, and we need some kind of
+                // indication of an expression there, and Empty is the correct
+                // choice.
+                //
+                // There can also be things like `(?i)+`, but we rule those out
+                // in the parser. In the future, we might allow them for
+                // consistency sake.
+                self.push(HirFrame::Expr(Hir::empty()));
+            }
+            Ast::Literal(ref x) => match self.ast_literal_to_scalar(x)? {
+                Either::Right(byte) => self.push_byte(byte),
+                Either::Left(ch) => match self.case_fold_char(x.span, ch)? {
+                    None => self.push_char(ch),
+                    Some(expr) => self.push(HirFrame::Expr(expr)),
+                },
+            },
+            Ast::Dot(ref span) => {
+                self.push(HirFrame::Expr(self.hir_dot(**span)?));
+            }
+            Ast::Assertion(ref x) => {
+                self.push(HirFrame::Expr(self.hir_assertion(x)?));
+            }
+            Ast::ClassPerl(ref x) => {
+                if self.flags().unicode() {
+                    let cls = self.hir_perl_unicode_class(x)?;
+                    let hcls = hir::Class::Unicode(cls);
+                    self.push(HirFrame::Expr(Hir::class(hcls)));
+                } else {
+                    let cls = self.hir_perl_byte_class(x)?;
+                    let hcls = hir::Class::Bytes(cls);
+                    self.push(HirFrame::Expr(Hir::class(hcls)));
+                }
+            }
+            Ast::ClassUnicode(ref x) => {
+                let cls = hir::Class::Unicode(self.hir_unicode_class(x)?);
+                self.push(HirFrame::Expr(Hir::class(cls)));
+            }
+            Ast::ClassBracketed(ref ast) => {
+                if self.flags().unicode() {
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    self.unicode_fold_and_negate(
+                        &ast.span,
+                        ast.negated,
+                        &mut cls,
+                    )?;
+                    let expr = Hir::class(hir::Class::Unicode(cls));
+                    self.push(HirFrame::Expr(expr));
+                } else {
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    self.bytes_fold_and_negate(
+                        &ast.span,
+                        ast.negated,
+                        &mut cls,
+                    )?;
+                    let expr = Hir::class(hir::Class::Bytes(cls));
+                    self.push(HirFrame::Expr(expr));
+                }
+            }
+            Ast::Repetition(ref x) => {
+                let expr = self.pop().unwrap().unwrap_expr();
+                self.pop().unwrap().unwrap_repetition();
+                self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
+            }
+            Ast::Group(ref x) => {
+                let expr = self.pop().unwrap().unwrap_expr();
+                let old_flags = self.pop().unwrap().unwrap_group();
+                self.trans().flags.set(old_flags);
+                self.push(HirFrame::Expr(self.hir_capture(x, expr)));
+            }
+            Ast::Concat(_) => {
+                let mut exprs = vec![];
+                while let Some(expr) = self.pop_concat_expr() {
+                    if !matches!(*expr.kind(), HirKind::Empty) {
+                        exprs.push(expr);
+                    }
+                }
+                exprs.reverse();
+                self.push(HirFrame::Expr(Hir::concat(exprs)));
+            }
+            Ast::Alternation(_) => {
+                let mut exprs = vec![];
+                while let Some(expr) = self.pop_alt_expr() {
+                    self.pop().unwrap().unwrap_alternation_pipe();
+                    exprs.push(expr);
+                }
+                exprs.reverse();
+                self.push(HirFrame::Expr(Hir::alternation(exprs)));
+            }
+        }
+        Ok(())
+    }
+
+    fn visit_alternation_in(&mut self) -> Result<()> {
+        self.push(HirFrame::AlternationBranch);
+        Ok(())
+    }
+
+    fn visit_class_set_item_pre(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<()> {
+        match *ast {
+            ast::ClassSetItem::Bracketed(_) => {
+                if self.flags().unicode() {
+                    let cls = hir::ClassUnicode::empty();
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let cls = hir::ClassBytes::empty();
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            // We needn't handle the Union case here since the visitor will
+            // do it for us.
+            _ => {}
+        }
+        Ok(())
+    }
+
+    fn visit_class_set_item_post(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<()> {
+        match *ast {
+            ast::ClassSetItem::Empty(_) => {}
+            ast::ClassSetItem::Literal(ref x) => {
+                if self.flags().unicode() {
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    let byte = self.class_literal_byte(x)?;
+                    cls.push(hir::ClassBytesRange::new(byte, byte));
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            ast::ClassSetItem::Range(ref x) => {
+                if self.flags().unicode() {
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    let start = self.class_literal_byte(&x.start)?;
+                    let end = self.class_literal_byte(&x.end)?;
+                    cls.push(hir::ClassBytesRange::new(start, end));
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            ast::ClassSetItem::Ascii(ref x) => {
+                if self.flags().unicode() {
+                    let xcls = self.hir_ascii_unicode_class(x)?;
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    cls.union(&xcls);
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let xcls = self.hir_ascii_byte_class(x)?;
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    cls.union(&xcls);
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            ast::ClassSetItem::Unicode(ref x) => {
+                let xcls = self.hir_unicode_class(x)?;
+                let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                cls.union(&xcls);
+                self.push(HirFrame::ClassUnicode(cls));
+            }
+            ast::ClassSetItem::Perl(ref x) => {
+                if self.flags().unicode() {
+                    let xcls = self.hir_perl_unicode_class(x)?;
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    cls.union(&xcls);
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let xcls = self.hir_perl_byte_class(x)?;
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    cls.union(&xcls);
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            ast::ClassSetItem::Bracketed(ref ast) => {
+                if self.flags().unicode() {
+                    let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
+                    self.unicode_fold_and_negate(
+                        &ast.span,
+                        ast.negated,
+                        &mut cls1,
+                    )?;
+
+                    let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
+                    cls2.union(&cls1);
+                    self.push(HirFrame::ClassUnicode(cls2));
+                } else {
+                    let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
+                    self.bytes_fold_and_negate(
+                        &ast.span,
+                        ast.negated,
+                        &mut cls1,
+                    )?;
+
+                    let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
+                    cls2.union(&cls1);
+                    self.push(HirFrame::ClassBytes(cls2));
+                }
+            }
+            // This is handled automatically by the visitor.
+            ast::ClassSetItem::Union(_) => {}
+        }
+        Ok(())
+    }
+
+    fn visit_class_set_binary_op_pre(
+        &mut self,
+        _op: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        if self.flags().unicode() {
+            let cls = hir::ClassUnicode::empty();
+            self.push(HirFrame::ClassUnicode(cls));
+        } else {
+            let cls = hir::ClassBytes::empty();
+            self.push(HirFrame::ClassBytes(cls));
+        }
+        Ok(())
+    }
+
+    fn visit_class_set_binary_op_in(
+        &mut self,
+        _op: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        if self.flags().unicode() {
+            let cls = hir::ClassUnicode::empty();
+            self.push(HirFrame::ClassUnicode(cls));
+        } else {
+            let cls = hir::ClassBytes::empty();
+            self.push(HirFrame::ClassBytes(cls));
+        }
+        Ok(())
+    }
+
+    fn visit_class_set_binary_op_post(
+        &mut self,
+        op: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        use crate::ast::ClassSetBinaryOpKind::*;
+
+        if self.flags().unicode() {
+            let mut rhs = self.pop().unwrap().unwrap_class_unicode();
+            let mut lhs = self.pop().unwrap().unwrap_class_unicode();
+            let mut cls = self.pop().unwrap().unwrap_class_unicode();
+            if self.flags().case_insensitive() {
+                rhs.try_case_fold_simple().map_err(|_| {
+                    self.error(
+                        op.rhs.span().clone(),
+                        ErrorKind::UnicodeCaseUnavailable,
+                    )
+                })?;
+                lhs.try_case_fold_simple().map_err(|_| {
+                    self.error(
+                        op.lhs.span().clone(),
+                        ErrorKind::UnicodeCaseUnavailable,
+                    )
+                })?;
+            }
+            match op.kind {
+                Intersection => lhs.intersect(&rhs),
+                Difference => lhs.difference(&rhs),
+                SymmetricDifference => lhs.symmetric_difference(&rhs),
+            }
+            cls.union(&lhs);
+            self.push(HirFrame::ClassUnicode(cls));
+        } else {
+            let mut rhs = self.pop().unwrap().unwrap_class_bytes();
+            let mut lhs = self.pop().unwrap().unwrap_class_bytes();
+            let mut cls = self.pop().unwrap().unwrap_class_bytes();
+            if self.flags().case_insensitive() {
+                rhs.case_fold_simple();
+                lhs.case_fold_simple();
+            }
+            match op.kind {
+                Intersection => lhs.intersect(&rhs),
+                Difference => lhs.difference(&rhs),
+                SymmetricDifference => lhs.symmetric_difference(&rhs),
+            }
+            cls.union(&lhs);
+            self.push(HirFrame::ClassBytes(cls));
+        }
+        Ok(())
+    }
+}
+
+/// The internal implementation of a translator.
+///
+/// This type is responsible for carrying around the original pattern string,
+/// which is not tied to the internal state of a translator.
+///
+/// A TranslatorI exists for the time it takes to translate a single Ast.
+#[derive(Clone, Debug)]
+struct TranslatorI<'t, 'p> {
+    trans: &'t Translator,
+    pattern: &'p str,
+}
+
+impl<'t, 'p> TranslatorI<'t, 'p> {
+    /// Build a new internal translator.
+    fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> {
+        TranslatorI { trans, pattern }
+    }
+
+    /// Return a reference to the underlying translator.
+    fn trans(&self) -> &Translator {
+        &self.trans
+    }
+
+    /// Push the given frame on to the call stack.
+    fn push(&self, frame: HirFrame) {
+        self.trans().stack.borrow_mut().push(frame);
+    }
+
+    /// Push the given literal char on to the call stack.
+    ///
+    /// If the top-most element of the stack is a literal, then the char
+    /// is appended to the end of that literal. Otherwise, a new literal
+    /// containing just the given char is pushed to the top of the stack.
+    fn push_char(&self, ch: char) {
+        let mut buf = [0; 4];
+        let bytes = ch.encode_utf8(&mut buf).as_bytes();
+        let mut stack = self.trans().stack.borrow_mut();
+        if let Some(HirFrame::Literal(ref mut literal)) = stack.last_mut() {
+            literal.extend_from_slice(bytes);
+        } else {
+            stack.push(HirFrame::Literal(bytes.to_vec()));
+        }
+    }
+
+    /// Push the given literal byte on to the call stack.
+    ///
+    /// If the top-most element of the stack is a literal, then the byte
+    /// is appended to the end of that literal. Otherwise, a new literal
+    /// containing just the given byte is pushed to the top of the stack.
+    fn push_byte(&self, byte: u8) {
+        let mut stack = self.trans().stack.borrow_mut();
+        if let Some(HirFrame::Literal(ref mut literal)) = stack.last_mut() {
+            literal.push(byte);
+        } else {
+            stack.push(HirFrame::Literal(vec![byte]));
+        }
+    }
+
+    /// Pop the top of the call stack. If the call stack is empty, return None.
+    fn pop(&self) -> Option<HirFrame> {
+        self.trans().stack.borrow_mut().pop()
+    }
+
+    /// Pop an HIR expression from the top of the stack for a concatenation.
+    ///
+    /// This returns None if the stack is empty or when a concat frame is seen.
+    /// Otherwise, it panics if it could not find an HIR expression.
+    fn pop_concat_expr(&self) -> Option<Hir> {
+        let frame = self.pop()?;
+        match frame {
+            HirFrame::Concat => None,
+            HirFrame::Expr(expr) => Some(expr),
+            HirFrame::Literal(lit) => Some(Hir::literal(lit)),
+            HirFrame::ClassUnicode(_) => {
+                unreachable!("expected expr or concat, got Unicode class")
+            }
+            HirFrame::ClassBytes(_) => {
+                unreachable!("expected expr or concat, got byte class")
+            }
+            HirFrame::Repetition => {
+                unreachable!("expected expr or concat, got repetition")
+            }
+            HirFrame::Group { .. } => {
+                unreachable!("expected expr or concat, got group")
+            }
+            HirFrame::Alternation => {
+                unreachable!("expected expr or concat, got alt marker")
+            }
+            HirFrame::AlternationBranch => {
+                unreachable!("expected expr or concat, got alt branch marker")
+            }
+        }
+    }
+
+    /// Pop an HIR expression from the top of the stack for an alternation.
+    ///
+    /// This returns None if the stack is empty or when an alternation frame is
+    /// seen. Otherwise, it panics if it could not find an HIR expression.
+    fn pop_alt_expr(&self) -> Option<Hir> {
+        let frame = self.pop()?;
+        match frame {
+            HirFrame::Alternation => None,
+            HirFrame::Expr(expr) => Some(expr),
+            HirFrame::Literal(lit) => Some(Hir::literal(lit)),
+            HirFrame::ClassUnicode(_) => {
+                unreachable!("expected expr or alt, got Unicode class")
+            }
+            HirFrame::ClassBytes(_) => {
+                unreachable!("expected expr or alt, got byte class")
+            }
+            HirFrame::Repetition => {
+                unreachable!("expected expr or alt, got repetition")
+            }
+            HirFrame::Group { .. } => {
+                unreachable!("expected expr or alt, got group")
+            }
+            HirFrame::Concat => {
+                unreachable!("expected expr or alt, got concat marker")
+            }
+            HirFrame::AlternationBranch => {
+                unreachable!("expected expr or alt, got alt branch marker")
+            }
+        }
+    }
+
+    /// Create a new error with the given span and error type.
+    fn error(&self, span: Span, kind: ErrorKind) -> Error {
+        Error { kind, pattern: self.pattern.to_string(), span }
+    }
+
+    /// Return a copy of the active flags.
+    fn flags(&self) -> Flags {
+        self.trans().flags.get()
+    }
+
+    /// Set the flags of this translator from the flags set in the given AST.
+    /// Then, return the old flags.
+    fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
+        let old_flags = self.flags();
+        let mut new_flags = Flags::from_ast(ast_flags);
+        new_flags.merge(&old_flags);
+        self.trans().flags.set(new_flags);
+        old_flags
+    }
+
+    /// Convert an Ast literal to its scalar representation.
+    ///
+    /// When Unicode mode is enabled, then this always succeeds and returns a
+    /// `char` (Unicode scalar value).
+    ///
+    /// When Unicode mode is disabled, then a `char` will still be returned
+    /// whenever possible. A byte is returned only when invalid UTF-8 is
+    /// allowed and when the byte is not ASCII. Otherwise, a non-ASCII byte
+    /// will result in an error when invalid UTF-8 is not allowed.
+    fn ast_literal_to_scalar(
+        &self,
+        lit: &ast::Literal,
+    ) -> Result<Either<char, u8>> {
+        if self.flags().unicode() {
+            return Ok(Either::Left(lit.c));
+        }
+        let byte = match lit.byte() {
+            None => return Ok(Either::Left(lit.c)),
+            Some(byte) => byte,
+        };
+        if byte <= 0x7F {
+            return Ok(Either::Left(char::try_from(byte).unwrap()));
+        }
+        if self.trans().utf8 {
+            return Err(self.error(lit.span, ErrorKind::InvalidUtf8));
+        }
+        Ok(Either::Right(byte))
+    }
+
+    fn case_fold_char(&self, span: Span, c: char) -> Result<Option<Hir>> {
+        if !self.flags().case_insensitive() {
+            return Ok(None);
+        }
+        if self.flags().unicode() {
+            // If case folding won't do anything, then don't bother trying.
+            let map = unicode::SimpleCaseFolder::new()
+                .map(|f| f.overlaps(c, c))
+                .map_err(|_| {
+                    self.error(span, ErrorKind::UnicodeCaseUnavailable)
+                })?;
+            if !map {
+                return Ok(None);
+            }
+            let mut cls =
+                hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new(
+                    c, c,
+                )]);
+            cls.try_case_fold_simple().map_err(|_| {
+                self.error(span, ErrorKind::UnicodeCaseUnavailable)
+            })?;
+            Ok(Some(Hir::class(hir::Class::Unicode(cls))))
+        } else {
+            if !c.is_ascii() {
+                return Ok(None);
+            }
+            // If case folding won't do anything, then don't bother trying.
+            match c {
+                'A'..='Z' | 'a'..='z' => {}
+                _ => return Ok(None),
+            }
+            let mut cls =
+                hir::ClassBytes::new(vec![hir::ClassBytesRange::new(
+                    // OK because 'c.len_utf8() == 1' which in turn implies
+                    // that 'c' is ASCII.
+                    u8::try_from(c).unwrap(),
+                    u8::try_from(c).unwrap(),
+                )]);
+            cls.case_fold_simple();
+            Ok(Some(Hir::class(hir::Class::Bytes(cls))))
+        }
+    }
+
+    fn hir_dot(&self, span: Span) -> Result<Hir> {
+        let (utf8, lineterm, flags) =
+            (self.trans().utf8, self.trans().line_terminator, self.flags());
+        if utf8 && (!flags.unicode() || !lineterm.is_ascii()) {
+            return Err(self.error(span, ErrorKind::InvalidUtf8));
+        }
+        let dot = if flags.dot_matches_new_line() {
+            if flags.unicode() {
+                hir::Dot::AnyChar
+            } else {
+                hir::Dot::AnyByte
+            }
+        } else {
+            if flags.unicode() {
+                if flags.crlf() {
+                    hir::Dot::AnyCharExceptCRLF
+                } else {
+                    if !lineterm.is_ascii() {
+                        return Err(
+                            self.error(span, ErrorKind::InvalidLineTerminator)
+                        );
+                    }
+                    hir::Dot::AnyCharExcept(char::from(lineterm))
+                }
+            } else {
+                if flags.crlf() {
+                    hir::Dot::AnyByteExceptCRLF
+                } else {
+                    hir::Dot::AnyByteExcept(lineterm)
+                }
+            }
+        };
+        Ok(Hir::dot(dot))
+    }
+
+    fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
+        let unicode = self.flags().unicode();
+        let multi_line = self.flags().multi_line();
+        let crlf = self.flags().crlf();
+        Ok(match asst.kind {
+            ast::AssertionKind::StartLine => Hir::look(if multi_line {
+                if crlf {
+                    hir::Look::StartCRLF
+                } else {
+                    hir::Look::StartLF
+                }
+            } else {
+                hir::Look::Start
+            }),
+            ast::AssertionKind::EndLine => Hir::look(if multi_line {
+                if crlf {
+                    hir::Look::EndCRLF
+                } else {
+                    hir::Look::EndLF
+                }
+            } else {
+                hir::Look::End
+            }),
+            ast::AssertionKind::StartText => Hir::look(hir::Look::Start),
+            ast::AssertionKind::EndText => Hir::look(hir::Look::End),
+            ast::AssertionKind::WordBoundary => Hir::look(if unicode {
+                hir::Look::WordUnicode
+            } else {
+                hir::Look::WordAscii
+            }),
+            ast::AssertionKind::NotWordBoundary => Hir::look(if unicode {
+                hir::Look::WordUnicodeNegate
+            } else {
+                hir::Look::WordAsciiNegate
+            }),
+            ast::AssertionKind::WordBoundaryStart
+            | ast::AssertionKind::WordBoundaryStartAngle => {
+                Hir::look(if unicode {
+                    hir::Look::WordStartUnicode
+                } else {
+                    hir::Look::WordStartAscii
+                })
+            }
+            ast::AssertionKind::WordBoundaryEnd
+            | ast::AssertionKind::WordBoundaryEndAngle => {
+                Hir::look(if unicode {
+                    hir::Look::WordEndUnicode
+                } else {
+                    hir::Look::WordEndAscii
+                })
+            }
+            ast::AssertionKind::WordBoundaryStartHalf => {
+                Hir::look(if unicode {
+                    hir::Look::WordStartHalfUnicode
+                } else {
+                    hir::Look::WordStartHalfAscii
+                })
+            }
+            ast::AssertionKind::WordBoundaryEndHalf => Hir::look(if unicode {
+                hir::Look::WordEndHalfUnicode
+            } else {
+                hir::Look::WordEndHalfAscii
+            }),
+        })
+    }
+
+    fn hir_capture(&self, group: &ast::Group, expr: Hir) -> Hir {
+        let (index, name) = match group.kind {
+            ast::GroupKind::CaptureIndex(index) => (index, None),
+            ast::GroupKind::CaptureName { ref name, .. } => {
+                (name.index, Some(name.name.clone().into_boxed_str()))
+            }
+            // The HIR doesn't need to use non-capturing groups, since the way
+            // in which the data type is defined handles this automatically.
+            ast::GroupKind::NonCapturing(_) => return expr,
+        };
+        Hir::capture(hir::Capture { index, name, sub: Box::new(expr) })
+    }
+
+    fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
+        let (min, max) = match rep.op.kind {
+            ast::RepetitionKind::ZeroOrOne => (0, Some(1)),
+            ast::RepetitionKind::ZeroOrMore => (0, None),
+            ast::RepetitionKind::OneOrMore => (1, None),
+            ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => {
+                (m, Some(m))
+            }
+            ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => {
+                (m, None)
+            }
+            ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(
+                m,
+                n,
+            )) => (m, Some(n)),
+        };
+        let greedy =
+            if self.flags().swap_greed() { !rep.greedy } else { rep.greedy };
+        Hir::repetition(hir::Repetition {
+            min,
+            max,
+            greedy,
+            sub: Box::new(expr),
+        })
+    }
+
+    fn hir_unicode_class(
+        &self,
+        ast_class: &ast::ClassUnicode,
+    ) -> Result<hir::ClassUnicode> {
+        use crate::ast::ClassUnicodeKind::*;
+
+        if !self.flags().unicode() {
+            return Err(
+                self.error(ast_class.span, ErrorKind::UnicodeNotAllowed)
+            );
+        }
+        let query = match ast_class.kind {
+            OneLetter(name) => ClassQuery::OneLetter(name),
+            Named(ref name) => ClassQuery::Binary(name),
+            NamedValue { ref name, ref value, .. } => ClassQuery::ByValue {
+                property_name: name,
+                property_value: value,
+            },
+        };
+        let mut result = self.convert_unicode_class_error(
+            &ast_class.span,
+            unicode::class(query),
+        );
+        if let Ok(ref mut class) = result {
+            self.unicode_fold_and_negate(
+                &ast_class.span,
+                ast_class.negated,
+                class,
+            )?;
+        }
+        result
+    }
+
+    fn hir_ascii_unicode_class(
+        &self,
+        ast: &ast::ClassAscii,
+    ) -> Result<hir::ClassUnicode> {
+        let mut cls = hir::ClassUnicode::new(
+            ascii_class_as_chars(&ast.kind)
+                .map(|(s, e)| hir::ClassUnicodeRange::new(s, e)),
+        );
+        self.unicode_fold_and_negate(&ast.span, ast.negated, &mut cls)?;
+        Ok(cls)
+    }
+
+    fn hir_ascii_byte_class(
+        &self,
+        ast: &ast::ClassAscii,
+    ) -> Result<hir::ClassBytes> {
+        let mut cls = hir::ClassBytes::new(
+            ascii_class(&ast.kind)
+                .map(|(s, e)| hir::ClassBytesRange::new(s, e)),
+        );
+        self.bytes_fold_and_negate(&ast.span, ast.negated, &mut cls)?;
+        Ok(cls)
+    }
+
+    fn hir_perl_unicode_class(
+        &self,
+        ast_class: &ast::ClassPerl,
+    ) -> Result<hir::ClassUnicode> {
+        use crate::ast::ClassPerlKind::*;
+
+        assert!(self.flags().unicode());
+        let result = match ast_class.kind {
+            Digit => unicode::perl_digit(),
+            Space => unicode::perl_space(),
+            Word => unicode::perl_word(),
+        };
+        let mut class =
+            self.convert_unicode_class_error(&ast_class.span, result)?;
+        // We needn't apply case folding here because the Perl Unicode classes
+        // are already closed under Unicode simple case folding.
+        if ast_class.negated {
+            class.negate();
+        }
+        Ok(class)
+    }
+
+    fn hir_perl_byte_class(
+        &self,
+        ast_class: &ast::ClassPerl,
+    ) -> Result<hir::ClassBytes> {
+        use crate::ast::ClassPerlKind::*;
+
+        assert!(!self.flags().unicode());
+        let mut class = match ast_class.kind {
+            Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit),
+            Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space),
+            Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word),
+        };
+        // We needn't apply case folding here because the Perl ASCII classes
+        // are already closed (under ASCII case folding).
+        if ast_class.negated {
+            class.negate();
+        }
+        // Negating a Perl byte class is likely to cause it to match invalid
+        // UTF-8. That's only OK if the translator is configured to allow such
+        // things.
+        if self.trans().utf8 && !class.is_ascii() {
+            return Err(self.error(ast_class.span, ErrorKind::InvalidUtf8));
+        }
+        Ok(class)
+    }
+
+    /// Converts the given Unicode specific error to an HIR translation error.
+    ///
+    /// The span given should approximate the position at which an error would
+    /// occur.
+    fn convert_unicode_class_error(
+        &self,
+        span: &Span,
+        result: core::result::Result<hir::ClassUnicode, unicode::Error>,
+    ) -> Result<hir::ClassUnicode> {
+        result.map_err(|err| {
+            let sp = span.clone();
+            match err {
+                unicode::Error::PropertyNotFound => {
+                    self.error(sp, ErrorKind::UnicodePropertyNotFound)
+                }
+                unicode::Error::PropertyValueNotFound => {
+                    self.error(sp, ErrorKind::UnicodePropertyValueNotFound)
+                }
+                unicode::Error::PerlClassNotFound => {
+                    self.error(sp, ErrorKind::UnicodePerlClassNotFound)
+                }
+            }
+        })
+    }
+
+    fn unicode_fold_and_negate(
+        &self,
+        span: &Span,
+        negated: bool,
+        class: &mut hir::ClassUnicode,
+    ) -> Result<()> {
+        // Note that we must apply case folding before negation!
+        // Consider `(?i)[^x]`. If we applied negation first, then
+        // the result would be the character class that matched any
+        // Unicode scalar value.
+        if self.flags().case_insensitive() {
+            class.try_case_fold_simple().map_err(|_| {
+                self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable)
+            })?;
+        }
+        if negated {
+            class.negate();
+        }
+        Ok(())
+    }
+
+    fn bytes_fold_and_negate(
+        &self,
+        span: &Span,
+        negated: bool,
+        class: &mut hir::ClassBytes,
+    ) -> Result<()> {
+        // Note that we must apply case folding before negation!
+        // Consider `(?i)[^x]`. If we applied negation first, then
+        // the result would be the character class that matched any
+        // Unicode scalar value.
+        if self.flags().case_insensitive() {
+            class.case_fold_simple();
+        }
+        if negated {
+            class.negate();
+        }
+        if self.trans().utf8 && !class.is_ascii() {
+            return Err(self.error(span.clone(), ErrorKind::InvalidUtf8));
+        }
+        Ok(())
+    }
+
+    /// Return a scalar byte value suitable for use as a literal in a byte
+    /// character class.
+    fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
+        match self.ast_literal_to_scalar(ast)? {
+            Either::Right(byte) => Ok(byte),
+            Either::Left(ch) => {
+                if ch.is_ascii() {
+                    Ok(u8::try_from(ch).unwrap())
+                } else {
+                    // We can't feasibly support Unicode in
+                    // byte oriented classes. Byte classes don't
+                    // do Unicode case folding.
+                    Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
+                }
+            }
+        }
+    }
+}
+
+/// A translator's representation of a regular expression's flags at any given
+/// moment in time.
+///
+/// Each flag can be in one of three states: absent, present but disabled or
+/// present but enabled.
+#[derive(Clone, Copy, Debug, Default)]
+struct Flags {
+    case_insensitive: Option<bool>,
+    multi_line: Option<bool>,
+    dot_matches_new_line: Option<bool>,
+    swap_greed: Option<bool>,
+    unicode: Option<bool>,
+    crlf: Option<bool>,
+    // Note that `ignore_whitespace` is omitted here because it is handled
+    // entirely in the parser.
+}
+
+impl Flags {
+    fn from_ast(ast: &ast::Flags) -> Flags {
+        let mut flags = Flags::default();
+        let mut enable = true;
+        for item in &ast.items {
+            match item.kind {
+                ast::FlagsItemKind::Negation => {
+                    enable = false;
+                }
+                ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => {
+                    flags.case_insensitive = Some(enable);
+                }
+                ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => {
+                    flags.multi_line = Some(enable);
+                }
+                ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => {
+                    flags.dot_matches_new_line = Some(enable);
+                }
+                ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => {
+                    flags.swap_greed = Some(enable);
+                }
+                ast::FlagsItemKind::Flag(ast::Flag::Unicode) => {
+                    flags.unicode = Some(enable);
+                }
+                ast::FlagsItemKind::Flag(ast::Flag::CRLF) => {
+                    flags.crlf = Some(enable);
+                }
+                ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {}
+            }
+        }
+        flags
+    }
+
+    fn merge(&mut self, previous: &Flags) {
+        if self.case_insensitive.is_none() {
+            self.case_insensitive = previous.case_insensitive;
+        }
+        if self.multi_line.is_none() {
+            self.multi_line = previous.multi_line;
+        }
+        if self.dot_matches_new_line.is_none() {
+            self.dot_matches_new_line = previous.dot_matches_new_line;
+        }
+        if self.swap_greed.is_none() {
+            self.swap_greed = previous.swap_greed;
+        }
+        if self.unicode.is_none() {
+            self.unicode = previous.unicode;
+        }
+        if self.crlf.is_none() {
+            self.crlf = previous.crlf;
+        }
+    }
+
+    fn case_insensitive(&self) -> bool {
+        self.case_insensitive.unwrap_or(false)
+    }
+
+    fn multi_line(&self) -> bool {
+        self.multi_line.unwrap_or(false)
+    }
+
+    fn dot_matches_new_line(&self) -> bool {
+        self.dot_matches_new_line.unwrap_or(false)
+    }
+
+    fn swap_greed(&self) -> bool {
+        self.swap_greed.unwrap_or(false)
+    }
+
+    fn unicode(&self) -> bool {
+        self.unicode.unwrap_or(true)
+    }
+
+    fn crlf(&self) -> bool {
+        self.crlf.unwrap_or(false)
+    }
+}
+
+fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
+    let ranges: Vec<_> = ascii_class(kind)
+        .map(|(s, e)| hir::ClassBytesRange::new(s, e))
+        .collect();
+    hir::ClassBytes::new(ranges)
+}
+
+fn ascii_class(kind: &ast::ClassAsciiKind) -> impl Iterator<Item = (u8, u8)> {
+    use crate::ast::ClassAsciiKind::*;
+
+    let slice: &'static [(u8, u8)] = match *kind {
+        Alnum => &[(b'0', b'9'), (b'A', b'Z'), (b'a', b'z')],
+        Alpha => &[(b'A', b'Z'), (b'a', b'z')],
+        Ascii => &[(b'\x00', b'\x7F')],
+        Blank => &[(b'\t', b'\t'), (b' ', b' ')],
+        Cntrl => &[(b'\x00', b'\x1F'), (b'\x7F', b'\x7F')],
+        Digit => &[(b'0', b'9')],
+        Graph => &[(b'!', b'~')],
+        Lower => &[(b'a', b'z')],
+        Print => &[(b' ', b'~')],
+        Punct => &[(b'!', b'/'), (b':', b'@'), (b'[', b'`'), (b'{', b'~')],
+        Space => &[
+            (b'\t', b'\t'),
+            (b'\n', b'\n'),
+            (b'\x0B', b'\x0B'),
+            (b'\x0C', b'\x0C'),
+            (b'\r', b'\r'),
+            (b' ', b' '),
+        ],
+        Upper => &[(b'A', b'Z')],
+        Word => &[(b'0', b'9'), (b'A', b'Z'), (b'_', b'_'), (b'a', b'z')],
+        Xdigit => &[(b'0', b'9'), (b'A', b'F'), (b'a', b'f')],
+    };
+    slice.iter().copied()
+}
+
+fn ascii_class_as_chars(
+    kind: &ast::ClassAsciiKind,
+) -> impl Iterator<Item = (char, char)> {
+    ascii_class(kind).map(|(s, e)| (char::from(s), char::from(e)))
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::{
+        ast::{self, parse::ParserBuilder, Ast, Position, Span},
+        hir::{self, Hir, HirKind, Look, Properties},
+        unicode::{self, ClassQuery},
+    };
+
+    use super::*;
+
+    // We create these errors to compare with real hir::Errors in the tests.
+    // We define equality between TestError and hir::Error to disregard the
+    // pattern string in hir::Error, which is annoying to provide in tests.
+    #[derive(Clone, Debug)]
+    struct TestError {
+        span: Span,
+        kind: hir::ErrorKind,
+    }
+
+    impl PartialEq<hir::Error> for TestError {
+        fn eq(&self, other: &hir::Error) -> bool {
+            self.span == other.span && self.kind == other.kind
+        }
+    }
+
+    impl PartialEq<TestError> for hir::Error {
+        fn eq(&self, other: &TestError) -> bool {
+            self.span == other.span && self.kind == other.kind
+        }
+    }
+
+    fn parse(pattern: &str) -> Ast {
+        ParserBuilder::new().octal(true).build().parse(pattern).unwrap()
+    }
+
+    fn t(pattern: &str) -> Hir {
+        TranslatorBuilder::new()
+            .utf8(true)
+            .build()
+            .translate(pattern, &parse(pattern))
+            .unwrap()
+    }
+
+    fn t_err(pattern: &str) -> hir::Error {
+        TranslatorBuilder::new()
+            .utf8(true)
+            .build()
+            .translate(pattern, &parse(pattern))
+            .unwrap_err()
+    }
+
+    fn t_bytes(pattern: &str) -> Hir {
+        TranslatorBuilder::new()
+            .utf8(false)
+            .build()
+            .translate(pattern, &parse(pattern))
+            .unwrap()
+    }
+
+    fn props(pattern: &str) -> Properties {
+        t(pattern).properties().clone()
+    }
+
+    fn props_bytes(pattern: &str) -> Properties {
+        t_bytes(pattern).properties().clone()
+    }
+
+    fn hir_lit(s: &str) -> Hir {
+        hir_blit(s.as_bytes())
+    }
+
+    fn hir_blit(s: &[u8]) -> Hir {
+        Hir::literal(s)
+    }
+
+    fn hir_capture(index: u32, expr: Hir) -> Hir {
+        Hir::capture(hir::Capture { index, name: None, sub: Box::new(expr) })
+    }
+
+    fn hir_capture_name(index: u32, name: &str, expr: Hir) -> Hir {
+        Hir::capture(hir::Capture {
+            index,
+            name: Some(name.into()),
+            sub: Box::new(expr),
+        })
+    }
+
+    fn hir_quest(greedy: bool, expr: Hir) -> Hir {
+        Hir::repetition(hir::Repetition {
+            min: 0,
+            max: Some(1),
+            greedy,
+            sub: Box::new(expr),
+        })
+    }
+
+    fn hir_star(greedy: bool, expr: Hir) -> Hir {
+        Hir::repetition(hir::Repetition {
+            min: 0,
+            max: None,
+            greedy,
+            sub: Box::new(expr),
+        })
+    }
+
+    fn hir_plus(greedy: bool, expr: Hir) -> Hir {
+        Hir::repetition(hir::Repetition {
+            min: 1,
+            max: None,
+            greedy,
+            sub: Box::new(expr),
+        })
+    }
+
+    fn hir_range(greedy: bool, min: u32, max: Option<u32>, expr: Hir) -> Hir {
+        Hir::repetition(hir::Repetition {
+            min,
+            max,
+            greedy,
+            sub: Box::new(expr),
+        })
+    }
+
+    fn hir_alt(alts: Vec<Hir>) -> Hir {
+        Hir::alternation(alts)
+    }
+
+    fn hir_cat(exprs: Vec<Hir>) -> Hir {
+        Hir::concat(exprs)
+    }
+
+    #[allow(dead_code)]
+    fn hir_uclass_query(query: ClassQuery<'_>) -> Hir {
+        Hir::class(hir::Class::Unicode(unicode::class(query).unwrap()))
+    }
+
+    #[allow(dead_code)]
+    fn hir_uclass_perl_word() -> Hir {
+        Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap()))
+    }
+
+    fn hir_ascii_uclass(kind: &ast::ClassAsciiKind) -> Hir {
+        Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(
+            ascii_class_as_chars(kind)
+                .map(|(s, e)| hir::ClassUnicodeRange::new(s, e)),
+        )))
+    }
+
+    fn hir_ascii_bclass(kind: &ast::ClassAsciiKind) -> Hir {
+        Hir::class(hir::Class::Bytes(hir::ClassBytes::new(
+            ascii_class(kind).map(|(s, e)| hir::ClassBytesRange::new(s, e)),
+        )))
+    }
+
+    fn hir_uclass(ranges: &[(char, char)]) -> Hir {
+        Hir::class(uclass(ranges))
+    }
+
+    fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
+        Hir::class(bclass(ranges))
+    }
+
+    fn hir_case_fold(expr: Hir) -> Hir {
+        match expr.into_kind() {
+            HirKind::Class(mut cls) => {
+                cls.case_fold_simple();
+                Hir::class(cls)
+            }
+            _ => panic!("cannot case fold non-class Hir expr"),
+        }
+    }
+
+    fn hir_negate(expr: Hir) -> Hir {
+        match expr.into_kind() {
+            HirKind::Class(mut cls) => {
+                cls.negate();
+                Hir::class(cls)
+            }
+            _ => panic!("cannot negate non-class Hir expr"),
+        }
+    }
+
+    fn uclass(ranges: &[(char, char)]) -> hir::Class {
+        let ranges: Vec<hir::ClassUnicodeRange> = ranges
+            .iter()
+            .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e))
+            .collect();
+        hir::Class::Unicode(hir::ClassUnicode::new(ranges))
+    }
+
+    fn bclass(ranges: &[(u8, u8)]) -> hir::Class {
+        let ranges: Vec<hir::ClassBytesRange> = ranges
+            .iter()
+            .map(|&(s, e)| hir::ClassBytesRange::new(s, e))
+            .collect();
+        hir::Class::Bytes(hir::ClassBytes::new(ranges))
+    }
+
+    #[cfg(feature = "unicode-case")]
+    fn class_case_fold(mut cls: hir::Class) -> Hir {
+        cls.case_fold_simple();
+        Hir::class(cls)
+    }
+
+    fn class_negate(mut cls: hir::Class) -> Hir {
+        cls.negate();
+        Hir::class(cls)
+    }
+
+    #[allow(dead_code)]
+    fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
+        use crate::hir::Class::{Bytes, Unicode};
+
+        match (expr1.into_kind(), expr2.into_kind()) {
+            (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
+                c1.union(&c2);
+                Hir::class(hir::Class::Unicode(c1))
+            }
+            (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
+                c1.union(&c2);
+                Hir::class(hir::Class::Bytes(c1))
+            }
+            _ => panic!("cannot union non-class Hir exprs"),
+        }
+    }
+
+    #[allow(dead_code)]
+    fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
+        use crate::hir::Class::{Bytes, Unicode};
+
+        match (expr1.into_kind(), expr2.into_kind()) {
+            (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
+                c1.difference(&c2);
+                Hir::class(hir::Class::Unicode(c1))
+            }
+            (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
+                c1.difference(&c2);
+                Hir::class(hir::Class::Bytes(c1))
+            }
+            _ => panic!("cannot difference non-class Hir exprs"),
+        }
+    }
+
+    fn hir_look(look: hir::Look) -> Hir {
+        Hir::look(look)
+    }
+
+    #[test]
+    fn empty() {
+        assert_eq!(t(""), Hir::empty());
+        assert_eq!(t("(?i)"), Hir::empty());
+        assert_eq!(t("()"), hir_capture(1, Hir::empty()));
+        assert_eq!(t("(?:)"), Hir::empty());
+        assert_eq!(t("(?P<wat>)"), hir_capture_name(1, "wat", Hir::empty()));
+        assert_eq!(t("|"), hir_alt(vec![Hir::empty(), Hir::empty()]));
+        assert_eq!(
+            t("()|()"),
+            hir_alt(vec![
+                hir_capture(1, Hir::empty()),
+                hir_capture(2, Hir::empty()),
+            ])
+        );
+        assert_eq!(
+            t("(|b)"),
+            hir_capture(1, hir_alt(vec![Hir::empty(), hir_lit("b"),]))
+        );
+        assert_eq!(
+            t("(a|)"),
+            hir_capture(1, hir_alt(vec![hir_lit("a"), Hir::empty(),]))
+        );
+        assert_eq!(
+            t("(a||c)"),
+            hir_capture(
+                1,
+                hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c"),])
+            )
+        );
+        assert_eq!(
+            t("(||)"),
+            hir_capture(
+                1,
+                hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty(),])
+            )
+        );
+    }
+
+    #[test]
+    fn literal() {
+        assert_eq!(t("a"), hir_lit("a"));
+        assert_eq!(t("(?-u)a"), hir_lit("a"));
+        assert_eq!(t("☃"), hir_lit("☃"));
+        assert_eq!(t("abcd"), hir_lit("abcd"));
+
+        assert_eq!(t_bytes("(?-u)a"), hir_lit("a"));
+        assert_eq!(t_bytes("(?-u)\x61"), hir_lit("a"));
+        assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a"));
+        assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));
+
+        assert_eq!(t("(?-u)☃"), hir_lit("☃"));
+        assert_eq!(
+            t_err(r"(?-u)\xFF"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(9, 1, 10)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    fn literal_case_insensitive() {
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a'),]));
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(t("(?i:a)"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("a(?i)a(?-i)a"),
+            hir_cat(vec![
+                hir_lit("a"),
+                hir_uclass(&[('A', 'A'), ('a', 'a')]),
+                hir_lit("a"),
+            ])
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)ab@c"),
+            hir_cat(vec![
+                hir_uclass(&[('A', 'A'), ('a', 'a')]),
+                hir_uclass(&[('B', 'B'), ('b', 'b')]),
+                hir_lit("@"),
+                hir_uclass(&[('C', 'C'), ('c', 'c')]),
+            ])
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)β"),
+            hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
+        );
+
+        assert_eq!(t("(?i-u)a"), hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]));
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?-u)a(?i)a(?-i)a"),
+            hir_cat(vec![
+                hir_lit("a"),
+                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
+                hir_lit("a"),
+            ])
+        );
+        assert_eq!(
+            t("(?i-u)ab@c"),
+            hir_cat(vec![
+                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
+                hir_bclass(&[(b'B', b'B'), (b'b', b'b')]),
+                hir_lit("@"),
+                hir_bclass(&[(b'C', b'C'), (b'c', b'c')]),
+            ])
+        );
+
+        assert_eq!(
+            t_bytes("(?i-u)a"),
+            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
+        );
+        assert_eq!(
+            t_bytes("(?i-u)\x61"),
+            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
+        );
+        assert_eq!(
+            t_bytes(r"(?i-u)\x61"),
+            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
+        );
+        assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));
+
+        assert_eq!(t("(?i-u)β"), hir_lit("β"),);
+    }
+
+    #[test]
+    fn dot() {
+        assert_eq!(
+            t("."),
+            hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}')])
+        );
+        assert_eq!(
+            t("(?R)."),
+            hir_uclass(&[
+                ('\0', '\t'),
+                ('\x0B', '\x0C'),
+                ('\x0E', '\u{10FFFF}'),
+            ])
+        );
+        assert_eq!(t("(?s)."), hir_uclass(&[('\0', '\u{10FFFF}')]));
+        assert_eq!(t("(?Rs)."), hir_uclass(&[('\0', '\u{10FFFF}')]));
+        assert_eq!(
+            t_bytes("(?-u)."),
+            hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF')])
+        );
+        assert_eq!(
+            t_bytes("(?R-u)."),
+            hir_bclass(&[
+                (b'\0', b'\t'),
+                (b'\x0B', b'\x0C'),
+                (b'\x0E', b'\xFF'),
+            ])
+        );
+        assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[(b'\0', b'\xFF'),]));
+        assert_eq!(t_bytes("(?Rs-u)."), hir_bclass(&[(b'\0', b'\xFF'),]));
+
+        // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
+        assert_eq!(
+            t_err("(?-u)."),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(6, 1, 7)
+                ),
+            }
+        );
+        assert_eq!(
+            t_err("(?R-u)."),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(6, 1, 7),
+                    Position::new(7, 1, 8)
+                ),
+            }
+        );
+        assert_eq!(
+            t_err("(?s-u)."),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(6, 1, 7),
+                    Position::new(7, 1, 8)
+                ),
+            }
+        );
+        assert_eq!(
+            t_err("(?Rs-u)."),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(7, 1, 8),
+                    Position::new(8, 1, 9)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    fn assertions() {
+        assert_eq!(t("^"), hir_look(hir::Look::Start));
+        assert_eq!(t("$"), hir_look(hir::Look::End));
+        assert_eq!(t(r"\A"), hir_look(hir::Look::Start));
+        assert_eq!(t(r"\z"), hir_look(hir::Look::End));
+        assert_eq!(t("(?m)^"), hir_look(hir::Look::StartLF));
+        assert_eq!(t("(?m)$"), hir_look(hir::Look::EndLF));
+        assert_eq!(t(r"(?m)\A"), hir_look(hir::Look::Start));
+        assert_eq!(t(r"(?m)\z"), hir_look(hir::Look::End));
+
+        assert_eq!(t(r"\b"), hir_look(hir::Look::WordUnicode));
+        assert_eq!(t(r"\B"), hir_look(hir::Look::WordUnicodeNegate));
+        assert_eq!(t(r"(?-u)\b"), hir_look(hir::Look::WordAscii));
+        assert_eq!(t(r"(?-u)\B"), hir_look(hir::Look::WordAsciiNegate));
+    }
+
+    #[test]
+    fn group() {
+        assert_eq!(t("(a)"), hir_capture(1, hir_lit("a")));
+        assert_eq!(
+            t("(a)(b)"),
+            hir_cat(vec![
+                hir_capture(1, hir_lit("a")),
+                hir_capture(2, hir_lit("b")),
+            ])
+        );
+        assert_eq!(
+            t("(a)|(b)"),
+            hir_alt(vec![
+                hir_capture(1, hir_lit("a")),
+                hir_capture(2, hir_lit("b")),
+            ])
+        );
+        assert_eq!(t("(?P<foo>)"), hir_capture_name(1, "foo", Hir::empty()));
+        assert_eq!(t("(?P<foo>a)"), hir_capture_name(1, "foo", hir_lit("a")));
+        assert_eq!(
+            t("(?P<foo>a)(?P<bar>b)"),
+            hir_cat(vec![
+                hir_capture_name(1, "foo", hir_lit("a")),
+                hir_capture_name(2, "bar", hir_lit("b")),
+            ])
+        );
+        assert_eq!(t("(?:)"), Hir::empty());
+        assert_eq!(t("(?:a)"), hir_lit("a"));
+        assert_eq!(
+            t("(?:a)(b)"),
+            hir_cat(vec![hir_lit("a"), hir_capture(1, hir_lit("b")),])
+        );
+        assert_eq!(
+            t("(a)(?:b)(c)"),
+            hir_cat(vec![
+                hir_capture(1, hir_lit("a")),
+                hir_lit("b"),
+                hir_capture(2, hir_lit("c")),
+            ])
+        );
+        assert_eq!(
+            t("(a)(?P<foo>b)(c)"),
+            hir_cat(vec![
+                hir_capture(1, hir_lit("a")),
+                hir_capture_name(2, "foo", hir_lit("b")),
+                hir_capture(3, hir_lit("c")),
+            ])
+        );
+        assert_eq!(t("()"), hir_capture(1, Hir::empty()));
+        assert_eq!(t("((?i))"), hir_capture(1, Hir::empty()));
+        assert_eq!(t("((?x))"), hir_capture(1, Hir::empty()));
+        assert_eq!(
+            t("(((?x)))"),
+            hir_capture(1, hir_capture(2, Hir::empty()))
+        );
+    }
+
+    #[test]
+    fn line_anchors() {
+        assert_eq!(t("^"), hir_look(hir::Look::Start));
+        assert_eq!(t("$"), hir_look(hir::Look::End));
+        assert_eq!(t(r"\A"), hir_look(hir::Look::Start));
+        assert_eq!(t(r"\z"), hir_look(hir::Look::End));
+
+        assert_eq!(t(r"(?m)\A"), hir_look(hir::Look::Start));
+        assert_eq!(t(r"(?m)\z"), hir_look(hir::Look::End));
+        assert_eq!(t("(?m)^"), hir_look(hir::Look::StartLF));
+        assert_eq!(t("(?m)$"), hir_look(hir::Look::EndLF));
+
+        assert_eq!(t(r"(?R)\A"), hir_look(hir::Look::Start));
+        assert_eq!(t(r"(?R)\z"), hir_look(hir::Look::End));
+        assert_eq!(t("(?R)^"), hir_look(hir::Look::Start));
+        assert_eq!(t("(?R)$"), hir_look(hir::Look::End));
+
+        assert_eq!(t(r"(?Rm)\A"), hir_look(hir::Look::Start));
+        assert_eq!(t(r"(?Rm)\z"), hir_look(hir::Look::End));
+        assert_eq!(t("(?Rm)^"), hir_look(hir::Look::StartCRLF));
+        assert_eq!(t("(?Rm)$"), hir_look(hir::Look::EndCRLF));
+    }
+
+    #[test]
+    fn flags() {
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i:a)a"),
+            hir_cat(
+                vec![hir_uclass(&[('A', 'A'), ('a', 'a')]), hir_lit("a"),]
+            )
+        );
+        assert_eq!(
+            t("(?i-u:a)β"),
+            hir_cat(vec![
+                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
+                hir_lit("β"),
+            ])
+        );
+        assert_eq!(
+            t("(?:(?i-u)a)b"),
+            hir_cat(vec![
+                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
+                hir_lit("b"),
+            ])
+        );
+        assert_eq!(
+            t("((?i-u)a)b"),
+            hir_cat(vec![
+                hir_capture(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
+                hir_lit("b"),
+            ])
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)(?-i:a)a"),
+            hir_cat(
+                vec![hir_lit("a"), hir_uclass(&[('A', 'A'), ('a', 'a')]),]
+            )
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?im)a^"),
+            hir_cat(vec![
+                hir_uclass(&[('A', 'A'), ('a', 'a')]),
+                hir_look(hir::Look::StartLF),
+            ])
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?im)a^(?i-m)a^"),
+            hir_cat(vec![
+                hir_uclass(&[('A', 'A'), ('a', 'a')]),
+                hir_look(hir::Look::StartLF),
+                hir_uclass(&[('A', 'A'), ('a', 'a')]),
+                hir_look(hir::Look::Start),
+            ])
+        );
+        assert_eq!(
+            t("(?U)a*a*?(?-U)a*a*?"),
+            hir_cat(vec![
+                hir_star(false, hir_lit("a")),
+                hir_star(true, hir_lit("a")),
+                hir_star(true, hir_lit("a")),
+                hir_star(false, hir_lit("a")),
+            ])
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?:a(?i)a)a"),
+            hir_cat(vec![
+                hir_cat(vec![
+                    hir_lit("a"),
+                    hir_uclass(&[('A', 'A'), ('a', 'a')]),
+                ]),
+                hir_lit("a"),
+            ])
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)(?:a(?-i)a)a"),
+            hir_cat(vec![
+                hir_cat(vec![
+                    hir_uclass(&[('A', 'A'), ('a', 'a')]),
+                    hir_lit("a"),
+                ]),
+                hir_uclass(&[('A', 'A'), ('a', 'a')]),
+            ])
+        );
+    }
+
+    #[test]
+    fn escape() {
+        assert_eq!(
+            t(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"),
+            hir_lit(r"\.+*?()|[]{}^$#")
+        );
+    }
+
+    #[test]
+    fn repetition() {
+        assert_eq!(t("a?"), hir_quest(true, hir_lit("a")));
+        assert_eq!(t("a*"), hir_star(true, hir_lit("a")));
+        assert_eq!(t("a+"), hir_plus(true, hir_lit("a")));
+        assert_eq!(t("a??"), hir_quest(false, hir_lit("a")));
+        assert_eq!(t("a*?"), hir_star(false, hir_lit("a")));
+        assert_eq!(t("a+?"), hir_plus(false, hir_lit("a")));
+
+        assert_eq!(t("a{1}"), hir_range(true, 1, Some(1), hir_lit("a"),));
+        assert_eq!(t("a{1,}"), hir_range(true, 1, None, hir_lit("a"),));
+        assert_eq!(t("a{1,2}"), hir_range(true, 1, Some(2), hir_lit("a"),));
+        assert_eq!(t("a{1}?"), hir_range(false, 1, Some(1), hir_lit("a"),));
+        assert_eq!(t("a{1,}?"), hir_range(false, 1, None, hir_lit("a"),));
+        assert_eq!(t("a{1,2}?"), hir_range(false, 1, Some(2), hir_lit("a"),));
+
+        assert_eq!(
+            t("ab?"),
+            hir_cat(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
+        );
+        assert_eq!(t("(ab)?"), hir_quest(true, hir_capture(1, hir_lit("ab"))));
+        assert_eq!(
+            t("a|b?"),
+            hir_alt(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
+        );
+    }
+
+    #[test]
+    fn cat_alt() {
+        let a = || hir_look(hir::Look::Start);
+        let b = || hir_look(hir::Look::End);
+        let c = || hir_look(hir::Look::WordUnicode);
+        let d = || hir_look(hir::Look::WordUnicodeNegate);
+
+        assert_eq!(t("(^$)"), hir_capture(1, hir_cat(vec![a(), b()])));
+        assert_eq!(t("^|$"), hir_alt(vec![a(), b()]));
+        assert_eq!(t(r"^|$|\b"), hir_alt(vec![a(), b(), c()]));
+        assert_eq!(
+            t(r"^$|$\b|\b\B"),
+            hir_alt(vec![
+                hir_cat(vec![a(), b()]),
+                hir_cat(vec![b(), c()]),
+                hir_cat(vec![c(), d()]),
+            ])
+        );
+        assert_eq!(t("(^|$)"), hir_capture(1, hir_alt(vec![a(), b()])));
+        assert_eq!(
+            t(r"(^|$|\b)"),
+            hir_capture(1, hir_alt(vec![a(), b(), c()]))
+        );
+        assert_eq!(
+            t(r"(^$|$\b|\b\B)"),
+            hir_capture(
+                1,
+                hir_alt(vec![
+                    hir_cat(vec![a(), b()]),
+                    hir_cat(vec![b(), c()]),
+                    hir_cat(vec![c(), d()]),
+                ])
+            )
+        );
+        assert_eq!(
+            t(r"(^$|($\b|(\b\B)))"),
+            hir_capture(
+                1,
+                hir_alt(vec![
+                    hir_cat(vec![a(), b()]),
+                    hir_capture(
+                        2,
+                        hir_alt(vec![
+                            hir_cat(vec![b(), c()]),
+                            hir_capture(3, hir_cat(vec![c(), d()])),
+                        ])
+                    ),
+                ])
+            )
+        );
+    }
+
+    // Tests the HIR transformation of things like '[a-z]|[A-Z]' into
+    // '[A-Za-z]'. In other words, an alternation of just classes is always
+    // equivalent to a single class corresponding to the union of the branches
+    // in that class. (Unless some branches match invalid UTF-8 and others
+    // match non-ASCII Unicode.)
+    #[test]
+    fn cat_class_flattened() {
+        assert_eq!(t(r"[a-z]|[A-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
+        // Combining all of the letter properties should give us the one giant
+        // letter property.
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"(?x)
+                \p{Lowercase_Letter}
+                |\p{Uppercase_Letter}
+                |\p{Titlecase_Letter}
+                |\p{Modifier_Letter}
+                |\p{Other_Letter}
+            "),
+            hir_uclass_query(ClassQuery::Binary("letter"))
+        );
+        // Byte classes that can truly match invalid UTF-8 cannot be combined
+        // with Unicode classes.
+        assert_eq!(
+            t_bytes(r"[Δδ]|(?-u:[\x90-\xFF])|[Λλ]"),
+            hir_alt(vec![
+                hir_uclass(&[('Δ', 'Δ'), ('δ', 'δ')]),
+                hir_bclass(&[(b'\x90', b'\xFF')]),
+                hir_uclass(&[('Λ', 'Λ'), ('λ', 'λ')]),
+            ])
+        );
+        // Byte classes on their own can be combined, even if some are ASCII
+        // and others are invalid UTF-8.
+        assert_eq!(
+            t_bytes(r"[a-z]|(?-u:[\x90-\xFF])|[A-Z]"),
+            hir_bclass(&[(b'A', b'Z'), (b'a', b'z'), (b'\x90', b'\xFF')]),
+        );
+    }
+
+    #[test]
+    fn class_ascii() {
+        assert_eq!(
+            t("[[:alnum:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Alnum)
+        );
+        assert_eq!(
+            t("[[:alpha:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Alpha)
+        );
+        assert_eq!(
+            t("[[:ascii:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Ascii)
+        );
+        assert_eq!(
+            t("[[:blank:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Blank)
+        );
+        assert_eq!(
+            t("[[:cntrl:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Cntrl)
+        );
+        assert_eq!(
+            t("[[:digit:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Digit)
+        );
+        assert_eq!(
+            t("[[:graph:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Graph)
+        );
+        assert_eq!(
+            t("[[:lower:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Lower)
+        );
+        assert_eq!(
+            t("[[:print:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Print)
+        );
+        assert_eq!(
+            t("[[:punct:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Punct)
+        );
+        assert_eq!(
+            t("[[:space:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Space)
+        );
+        assert_eq!(
+            t("[[:upper:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Upper)
+        );
+        assert_eq!(
+            t("[[:word:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Word)
+        );
+        assert_eq!(
+            t("[[:xdigit:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Xdigit)
+        );
+
+        assert_eq!(
+            t("[[:^lower:]]"),
+            hir_negate(hir_ascii_uclass(&ast::ClassAsciiKind::Lower))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[[:lower:]]"),
+            hir_uclass(&[
+                ('A', 'Z'),
+                ('a', 'z'),
+                ('\u{17F}', '\u{17F}'),
+                ('\u{212A}', '\u{212A}'),
+            ])
+        );
+
+        assert_eq!(
+            t("(?-u)[[:lower:]]"),
+            hir_ascii_bclass(&ast::ClassAsciiKind::Lower)
+        );
+        assert_eq!(
+            t("(?i-u)[[:lower:]]"),
+            hir_case_fold(hir_ascii_bclass(&ast::ClassAsciiKind::Lower))
+        );
+
+        assert_eq!(
+            t_err("(?-u)[[:^lower:]]"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(6, 1, 7),
+                    Position::new(16, 1, 17)
+                ),
+            }
+        );
+        assert_eq!(
+            t_err("(?i-u)[[:^lower:]]"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(7, 1, 8),
+                    Position::new(17, 1, 18)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    fn class_ascii_multiple() {
+        // See: https://github.com/rust-lang/regex/issues/680
+        assert_eq!(
+            t("[[:alnum:][:^ascii:]]"),
+            hir_union(
+                hir_ascii_uclass(&ast::ClassAsciiKind::Alnum),
+                hir_uclass(&[('\u{80}', '\u{10FFFF}')]),
+            ),
+        );
+        assert_eq!(
+            t_bytes("(?-u)[[:alnum:][:^ascii:]]"),
+            hir_union(
+                hir_ascii_bclass(&ast::ClassAsciiKind::Alnum),
+                hir_bclass(&[(0x80, 0xFF)]),
+            ),
+        );
+    }
+
+    #[test]
+    #[cfg(feature = "unicode-perl")]
+    fn class_perl_unicode() {
+        // Unicode
+        assert_eq!(t(r"\d"), hir_uclass_query(ClassQuery::Binary("digit")));
+        assert_eq!(t(r"\s"), hir_uclass_query(ClassQuery::Binary("space")));
+        assert_eq!(t(r"\w"), hir_uclass_perl_word());
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)\d"),
+            hir_uclass_query(ClassQuery::Binary("digit"))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)\s"),
+            hir_uclass_query(ClassQuery::Binary("space"))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(t(r"(?i)\w"), hir_uclass_perl_word());
+
+        // Unicode, negated
+        assert_eq!(
+            t(r"\D"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
+        );
+        assert_eq!(
+            t(r"\S"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
+        );
+        assert_eq!(t(r"\W"), hir_negate(hir_uclass_perl_word()));
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)\D"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)\S"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word()));
+    }
+
+    #[test]
+    fn class_perl_ascii() {
+        // ASCII only
+        assert_eq!(
+            t(r"(?-u)\d"),
+            hir_ascii_bclass(&ast::ClassAsciiKind::Digit)
+        );
+        assert_eq!(
+            t(r"(?-u)\s"),
+            hir_ascii_bclass(&ast::ClassAsciiKind::Space)
+        );
+        assert_eq!(
+            t(r"(?-u)\w"),
+            hir_ascii_bclass(&ast::ClassAsciiKind::Word)
+        );
+        assert_eq!(
+            t(r"(?i-u)\d"),
+            hir_ascii_bclass(&ast::ClassAsciiKind::Digit)
+        );
+        assert_eq!(
+            t(r"(?i-u)\s"),
+            hir_ascii_bclass(&ast::ClassAsciiKind::Space)
+        );
+        assert_eq!(
+            t(r"(?i-u)\w"),
+            hir_ascii_bclass(&ast::ClassAsciiKind::Word)
+        );
+
+        // ASCII only, negated
+        assert_eq!(
+            t_bytes(r"(?-u)\D"),
+            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit))
+        );
+        assert_eq!(
+            t_bytes(r"(?-u)\S"),
+            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Space))
+        );
+        assert_eq!(
+            t_bytes(r"(?-u)\W"),
+            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word))
+        );
+        assert_eq!(
+            t_bytes(r"(?i-u)\D"),
+            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit))
+        );
+        assert_eq!(
+            t_bytes(r"(?i-u)\S"),
+            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Space))
+        );
+        assert_eq!(
+            t_bytes(r"(?i-u)\W"),
+            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word))
+        );
+
+        // ASCII only, negated, with UTF-8 mode enabled.
+        // In this case, negating any Perl class results in an error because
+        // all such classes can match invalid UTF-8.
+        assert_eq!(
+            t_err(r"(?-u)\D"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(7, 1, 8),
+                ),
+            },
+        );
+        assert_eq!(
+            t_err(r"(?-u)\S"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(7, 1, 8),
+                ),
+            },
+        );
+        assert_eq!(
+            t_err(r"(?-u)\W"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(7, 1, 8),
+                ),
+            },
+        );
+        assert_eq!(
+            t_err(r"(?i-u)\D"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(6, 1, 7),
+                    Position::new(8, 1, 9),
+                ),
+            },
+        );
+        assert_eq!(
+            t_err(r"(?i-u)\S"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(6, 1, 7),
+                    Position::new(8, 1, 9),
+                ),
+            },
+        );
+        assert_eq!(
+            t_err(r"(?i-u)\W"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(6, 1, 7),
+                    Position::new(8, 1, 9),
+                ),
+            },
+        );
+    }
+
+    #[test]
+    #[cfg(not(feature = "unicode-perl"))]
+    fn class_perl_word_disabled() {
+        assert_eq!(
+            t_err(r"\w"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePerlClassNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(2, 1, 3)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    #[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
+    fn class_perl_space_disabled() {
+        assert_eq!(
+            t_err(r"\s"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePerlClassNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(2, 1, 3)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    #[cfg(all(
+        not(feature = "unicode-perl"),
+        not(feature = "unicode-gencat")
+    ))]
+    fn class_perl_digit_disabled() {
+        assert_eq!(
+            t_err(r"\d"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePerlClassNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(2, 1, 3)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    #[cfg(feature = "unicode-gencat")]
+    fn class_unicode_gencat() {
+        assert_eq!(t(r"\pZ"), hir_uclass_query(ClassQuery::Binary("Z")));
+        assert_eq!(t(r"\pz"), hir_uclass_query(ClassQuery::Binary("Z")));
+        assert_eq!(
+            t(r"\p{Separator}"),
+            hir_uclass_query(ClassQuery::Binary("Z"))
+        );
+        assert_eq!(
+            t(r"\p{se      PaRa ToR}"),
+            hir_uclass_query(ClassQuery::Binary("Z"))
+        );
+        assert_eq!(
+            t(r"\p{gc:Separator}"),
+            hir_uclass_query(ClassQuery::Binary("Z"))
+        );
+        assert_eq!(
+            t(r"\p{gc=Separator}"),
+            hir_uclass_query(ClassQuery::Binary("Z"))
+        );
+        assert_eq!(
+            t(r"\p{Other}"),
+            hir_uclass_query(ClassQuery::Binary("Other"))
+        );
+        assert_eq!(t(r"\pC"), hir_uclass_query(ClassQuery::Binary("Other")));
+
+        assert_eq!(
+            t(r"\PZ"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
+        );
+        assert_eq!(
+            t(r"\P{separator}"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
+        );
+        assert_eq!(
+            t(r"\P{gc!=separator}"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
+        );
+
+        assert_eq!(t(r"\p{any}"), hir_uclass_query(ClassQuery::Binary("Any")));
+        assert_eq!(
+            t(r"\p{assigned}"),
+            hir_uclass_query(ClassQuery::Binary("Assigned"))
+        );
+        assert_eq!(
+            t(r"\p{ascii}"),
+            hir_uclass_query(ClassQuery::Binary("ASCII"))
+        );
+        assert_eq!(
+            t(r"\p{gc:any}"),
+            hir_uclass_query(ClassQuery::Binary("Any"))
+        );
+        assert_eq!(
+            t(r"\p{gc:assigned}"),
+            hir_uclass_query(ClassQuery::Binary("Assigned"))
+        );
+        assert_eq!(
+            t(r"\p{gc:ascii}"),
+            hir_uclass_query(ClassQuery::Binary("ASCII"))
+        );
+
+        assert_eq!(
+            t_err(r"(?-u)\pZ"),
+            TestError {
+                kind: hir::ErrorKind::UnicodeNotAllowed,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(8, 1, 9)
+                ),
+            }
+        );
+        assert_eq!(
+            t_err(r"(?-u)\p{Separator}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodeNotAllowed,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(18, 1, 19)
+                ),
+            }
+        );
+        assert_eq!(
+            t_err(r"\pE"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(3, 1, 4)
+                ),
+            }
+        );
+        assert_eq!(
+            t_err(r"\p{Foo}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(7, 1, 8)
+                ),
+            }
+        );
+        assert_eq!(
+            t_err(r"\p{gc:Foo}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(10, 1, 11)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    #[cfg(not(feature = "unicode-gencat"))]
+    fn class_unicode_gencat_disabled() {
+        assert_eq!(
+            t_err(r"\p{Separator}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(13, 1, 14)
+                ),
+            }
+        );
+
+        assert_eq!(
+            t_err(r"\p{Any}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(7, 1, 8)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    #[cfg(feature = "unicode-script")]
+    fn class_unicode_script() {
+        assert_eq!(
+            t(r"\p{Greek}"),
+            hir_uclass_query(ClassQuery::Binary("Greek"))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)\p{Greek}"),
+            hir_case_fold(hir_uclass_query(ClassQuery::Binary("Greek")))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)\P{Greek}"),
+            hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
+                "Greek"
+            ))))
+        );
+
+        assert_eq!(
+            t_err(r"\p{sc:Foo}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(10, 1, 11)
+                ),
+            }
+        );
+        assert_eq!(
+            t_err(r"\p{scx:Foo}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(11, 1, 12)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    #[cfg(not(feature = "unicode-script"))]
+    fn class_unicode_script_disabled() {
+        assert_eq!(
+            t_err(r"\p{Greek}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(9, 1, 10)
+                ),
+            }
+        );
+
+        assert_eq!(
+            t_err(r"\p{scx:Greek}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(13, 1, 14)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    #[cfg(feature = "unicode-age")]
+    fn class_unicode_age() {
+        assert_eq!(
+            t_err(r"\p{age:Foo}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(11, 1, 12)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    #[cfg(feature = "unicode-gencat")]
+    fn class_unicode_any_empty() {
+        assert_eq!(t(r"\P{any}"), hir_uclass(&[]),);
+    }
+
+    #[test]
+    #[cfg(not(feature = "unicode-age"))]
+    fn class_unicode_age_disabled() {
+        assert_eq!(
+            t_err(r"\p{age:3.0}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(11, 1, 12)
+                ),
+            }
+        );
+    }
+
+    #[test]
+    fn class_bracketed() {
+        assert_eq!(t("[a]"), hir_lit("a"));
+        assert_eq!(t("[ab]"), hir_uclass(&[('a', 'b')]));
+        assert_eq!(t("[^[a]]"), class_negate(uclass(&[('a', 'a')])));
+        assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')]));
+        assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')]));
+        assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')]));
+        assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')]));
+        assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')]));
+        assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')]));
+        #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
+        assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit")));
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[\pZ]"),
+            hir_uclass_query(ClassQuery::Binary("separator"))
+        );
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[\p{separator}]"),
+            hir_uclass_query(ClassQuery::Binary("separator"))
+        );
+        #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
+        assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit")));
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[^\PZ]"),
+            hir_uclass_query(ClassQuery::Binary("separator"))
+        );
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[^\P{separator}]"),
+            hir_uclass_query(ClassQuery::Binary("separator"))
+        );
+        #[cfg(all(
+            feature = "unicode-case",
+            any(feature = "unicode-perl", feature = "unicode-gencat")
+        ))]
+        assert_eq!(
+            t(r"(?i)[^\D]"),
+            hir_uclass_query(ClassQuery::Binary("digit"))
+        );
+        #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
+        assert_eq!(
+            t(r"(?i)[^\P{greek}]"),
+            hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")))
+        );
+
+        assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
+        assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
+        assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));
+
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[k]"),
+            hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),])
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[β]"),
+            hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
+        );
+        assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),]));
+
+        assert_eq!(t("[^a]"), class_negate(uclass(&[('a', 'a')])));
+        assert_eq!(t(r"[^\x00]"), class_negate(uclass(&[('\0', '\0')])));
+        assert_eq!(
+            t_bytes("(?-u)[^a]"),
+            class_negate(bclass(&[(b'a', b'a')]))
+        );
+        #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
+        assert_eq!(
+            t(r"[^\d]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
+        );
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[^\pZ]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
+        );
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[^\p{separator}]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
+        );
+        #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
+        assert_eq!(
+            t(r"(?i)[^\p{greek}]"),
+            hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
+                "greek"
+            ))))
+        );
+        #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
+        assert_eq!(
+            t(r"(?i)[\P{greek}]"),
+            hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
+                "greek"
+            ))))
+        );
+
+        // Test some weird cases.
+        assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')]));
+
+        assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')]));
+        assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')]));
+        assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')]));
+        assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')]));
+        assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')]));
+
+        assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')]));
+        assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')]));
+        assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')]));
+        assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')]));
+        assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')]));
+
+        assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')]));
+        assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')]));
+        assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')]));
+        assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')]));
+        assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')]));
+
+        assert_eq!(
+            t_err("(?-u)[^a]"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(9, 1, 10)
+                ),
+            }
+        );
+        #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
+        assert_eq!(t(r"[^\s\S]"), hir_uclass(&[]),);
+        #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
+        assert_eq!(t_bytes(r"(?-u)[^\s\S]"), hir_bclass(&[]),);
+    }
+
+    #[test]
+    fn class_bracketed_union() {
+        assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[a\pZb]"),
+            hir_union(
+                hir_uclass(&[('a', 'b')]),
+                hir_uclass_query(ClassQuery::Binary("separator"))
+            )
+        );
+        #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
+        assert_eq!(
+            t(r"[\pZ\p{Greek}]"),
+            hir_union(
+                hir_uclass_query(ClassQuery::Binary("greek")),
+                hir_uclass_query(ClassQuery::Binary("separator"))
+            )
+        );
+        #[cfg(all(
+            feature = "unicode-age",
+            feature = "unicode-gencat",
+            feature = "unicode-script"
+        ))]
+        assert_eq!(
+            t(r"[\p{age:3.0}\pZ\p{Greek}]"),
+            hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("greek")),
+                    hir_uclass_query(ClassQuery::Binary("separator"))
+                )
+            )
+        );
+        #[cfg(all(
+            feature = "unicode-age",
+            feature = "unicode-gencat",
+            feature = "unicode-script"
+        ))]
+        assert_eq!(
+            t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
+            hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("cyrillic")),
+                    hir_union(
+                        hir_uclass_query(ClassQuery::Binary("greek")),
+                        hir_uclass_query(ClassQuery::Binary("separator"))
+                    )
+                )
+            )
+        );
+
+        #[cfg(all(
+            feature = "unicode-age",
+            feature = "unicode-case",
+            feature = "unicode-gencat",
+            feature = "unicode-script"
+        ))]
+        assert_eq!(
+            t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
+            hir_case_fold(hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("greek")),
+                    hir_uclass_query(ClassQuery::Binary("separator"))
+                )
+            ))
+        );
+        #[cfg(all(
+            feature = "unicode-age",
+            feature = "unicode-gencat",
+            feature = "unicode-script"
+        ))]
+        assert_eq!(
+            t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
+            hir_negate(hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("greek")),
+                    hir_uclass_query(ClassQuery::Binary("separator"))
+                )
+            ))
+        );
+        #[cfg(all(
+            feature = "unicode-age",
+            feature = "unicode-case",
+            feature = "unicode-gencat",
+            feature = "unicode-script"
+        ))]
+        assert_eq!(
+            t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
+            hir_negate(hir_case_fold(hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("greek")),
+                    hir_uclass_query(ClassQuery::Binary("separator"))
+                )
+            )))
+        );
+    }
+
+    #[test]
+    fn class_bracketed_nested() {
+        assert_eq!(t(r"[a[^c]]"), class_negate(uclass(&[('c', 'c')])));
+        assert_eq!(t(r"[a-b[^c]]"), class_negate(uclass(&[('c', 'c')])));
+        assert_eq!(t(r"[a-c[^c]]"), class_negate(uclass(&[])));
+
+        assert_eq!(t(r"[^a[^c]]"), hir_uclass(&[('c', 'c')]));
+        assert_eq!(t(r"[^a-b[^c]]"), hir_uclass(&[('c', 'c')]));
+
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)[a[^c]]"),
+            hir_negate(class_case_fold(uclass(&[('c', 'c')])))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)[a-b[^c]]"),
+            hir_negate(class_case_fold(uclass(&[('c', 'c')])))
+        );
+
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(t(r"(?i)[^a[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')]));
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)[^a-b[^c]]"),
+            hir_uclass(&[('C', 'C'), ('c', 'c')])
+        );
+
+        assert_eq!(t(r"[^a-c[^c]]"), hir_uclass(&[]),);
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(t(r"(?i)[^a-c[^c]]"), hir_uclass(&[]),);
+    }
+
+    #[test]
+    fn class_bracketed_intersect() {
+        assert_eq!(t("[abc&&b-c]"), hir_uclass(&[('b', 'c')]));
+        assert_eq!(t("[abc&&[b-c]]"), hir_uclass(&[('b', 'c')]));
+        assert_eq!(t("[[abc]&&[b-c]]"), hir_uclass(&[('b', 'c')]));
+        assert_eq!(t("[a-z&&b-y&&c-x]"), hir_uclass(&[('c', 'x')]));
+        assert_eq!(t("[c-da-b&&a-d]"), hir_uclass(&[('a', 'd')]));
+        assert_eq!(t("[a-d&&c-da-b]"), hir_uclass(&[('a', 'd')]));
+        assert_eq!(t(r"[a-z&&a-c]"), hir_uclass(&[('a', 'c')]));
+        assert_eq!(t(r"[[a-z&&a-c]]"), hir_uclass(&[('a', 'c')]));
+        assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
+
+        assert_eq!(t("(?-u)[abc&&b-c]"), hir_bclass(&[(b'b', b'c')]));
+        assert_eq!(t("(?-u)[abc&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
+        assert_eq!(t("(?-u)[[abc]&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
+        assert_eq!(t("(?-u)[a-z&&b-y&&c-x]"), hir_bclass(&[(b'c', b'x')]));
+        assert_eq!(t("(?-u)[c-da-b&&a-d]"), hir_bclass(&[(b'a', b'd')]));
+        assert_eq!(t("(?-u)[a-d&&c-da-b]"), hir_bclass(&[(b'a', b'd')]));
+
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[abc&&b-c]"),
+            hir_case_fold(hir_uclass(&[('b', 'c')]))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[abc&&[b-c]]"),
+            hir_case_fold(hir_uclass(&[('b', 'c')]))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[[abc]&&[b-c]]"),
+            hir_case_fold(hir_uclass(&[('b', 'c')]))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[a-z&&b-y&&c-x]"),
+            hir_case_fold(hir_uclass(&[('c', 'x')]))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[c-da-b&&a-d]"),
+            hir_case_fold(hir_uclass(&[('a', 'd')]))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[a-d&&c-da-b]"),
+            hir_case_fold(hir_uclass(&[('a', 'd')]))
+        );
+
+        assert_eq!(
+            t("(?i-u)[abc&&b-c]"),
+            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
+        );
+        assert_eq!(
+            t("(?i-u)[abc&&[b-c]]"),
+            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
+        );
+        assert_eq!(
+            t("(?i-u)[[abc]&&[b-c]]"),
+            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
+        );
+        assert_eq!(
+            t("(?i-u)[a-z&&b-y&&c-x]"),
+            hir_case_fold(hir_bclass(&[(b'c', b'x')]))
+        );
+        assert_eq!(
+            t("(?i-u)[c-da-b&&a-d]"),
+            hir_case_fold(hir_bclass(&[(b'a', b'd')]))
+        );
+        assert_eq!(
+            t("(?i-u)[a-d&&c-da-b]"),
+            hir_case_fold(hir_bclass(&[(b'a', b'd')]))
+        );
+
+        // In `[a^]`, `^` does not need to be escaped, so it makes sense that
+        // `^` is also allowed to be unescaped after `&&`.
+        assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')]));
+        // `]` needs to be escaped after `&&` since it's not at start of class.
+        assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')]));
+        assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')]));
+        assert_eq!(t(r"[\&&&&]"), hir_uclass(&[('&', '&')]));
+        assert_eq!(t(r"[\&&&\&]"), hir_uclass(&[('&', '&')]));
+        // Test precedence.
+        assert_eq!(
+            t(r"[a-w&&[^c-g]z]"),
+            hir_uclass(&[('a', 'b'), ('h', 'w')])
+        );
+    }
+
+    #[test]
+    fn class_bracketed_intersect_negate() {
+        #[cfg(feature = "unicode-perl")]
+        assert_eq!(
+            t(r"[^\w&&\d]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
+        );
+        assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
+        #[cfg(feature = "unicode-perl")]
+        assert_eq!(
+            t(r"[^[\w&&\d]]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
+        );
+        #[cfg(feature = "unicode-perl")]
+        assert_eq!(
+            t(r"[^[^\w&&\d]]"),
+            hir_uclass_query(ClassQuery::Binary("digit"))
+        );
+        #[cfg(feature = "unicode-perl")]
+        assert_eq!(t(r"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word()));
+
+        #[cfg(feature = "unicode-perl")]
+        assert_eq!(
+            t_bytes(r"(?-u)[^\w&&\d]"),
+            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit))
+        );
+        assert_eq!(
+            t_bytes(r"(?-u)[^[a-z&&a-c]]"),
+            hir_negate(hir_bclass(&[(b'a', b'c')]))
+        );
+        assert_eq!(
+            t_bytes(r"(?-u)[^[\w&&\d]]"),
+            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit))
+        );
+        assert_eq!(
+            t_bytes(r"(?-u)[^[^\w&&\d]]"),
+            hir_ascii_bclass(&ast::ClassAsciiKind::Digit)
+        );
+        assert_eq!(
+            t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
+            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word))
+        );
+    }
+
+    #[test]
+    fn class_bracketed_difference() {
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[\pL--[:ascii:]]"),
+            hir_difference(
+                hir_uclass_query(ClassQuery::Binary("letter")),
+                hir_uclass(&[('\0', '\x7F')])
+            )
+        );
+
+        assert_eq!(
+            t(r"(?-u)[[:alpha:]--[:lower:]]"),
+            hir_bclass(&[(b'A', b'Z')])
+        );
+    }
+
+    #[test]
+    fn class_bracketed_symmetric_difference() {
+        #[cfg(feature = "unicode-script")]
+        assert_eq!(
+            t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
+            hir_uclass(&[
+                ('\u{0342}', '\u{0342}'),
+                ('\u{0345}', '\u{0345}'),
+                ('\u{1DC0}', '\u{1DC1}'),
+            ])
+        );
+        assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')]));
+
+        assert_eq!(
+            t(r"(?-u)[a-g~~c-j]"),
+            hir_bclass(&[(b'a', b'b'), (b'h', b'j')])
+        );
+    }
+
+    #[test]
+    fn ignore_whitespace() {
+        assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));
+        assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S"));
+        assert_eq!(
+            t(r"(?x)\x # comment
+{ # comment
+    53 # comment
+} #comment"),
+            hir_lit("S")
+        );
+
+        assert_eq!(t(r"(?x)\x 53"), hir_lit("S"));
+        assert_eq!(
+            t(r"(?x)\x # comment
+        53 # comment"),
+            hir_lit("S")
+        );
+        assert_eq!(t(r"(?x)\x5 3"), hir_lit("S"));
+
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"(?x)\p # comment
+{ # comment
+    Separator # comment
+} # comment"),
+            hir_uclass_query(ClassQuery::Binary("separator"))
+        );
+
+        assert_eq!(
+            t(r"(?x)a # comment
+{ # comment
+    5 # comment
+    , # comment
+    10 # comment
+} # comment"),
+            hir_range(true, 5, Some(10), hir_lit("a"))
+        );
+
+        assert_eq!(t(r"(?x)a\  # hi there"), hir_lit("a "));
+    }
+
+    #[test]
+    fn analysis_is_utf8() {
+        // Positive examples.
+        assert!(props_bytes(r"a").is_utf8());
+        assert!(props_bytes(r"ab").is_utf8());
+        assert!(props_bytes(r"(?-u)a").is_utf8());
+        assert!(props_bytes(r"(?-u)ab").is_utf8());
+        assert!(props_bytes(r"\xFF").is_utf8());
+        assert!(props_bytes(r"\xFF\xFF").is_utf8());
+        assert!(props_bytes(r"[^a]").is_utf8());
+        assert!(props_bytes(r"[^a][^a]").is_utf8());
+        assert!(props_bytes(r"\b").is_utf8());
+        assert!(props_bytes(r"\B").is_utf8());
+        assert!(props_bytes(r"(?-u)\b").is_utf8());
+        assert!(props_bytes(r"(?-u)\B").is_utf8());
+
+        // Negative examples.
+        assert!(!props_bytes(r"(?-u)\xFF").is_utf8());
+        assert!(!props_bytes(r"(?-u)\xFF\xFF").is_utf8());
+        assert!(!props_bytes(r"(?-u)[^a]").is_utf8());
+        assert!(!props_bytes(r"(?-u)[^a][^a]").is_utf8());
+    }
+
+    #[test]
+    fn analysis_captures_len() {
+        assert_eq!(0, props(r"a").explicit_captures_len());
+        assert_eq!(0, props(r"(?:a)").explicit_captures_len());
+        assert_eq!(0, props(r"(?i-u:a)").explicit_captures_len());
+        assert_eq!(0, props(r"(?i-u)a").explicit_captures_len());
+        assert_eq!(1, props(r"(a)").explicit_captures_len());
+        assert_eq!(1, props(r"(?P<foo>a)").explicit_captures_len());
+        assert_eq!(1, props(r"()").explicit_captures_len());
+        assert_eq!(1, props(r"()a").explicit_captures_len());
+        assert_eq!(1, props(r"(a)+").explicit_captures_len());
+        assert_eq!(2, props(r"(a)(b)").explicit_captures_len());
+        assert_eq!(2, props(r"(a)|(b)").explicit_captures_len());
+        assert_eq!(2, props(r"((a))").explicit_captures_len());
+        assert_eq!(1, props(r"([a&&b])").explicit_captures_len());
+    }
+
+    #[test]
+    fn analysis_static_captures_len() {
+        let len = |pattern| props(pattern).static_explicit_captures_len();
+        assert_eq!(Some(0), len(r""));
+        assert_eq!(Some(0), len(r"foo|bar"));
+        assert_eq!(None, len(r"(foo)|bar"));
+        assert_eq!(None, len(r"foo|(bar)"));
+        assert_eq!(Some(1), len(r"(foo|bar)"));
+        assert_eq!(Some(1), len(r"(a|b|c|d|e|f)"));
+        assert_eq!(Some(1), len(r"(a)|(b)|(c)|(d)|(e)|(f)"));
+        assert_eq!(Some(2), len(r"(a)(b)|(c)(d)|(e)(f)"));
+        assert_eq!(Some(6), len(r"(a)(b)(c)(d)(e)(f)"));
+        assert_eq!(Some(3), len(r"(a)(b)(extra)|(a)(b)()"));
+        assert_eq!(Some(3), len(r"(a)(b)((?:extra)?)"));
+        assert_eq!(None, len(r"(a)(b)(extra)?"));
+        assert_eq!(Some(1), len(r"(foo)|(bar)"));
+        assert_eq!(Some(2), len(r"(foo)(bar)"));
+        assert_eq!(Some(2), len(r"(foo)+(bar)"));
+        assert_eq!(None, len(r"(foo)*(bar)"));
+        assert_eq!(Some(0), len(r"(foo)?{0}"));
+        assert_eq!(None, len(r"(foo)?{1}"));
+        assert_eq!(Some(1), len(r"(foo){1}"));
+        assert_eq!(Some(1), len(r"(foo){1,}"));
+        assert_eq!(Some(1), len(r"(foo){1,}?"));
+        assert_eq!(None, len(r"(foo){1,}??"));
+        assert_eq!(None, len(r"(foo){0,}"));
+        assert_eq!(Some(1), len(r"(foo)(?:bar)"));
+        assert_eq!(Some(2), len(r"(foo(?:bar)+)(?:baz(boo))"));
+        assert_eq!(Some(2), len(r"(?P<bar>foo)(?:bar)(bal|loon)"));
+        assert_eq!(
+            Some(2),
+            len(r#"<(a)[^>]+href="([^"]+)"|<(img)[^>]+src="([^"]+)""#)
+        );
+    }
+
+    #[test]
+    fn analysis_is_all_assertions() {
+        // Positive examples.
+        let p = props(r"\b");
+        assert!(!p.look_set().is_empty());
+        assert_eq!(p.minimum_len(), Some(0));
+
+        let p = props(r"\B");
+        assert!(!p.look_set().is_empty());
+        assert_eq!(p.minimum_len(), Some(0));
+
+        let p = props(r"^");
+        assert!(!p.look_set().is_empty());
+        assert_eq!(p.minimum_len(), Some(0));
+
+        let p = props(r"$");
+        assert!(!p.look_set().is_empty());
+        assert_eq!(p.minimum_len(), Some(0));
+
+        let p = props(r"\A");
+        assert!(!p.look_set().is_empty());
+        assert_eq!(p.minimum_len(), Some(0));
+
+        let p = props(r"\z");
+        assert!(!p.look_set().is_empty());
+        assert_eq!(p.minimum_len(), Some(0));
+
+        let p = props(r"$^\z\A\b\B");
+        assert!(!p.look_set().is_empty());
+        assert_eq!(p.minimum_len(), Some(0));
+
+        let p = props(r"$|^|\z|\A|\b|\B");
+        assert!(!p.look_set().is_empty());
+        assert_eq!(p.minimum_len(), Some(0));
+
+        let p = props(r"^$|$^");
+        assert!(!p.look_set().is_empty());
+        assert_eq!(p.minimum_len(), Some(0));
+
+        let p = props(r"((\b)+())*^");
+        assert!(!p.look_set().is_empty());
+        assert_eq!(p.minimum_len(), Some(0));
+
+        // Negative examples.
+        let p = props(r"^a");
+        assert!(!p.look_set().is_empty());
+        assert_eq!(p.minimum_len(), Some(1));
+    }
+
+    #[test]
+    fn analysis_look_set_prefix_any() {
+        let p = props(r"(?-u)(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))");
+        assert!(p.look_set_prefix_any().contains(Look::WordAscii));
+    }
+
+    #[test]
+    fn analysis_is_anchored() {
+        let is_start = |p| props(p).look_set_prefix().contains(Look::Start);
+        let is_end = |p| props(p).look_set_suffix().contains(Look::End);
+
+        // Positive examples.
+        assert!(is_start(r"^"));
+        assert!(is_end(r"$"));
+
+        assert!(is_start(r"^^"));
+        assert!(props(r"$$").look_set_suffix().contains(Look::End));
+
+        assert!(is_start(r"^$"));
+        assert!(is_end(r"^$"));
+
+        assert!(is_start(r"^foo"));
+        assert!(is_end(r"foo$"));
+
+        assert!(is_start(r"^foo|^bar"));
+        assert!(is_end(r"foo$|bar$"));
+
+        assert!(is_start(r"^(foo|bar)"));
+        assert!(is_end(r"(foo|bar)$"));
+
+        assert!(is_start(r"^+"));
+        assert!(is_end(r"$+"));
+        assert!(is_start(r"^++"));
+        assert!(is_end(r"$++"));
+        assert!(is_start(r"(^)+"));
+        assert!(is_end(r"($)+"));
+
+        assert!(is_start(r"$^"));
+        assert!(is_start(r"$^"));
+        assert!(is_start(r"$^|^$"));
+        assert!(is_end(r"$^|^$"));
+
+        assert!(is_start(r"\b^"));
+        assert!(is_end(r"$\b"));
+        assert!(is_start(r"^(?m:^)"));
+        assert!(is_end(r"(?m:$)$"));
+        assert!(is_start(r"(?m:^)^"));
+        assert!(is_end(r"$(?m:$)"));
+
+        // Negative examples.
+        assert!(!is_start(r"(?m)^"));
+        assert!(!is_end(r"(?m)$"));
+        assert!(!is_start(r"(?m:^$)|$^"));
+        assert!(!is_end(r"(?m:^$)|$^"));
+        assert!(!is_start(r"$^|(?m:^$)"));
+        assert!(!is_end(r"$^|(?m:^$)"));
+
+        assert!(!is_start(r"a^"));
+        assert!(!is_start(r"$a"));
+
+        assert!(!is_end(r"a^"));
+        assert!(!is_end(r"$a"));
+
+        assert!(!is_start(r"^foo|bar"));
+        assert!(!is_end(r"foo|bar$"));
+
+        assert!(!is_start(r"^*"));
+        assert!(!is_end(r"$*"));
+        assert!(!is_start(r"^*+"));
+        assert!(!is_end(r"$*+"));
+        assert!(!is_start(r"^+*"));
+        assert!(!is_end(r"$+*"));
+        assert!(!is_start(r"(^)*"));
+        assert!(!is_end(r"($)*"));
+    }
+
+    #[test]
+    fn analysis_is_any_anchored() {
+        let is_start = |p| props(p).look_set().contains(Look::Start);
+        let is_end = |p| props(p).look_set().contains(Look::End);
+
+        // Positive examples.
+        assert!(is_start(r"^"));
+        assert!(is_end(r"$"));
+        assert!(is_start(r"\A"));
+        assert!(is_end(r"\z"));
+
+        // Negative examples.
+        assert!(!is_start(r"(?m)^"));
+        assert!(!is_end(r"(?m)$"));
+        assert!(!is_start(r"$"));
+        assert!(!is_end(r"^"));
+    }
+
+    #[test]
+    fn analysis_can_empty() {
+        // Positive examples.
+        let assert_empty =
+            |p| assert_eq!(Some(0), props_bytes(p).minimum_len());
+        assert_empty(r"");
+        assert_empty(r"()");
+        assert_empty(r"()*");
+        assert_empty(r"()+");
+        assert_empty(r"()?");
+        assert_empty(r"a*");
+        assert_empty(r"a?");
+        assert_empty(r"a{0}");
+        assert_empty(r"a{0,}");
+        assert_empty(r"a{0,1}");
+        assert_empty(r"a{0,10}");
+        #[cfg(feature = "unicode-gencat")]
+        assert_empty(r"\pL*");
+        assert_empty(r"a*|b");
+        assert_empty(r"b|a*");
+        assert_empty(r"a|");
+        assert_empty(r"|a");
+        assert_empty(r"a||b");
+        assert_empty(r"a*a?(abcd)*");
+        assert_empty(r"^");
+        assert_empty(r"$");
+        assert_empty(r"(?m)^");
+        assert_empty(r"(?m)$");
+        assert_empty(r"\A");
+        assert_empty(r"\z");
+        assert_empty(r"\B");
+        assert_empty(r"(?-u)\B");
+        assert_empty(r"\b");
+        assert_empty(r"(?-u)\b");
+
+        // Negative examples.
+        let assert_non_empty =
+            |p| assert_ne!(Some(0), props_bytes(p).minimum_len());
+        assert_non_empty(r"a+");
+        assert_non_empty(r"a{1}");
+        assert_non_empty(r"a{1,}");
+        assert_non_empty(r"a{1,2}");
+        assert_non_empty(r"a{1,10}");
+        assert_non_empty(r"b|a");
+        assert_non_empty(r"a*a+(abcd)*");
+        #[cfg(feature = "unicode-gencat")]
+        assert_non_empty(r"\P{any}");
+        assert_non_empty(r"[a--a]");
+        assert_non_empty(r"[a&&b]");
+    }
+
+    #[test]
+    fn analysis_is_literal() {
+        // Positive examples.
+        assert!(props(r"a").is_literal());
+        assert!(props(r"ab").is_literal());
+        assert!(props(r"abc").is_literal());
+        assert!(props(r"(?m)abc").is_literal());
+        assert!(props(r"(?:a)").is_literal());
+        assert!(props(r"foo(?:a)").is_literal());
+        assert!(props(r"(?:a)foo").is_literal());
+        assert!(props(r"[a]").is_literal());
+
+        // Negative examples.
+        assert!(!props(r"").is_literal());
+        assert!(!props(r"^").is_literal());
+        assert!(!props(r"a|b").is_literal());
+        assert!(!props(r"(a)").is_literal());
+        assert!(!props(r"a+").is_literal());
+        assert!(!props(r"foo(a)").is_literal());
+        assert!(!props(r"(a)foo").is_literal());
+        assert!(!props(r"[ab]").is_literal());
+    }
+
+    #[test]
+    fn analysis_is_alternation_literal() {
+        // Positive examples.
+        assert!(props(r"a").is_alternation_literal());
+        assert!(props(r"ab").is_alternation_literal());
+        assert!(props(r"abc").is_alternation_literal());
+        assert!(props(r"(?m)abc").is_alternation_literal());
+        assert!(props(r"foo|bar").is_alternation_literal());
+        assert!(props(r"foo|bar|baz").is_alternation_literal());
+        assert!(props(r"[a]").is_alternation_literal());
+        assert!(props(r"(?:ab)|cd").is_alternation_literal());
+        assert!(props(r"ab|(?:cd)").is_alternation_literal());
+
+        // Negative examples.
+        assert!(!props(r"").is_alternation_literal());
+        assert!(!props(r"^").is_alternation_literal());
+        assert!(!props(r"(a)").is_alternation_literal());
+        assert!(!props(r"a+").is_alternation_literal());
+        assert!(!props(r"foo(a)").is_alternation_literal());
+        assert!(!props(r"(a)foo").is_alternation_literal());
+        assert!(!props(r"[ab]").is_alternation_literal());
+        assert!(!props(r"[ab]|b").is_alternation_literal());
+        assert!(!props(r"a|[ab]").is_alternation_literal());
+        assert!(!props(r"(a)|b").is_alternation_literal());
+        assert!(!props(r"a|(b)").is_alternation_literal());
+        assert!(!props(r"a|b").is_alternation_literal());
+        assert!(!props(r"a|b|c").is_alternation_literal());
+        assert!(!props(r"[a]|b").is_alternation_literal());
+        assert!(!props(r"a|[b]").is_alternation_literal());
+        assert!(!props(r"(?:a)|b").is_alternation_literal());
+        assert!(!props(r"a|(?:b)").is_alternation_literal());
+        assert!(!props(r"(?:z|xx)@|xx").is_alternation_literal());
+    }
+
+    // This tests that the smart Hir::repetition constructors does some basic
+    // simplifications.
+    #[test]
+    fn smart_repetition() {
+        assert_eq!(t(r"a{0}"), Hir::empty());
+        assert_eq!(t(r"a{1}"), hir_lit("a"));
+        assert_eq!(t(r"\B{32111}"), hir_look(hir::Look::WordUnicodeNegate));
+    }
+
+    // This tests that the smart Hir::concat constructor simplifies the given
+    // exprs in a way we expect.
+    #[test]
+    fn smart_concat() {
+        assert_eq!(t(""), Hir::empty());
+        assert_eq!(t("(?:)"), Hir::empty());
+        assert_eq!(t("abc"), hir_lit("abc"));
+        assert_eq!(t("(?:foo)(?:bar)"), hir_lit("foobar"));
+        assert_eq!(t("quux(?:foo)(?:bar)baz"), hir_lit("quuxfoobarbaz"));
+        assert_eq!(
+            t("foo(?:bar^baz)quux"),
+            hir_cat(vec![
+                hir_lit("foobar"),
+                hir_look(hir::Look::Start),
+                hir_lit("bazquux"),
+            ])
+        );
+        assert_eq!(
+            t("foo(?:ba(?:r^b)az)quux"),
+            hir_cat(vec![
+                hir_lit("foobar"),
+                hir_look(hir::Look::Start),
+                hir_lit("bazquux"),
+            ])
+        );
+    }
+
+    // This tests that the smart Hir::alternation constructor simplifies the
+    // given exprs in a way we expect.
+    #[test]
+    fn smart_alternation() {
+        assert_eq!(
+            t("(?:foo)|(?:bar)"),
+            hir_alt(vec![hir_lit("foo"), hir_lit("bar")])
+        );
+        assert_eq!(
+            t("quux|(?:abc|def|xyz)|baz"),
+            hir_alt(vec![
+                hir_lit("quux"),
+                hir_lit("abc"),
+                hir_lit("def"),
+                hir_lit("xyz"),
+                hir_lit("baz"),
+            ])
+        );
+        assert_eq!(
+            t("quux|(?:abc|(?:def|mno)|xyz)|baz"),
+            hir_alt(vec![
+                hir_lit("quux"),
+                hir_lit("abc"),
+                hir_lit("def"),
+                hir_lit("mno"),
+                hir_lit("xyz"),
+                hir_lit("baz"),
+            ])
+        );
+        assert_eq!(
+            t("a|b|c|d|e|f|x|y|z"),
+            hir_uclass(&[('a', 'f'), ('x', 'z')]),
+        );
+        // Tests that we lift common prefixes out of an alternation.
+        assert_eq!(
+            t("[A-Z]foo|[A-Z]quux"),
+            hir_cat(vec![
+                hir_uclass(&[('A', 'Z')]),
+                hir_alt(vec![hir_lit("foo"), hir_lit("quux")]),
+            ]),
+        );
+        assert_eq!(
+            t("[A-Z][A-Z]|[A-Z]quux"),
+            hir_cat(vec![
+                hir_uclass(&[('A', 'Z')]),
+                hir_alt(vec![hir_uclass(&[('A', 'Z')]), hir_lit("quux")]),
+            ]),
+        );
+        assert_eq!(
+            t("[A-Z][A-Z]|[A-Z][A-Z]quux"),
+            hir_cat(vec![
+                hir_uclass(&[('A', 'Z')]),
+                hir_uclass(&[('A', 'Z')]),
+                hir_alt(vec![Hir::empty(), hir_lit("quux")]),
+            ]),
+        );
+        assert_eq!(
+            t("[A-Z]foo|[A-Z]foobar"),
+            hir_cat(vec![
+                hir_uclass(&[('A', 'Z')]),
+                hir_alt(vec![hir_lit("foo"), hir_lit("foobar")]),
+            ]),
+        );
+    }
+
+    #[test]
+    fn regression_alt_empty_concat() {
+        use crate::ast::{self, Ast};
+
+        let span = Span::splat(Position::new(0, 0, 0));
+        let ast = Ast::alternation(ast::Alternation {
+            span,
+            asts: vec![Ast::concat(ast::Concat { span, asts: vec![] })],
+        });
+
+        let mut t = Translator::new();
+        assert_eq!(Ok(Hir::empty()), t.translate("", &ast));
+    }
+
+    #[test]
+    fn regression_empty_alt() {
+        use crate::ast::{self, Ast};
+
+        let span = Span::splat(Position::new(0, 0, 0));
+        let ast = Ast::concat(ast::Concat {
+            span,
+            asts: vec![Ast::alternation(ast::Alternation {
+                span,
+                asts: vec![],
+            })],
+        });
+
+        let mut t = Translator::new();
+        assert_eq!(Ok(Hir::fail()), t.translate("", &ast));
+    }
+
+    #[test]
+    fn regression_singleton_alt() {
+        use crate::{
+            ast::{self, Ast},
+            hir::Dot,
+        };
+
+        let span = Span::splat(Position::new(0, 0, 0));
+        let ast = Ast::concat(ast::Concat {
+            span,
+            asts: vec![Ast::alternation(ast::Alternation {
+                span,
+                asts: vec![Ast::dot(span)],
+            })],
+        });
+
+        let mut t = Translator::new();
+        assert_eq!(Ok(Hir::dot(Dot::AnyCharExceptLF)), t.translate("", &ast));
+    }
+
+    // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63168
+    #[test]
+    fn regression_fuzz_match() {
+        let pat = "[(\u{6} \0-\u{afdf5}]  \0 ";
+        let ast = ParserBuilder::new()
+            .octal(false)
+            .ignore_whitespace(true)
+            .build()
+            .parse(pat)
+            .unwrap();
+        let hir = TranslatorBuilder::new()
+            .utf8(true)
+            .case_insensitive(false)
+            .multi_line(false)
+            .dot_matches_new_line(false)
+            .swap_greed(true)
+            .unicode(true)
+            .build()
+            .translate(pat, &ast)
+            .unwrap();
+        assert_eq!(
+            hir,
+            Hir::concat(vec![
+                hir_uclass(&[('\0', '\u{afdf5}')]),
+                hir_lit("\0"),
+            ])
+        );
+    }
+
+    // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63155
+    #[cfg(feature = "unicode")]
+    #[test]
+    fn regression_fuzz_difference1() {
+        let pat = r"\W\W|\W[^\v--\W\W\P{Script_Extensions:Pau_Cin_Hau}\u10A1A1-\U{3E3E3}--~~~~--~~~~~~~~------~~~~~~--~~~~~~]*";
+        let _ = t(pat); // shouldn't panic
+    }
+
+    // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63153
+    #[test]
+    fn regression_fuzz_char_decrement1() {
+        let pat = "w[w[^w?\rw\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\r\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0w?\rw[^w?\rw[^w?\rw[^w\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0\0\0\0\0\0\0\0*\0\0\u{1}\0]\0\0-*\0][^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0\0\0\0\0\0\0\0x\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\0\0\0*??\0\u{7f}{2}\u{10}??\0\0\0\0\0\0\0\0\0\u{3}\0\0\0}\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\0\u{1}\0]\0\u{1}\u{1}H-i]-]\0\0\0\0\u{1}\0]\0\0\0\u{1}\0]\0\0-*\0\0\0\0\u{1}9-\u{7f}]\0'|-\u{7f}]\0'|(?i-ux)[-\u{7f}]\0'\u{3}\0\0\0}\0-*\0]<D\0\0\0\0\0\0\u{1}]\0\0\0\0]\0\0-*\0]\0\0 ";
+        let _ = t(pat); // shouldn't panic
+    }
+}