6 files changed, 12203 insertions, 0 deletions
diff --git a/vendor/regex-syntax/src/hir/interval.rs b/vendor/regex-syntax/src/hir/interval.rs
new file mode 100644
index 0000000..e063390
--- /dev/null
+++ b/vendor/regex-syntax/src/hir/interval.rs
@@ -0,0 +1,581 @@
+use core::{char, cmp, fmt::Debug, slice};
+
+use alloc::vec::Vec;
+
+use crate::unicode;
+
+// This module contains an *internal* implementation of interval sets.
+//
+// The primary invariant that interval sets guard is canonical ordering. That
+// is, every interval set contains an ordered sequence of intervals where
+// no two intervals are overlapping or adjacent. While this invariant is
+// occasionally broken within the implementation, it should be impossible for
+// callers to observe it.
+//
+// Since case folding (as implemented below) breaks that invariant, we roll
+// that into this API even though it is a little out of place in an otherwise
+// generic interval set. (Hence the reason why the `unicode` module is imported
+// here.)
+//
+// Some of the implementation complexity here is a result of me wanting to
+// preserve the sequential representation without using additional memory.
+// In many cases, we do use linear extra memory, but it is at most 2x and it
+// is amortized. If we relaxed the memory requirements, this implementation
+// could become much simpler. The extra memory is honestly probably OK, but
+// character classes (especially of the Unicode variety) can become quite
+// large, and it would be nice to keep regex compilation snappy even in debug
+// builds. (In the past, I have been careless with this area of code and it has
+// caused slow regex compilations in debug mode, so this isn't entirely
+// unwarranted.)
+//
+// Tests on this are relegated to the public API of HIR in src/hir.rs.
+
+#[derive(Clone, Debug)]
+pub struct IntervalSet<I> {
+    /// A sorted sequence of non-overlapping, non-adjacent ranges.
+    ranges: Vec<I>,
+    /// While not required at all for correctness, we keep track of whether an
+    /// interval set has been case folded or not. This helps us avoid doing
+    /// redundant work if, for example, a set has already been case folded.
+    /// And note that whether a set is folded or not is preserved through
+    /// all of the pairwise set operations. That is, if both interval sets
+    /// have been case folded, then any of difference, union, intersection or
+    /// symmetric difference all produce a case folded set.
+    ///
+    /// Note that when this is true, it *must* be the case that the set is case
+    /// folded. But when it's false, the set *may* be case folded. In other
+    /// words, we only set this to true when we know the set is case folded,
+    /// but we're okay with it being false if it would be costly to determine
+    /// whether it should be true. This means code cannot assume that a false
+    /// value necessarily indicates that the set is not case folded.
+    ///
+    /// Bottom line: this is a performance optimization.
+    folded: bool,
+}
+
+impl<I: Interval> Eq for IntervalSet<I> {}
+
+// We implement PartialEq manually so that we don't consider the set's internal
+// 'folded' property to be part of its identity. The 'folded' property is
+// strictly an optimization, so sets are equal iff their ranges are equal.
+impl<I: Interval> PartialEq for IntervalSet<I> {
+    fn eq(&self, other: &IntervalSet<I>) -> bool {
+        self.ranges.eq(&other.ranges)
+    }
+}
+
+impl<I: Interval> IntervalSet<I> {
+    /// Create a new set from a sequence of intervals. Each interval is
+    /// specified as a pair of bounds, where both bounds are inclusive.
+    ///
+    /// The given ranges do not need to be in any specific order, and ranges
+    /// may overlap; the set is canonicalized before it is returned.
+    pub fn new<T: IntoIterator<Item = I>>(intervals: T) -> IntervalSet<I> {
+        let ranges: Vec<I> = intervals.into_iter().collect();
+        // An empty set is trivially case folded; otherwise we don't know.
+        let folded = ranges.is_empty();
+        let mut set = IntervalSet { ranges, folded };
+        set.canonicalize();
+        set
+    }
+
+    /// Add a new interval to this set, keeping the set in canonical form.
+    pub fn push(&mut self, interval: I) {
+        // TODO: This could be faster. e.g., Push the interval such that
+        // it preserves canonicalization.
+        self.ranges.push(interval);
+        self.canonicalize();
+        // We don't know whether the new interval added here is considered
+        // case folded, so we conservatively assume that the entire set is
+        // no longer case folded if it was previously.
+        self.folded = false;
+    }
+
+    /// Return an iterator over all intervals in this set.
+    ///
+    /// The iterator yields borrowed intervals in ascending order.
+    pub fn iter(&self) -> IntervalSetIter<'_, I> {
+        IntervalSetIter(self.ranges.iter())
+    }
+
+    /// Return an immutable slice of intervals in this set.
+    ///
+    /// The slice is in canonical order: sorted, non-overlapping, non-adjacent.
+    pub fn intervals(&self) -> &[I] {
+        &self.ranges
+    }
+
+    /// Expand this interval set such that it contains all case folded
+    /// characters. For example, if this class consists of the range `a-z`,
+    /// then applying case folding will result in the class containing both the
+    /// ranges `a-z` and `A-Z`.
+    ///
+    /// This returns an error if the necessary case mapping data is not
+    /// available. The set is re-canonicalized even when an error is returned.
+    pub fn case_fold_simple(&mut self) -> Result<(), unicode::CaseFoldError> {
+        if self.folded {
+            return Ok(());
+        }
+        let len = self.ranges.len(); // fixed now: folding may grow `self.ranges`
+        for i in 0..len {
+            let range = self.ranges[i];
+            if let Err(err) = range.case_fold_simple(&mut self.ranges) {
+                self.canonicalize();
+                return Err(err);
+            }
+        }
+        self.canonicalize();
+        self.folded = true;
+        Ok(())
+    }
+
+    /// Union this set with the given set, in place.
+    pub fn union(&mut self, other: &IntervalSet<I>) {
+        if other.ranges.is_empty() || self.ranges == other.ranges {
+            return;
+        }
+        // Append and re-canonicalize. This could be done more efficiently.
+        self.ranges.extend(&other.ranges);
+        self.canonicalize();
+        self.folded = self.folded && other.folded;
+    }
+
+    /// Intersect this set with the given set, in place.
+    pub fn intersect(&mut self, other: &IntervalSet<I>) {
+        if self.ranges.is_empty() {
+            return;
+        }
+        if other.ranges.is_empty() {
+            self.ranges.clear();
+            // An empty set is case folded.
+            self.folded = true;
+            return;
+        }
+
+        // There should be a way to do this in-place with constant memory,
+        // but I couldn't figure out a simple way to do it. So just append
+        // the intersection to the end of `self.ranges`, and then drain the
+        // original prefix before we're done.
+        let drain_end = self.ranges.len();
+
+        let mut ita = 0..drain_end;
+        let mut itb = 0..other.ranges.len();
+        let mut a = ita.next().unwrap(); // both sets are non-empty here
+        let mut b = itb.next().unwrap();
+        loop {
+            if let Some(ab) = self.ranges[a].intersect(&other.ranges[b]) {
+                self.ranges.push(ab);
+            }
+            let (it, aorb) =
+                if self.ranges[a].upper() < other.ranges[b].upper() {
+                    (&mut ita, &mut a)
+                } else {
+                    (&mut itb, &mut b)
+                };
+            match it.next() {
+                Some(v) => *aorb = v,
+                None => break,
+            }
+        }
+        self.ranges.drain(..drain_end);
+        self.folded = self.folded && other.folded;
+    }
+
+    /// Subtract the given set from this set, in place.
+    pub fn difference(&mut self, other: &IntervalSet<I>) {
+        if self.ranges.is_empty() || other.ranges.is_empty() {
+            return;
+        }
+
+        // This algorithm is (to me) surprisingly complex. A search of the
+        // interwebs indicates that this is a potentially interesting problem.
+        // Folks seem to suggest interval or segment trees, but I'd like to
+        // avoid the overhead (both runtime and conceptual) of that.
+        //
+        // The following is basically my rough first draft. Therefore, in
+        // order to grok it, you probably need to read each line carefully.
+        // Simplifications are most welcome!
+        //
+        // Remember, we can assume the canonical format invariant here, which
+        // says that all ranges are sorted, not overlapping and not adjacent in
+        // each class.
+        let drain_end = self.ranges.len();
+        let (mut a, mut b) = (0, 0);
+        'LOOP: while a < drain_end && b < other.ranges.len() {
+            // Basically, the easy cases are when neither range overlaps with
+            // each other. If the `b` range is less than our current `a`
+            // range, then we can skip it and move on.
+            if other.ranges[b].upper() < self.ranges[a].lower() {
+                b += 1;
+                continue;
+            }
+            // ... similarly for the `a` range. If it's less than the smallest
+            // `b` range, then we can add it as-is.
+            if self.ranges[a].upper() < other.ranges[b].lower() {
+                let range = self.ranges[a];
+                self.ranges.push(range);
+                a += 1;
+                continue;
+            }
+            // Otherwise, we have overlapping ranges.
+            assert!(!self.ranges[a].is_intersection_empty(&other.ranges[b]));
+
+            // This part is tricky and was non-obvious to me without looking
+            // at explicit examples (see the tests). The trickiness stems from
+            // two things: 1) subtracting a range from another range could
+            // yield two ranges and 2) after subtracting a range, it's possible
+            // that future ranges can have an impact. The loop below advances
+            // the `b` ranges until they can't possibly impact the current
+            // range.
+            //
+            // For example, if our `a` range is `a-t` and our next four `b`
+            // ranges are `a-c`, `g-i`, `r-t` and `x-z`, then we need to apply
+            // subtraction three times before moving on to the next `a` range.
+            let mut range = self.ranges[a];
+            while b < other.ranges.len()
+                && !range.is_intersection_empty(&other.ranges[b])
+            {
+                let old_range = range;
+                range = match range.difference(&other.ranges[b]) {
+                    (None, None) => {
+                        // We lost the entire range, so move on to the next
+                        // without adding this one.
+                        a += 1;
+                        continue 'LOOP;
+                    }
+                    (Some(range1), None) | (None, Some(range1)) => range1,
+                    (Some(range1), Some(range2)) => {
+                        self.ranges.push(range1);
+                        range2
+                    }
+                };
+                // It's possible that the `b` range has more to contribute
+                // here. In particular, if it is greater than the original
+                // range, then it might impact the next `a` range *and* it
+                // has impacted the current `a` range as much as possible,
+                // so we can quit. We don't bump `b` so that the next `a`
+                // range can apply it.
+                if other.ranges[b].upper() > old_range.upper() {
+                    break;
+                }
+                // Otherwise, the next `b` range might apply to the current
+                // `a` range.
+                b += 1;
+            }
+            self.ranges.push(range);
+            a += 1;
+        }
+        while a < drain_end {
+            let range = self.ranges[a];
+            self.ranges.push(range);
+            a += 1;
+        }
+        self.ranges.drain(..drain_end);
+        self.folded = self.folded && other.folded;
+    }
+
+    /// Compute the symmetric difference of the two sets, in place.
+    ///
+    /// This computes the symmetric difference of two interval sets. This
+    /// removes all elements in this set that are also in the given set,
+    /// but also adds all elements from the given set that aren't in this
+    /// set. That is, the set will contain all elements in either set,
+    /// but will not contain any elements that are in both sets.
+    pub fn symmetric_difference(&mut self, other: &IntervalSet<I>) {
+        // TODO(burntsushi): Fix this so that it amortizes allocation.
+        let mut intersection = self.clone();
+        intersection.intersect(other);
+        self.union(other); // everything in either set
+        self.difference(&intersection); // minus what is in both
+    }
+
+    /// Negate this interval set.
+    ///
+    /// For all `x` where `x` is any element, if `x` was in this set, then it
+    /// will not be in this set after negation, and vice versa.
+    pub fn negate(&mut self) {
+        if self.ranges.is_empty() {
+            let (min, max) = (I::Bound::min_value(), I::Bound::max_value());
+            self.ranges.push(I::create(min, max));
+            // The set containing everything must be case folded.
+            self.folded = true;
+            return;
+        }
+
+        // There should be a way to do this in-place with constant memory,
+        // but I couldn't figure out a simple way to do it. So just append
+        // the negation to the end of `self.ranges`, and then drain the
+        // original prefix before we're done.
+        let drain_end = self.ranges.len();
+
+        // The checked increment/decrement calls below cannot overflow because
+        // of the canonical ordering invariant (plus the explicit bound checks).
+        if self.ranges[0].lower() > I::Bound::min_value() {
+            let upper = self.ranges[0].lower().decrement();
+            self.ranges.push(I::create(I::Bound::min_value(), upper));
+        }
+        for i in 1..drain_end {
+            let lower = self.ranges[i - 1].upper().increment();
+            let upper = self.ranges[i].lower().decrement();
+            self.ranges.push(I::create(lower, upper));
+        }
+        if self.ranges[drain_end - 1].upper() < I::Bound::max_value() {
+            let lower = self.ranges[drain_end - 1].upper().increment();
+            self.ranges.push(I::create(lower, I::Bound::max_value()));
+        }
+        self.ranges.drain(..drain_end);
+        // We don't need to update whether this set is folded or not, because
+        // it is conservatively preserved through negation. Namely, if a set
+        // is not folded, then it is possible that its negation is folded, for
+        // example, [^☃]. But we're fine with assuming that the set is not
+        // folded in that case. (`folded` permits false negatives but not false
+        // positives.)
+        //
+        // But what about when a set is folded, is its negation also
+        // necessarily folded? Yes. Because if a set is folded, then for every
+        // character in the set, it necessarily included its equivalence class
+        // of case folded characters. Negating it in turn means that all
+        // equivalence classes in the set are negated, and any equivalence
+        // class that was previously not in the set is now entirely in the set.
+    }
+
+    /// Sorts and merges ranges so that this set is in canonical ordering.
+    fn canonicalize(&mut self) {
+        if self.is_canonical() {
+            return;
+        }
+        self.ranges.sort();
+        assert!(!self.ranges.is_empty()); // an empty set is always canonical
+
+        // Is there a way to do this in-place with constant memory? I couldn't
+        // figure out a way to do it. So just append the canonicalization to
+        // the end of `self.ranges`, and then drain the old prefix when done.
+        let drain_end = self.ranges.len();
+        for oldi in 0..drain_end {
+            // If we've added at least one new range, then check if we can
+            // merge this range in the previously added range.
+            if self.ranges.len() > drain_end {
+                let (last, rest) = self.ranges.split_last_mut().unwrap();
+                if let Some(union) = last.union(&rest[oldi]) {
+                    *last = union;
+                    continue;
+                }
+            }
+            let range = self.ranges[oldi];
+            self.ranges.push(range);
+        }
+        self.ranges.drain(..drain_end);
+    }
+
+    /// Returns true if and only if this class is in a canonical ordering.
+    fn is_canonical(&self) -> bool {
+        for pair in self.ranges.windows(2) {
+            if pair[0] >= pair[1] { // out of order, or a duplicate
+                return false;
+            }
+            if pair[0].is_contiguous(&pair[1]) { // overlapping or adjacent
+                return false;
+            }
+        }
+        true
+    }
+}
+
+/// An iterator over the intervals of an [`IntervalSet`], in stored order.
+#[derive(Debug)]
+pub struct IntervalSetIter<'a, I>(slice::Iter<'a, I>);
+
+impl<'a, I> Iterator for IntervalSetIter<'a, I> {
+    type Item = &'a I;
+
+    fn next(&mut self) -> Option<&'a I> {
+        self.0.next() // delegate to the underlying slice iterator
+    }
+}
+
+pub trait Interval:
+    Clone + Copy + Debug + Default + Eq + PartialEq + PartialOrd + Ord
+{
+    type Bound: Bound;
+
+    fn lower(&self) -> Self::Bound; // inclusive lower bound
+    fn upper(&self) -> Self::Bound; // inclusive upper bound
+    fn set_lower(&mut self, bound: Self::Bound);
+    fn set_upper(&mut self, bound: Self::Bound);
+    fn case_fold_simple(
+        &self,
+        intervals: &mut Vec<Self>,
+    ) -> Result<(), unicode::CaseFoldError>;
+
+    /// Create a new interval, swapping the bounds if `lower > upper`.
+    fn create(lower: Self::Bound, upper: Self::Bound) -> Self {
+        let mut int = Self::default();
+        if lower <= upper {
+            int.set_lower(lower);
+            int.set_upper(upper);
+        } else {
+            int.set_lower(upper);
+            int.set_upper(lower);
+        }
+        int
+    }
+
+    /// Union the given overlapping or adjacent range into this range.
+    ///
+    /// If the two ranges aren't contiguous, then this returns `None`.
+    fn union(&self, other: &Self) -> Option<Self> {
+        if !self.is_contiguous(other) {
+            return None;
+        }
+        let lower = cmp::min(self.lower(), other.lower());
+        let upper = cmp::max(self.upper(), other.upper());
+        Some(Self::create(lower, upper))
+    }
+
+    /// Intersect this range with the given range and return the result.
+    ///
+    /// If the intersection is empty, then this returns `None`.
+    fn intersect(&self, other: &Self) -> Option<Self> {
+        let lower = cmp::max(self.lower(), other.lower());
+        let upper = cmp::min(self.upper(), other.upper());
+        if lower <= upper {
+            Some(Self::create(lower, upper))
+        } else {
+            None
+        }
+    }
+
+    /// Subtract the given range from this range and return the resulting
+    /// ranges.
+    ///
+    /// If subtraction would result in an empty range, then no ranges are
+    /// returned.
+    fn difference(&self, other: &Self) -> (Option<Self>, Option<Self>) {
+        if self.is_subset(other) {
+            return (None, None);
+        }
+        if self.is_intersection_empty(other) {
+            return (Some(self.clone()), None);
+        }
+        let add_lower = other.lower() > self.lower();
+        let add_upper = other.upper() < self.upper();
+        // We know this because !self.is_subset(other) and the ranges have
+        // a non-empty intersection.
+        assert!(add_lower || add_upper);
+        let mut ret = (None, None);
+        if add_lower {
+            let upper = other.lower().decrement();
+            ret.0 = Some(Self::create(self.lower(), upper));
+        }
+        if add_upper {
+            let lower = other.upper().increment();
+            let range = Self::create(lower, self.upper());
+            if ret.0.is_none() {
+                ret.0 = Some(range);
+            } else {
+                ret.1 = Some(range);
+            }
+        }
+        ret
+    }
+
+    /// Compute the symmetric difference of this range and the given range.
+    /// This returns the union of the two ranges minus their intersection.
+    fn symmetric_difference(
+        &self,
+        other: &Self,
+    ) -> (Option<Self>, Option<Self>) {
+        let union = match self.union(other) {
+            None => return (Some(self.clone()), Some(other.clone())),
+            Some(union) => union,
+        };
+        let intersection = match self.intersect(other) {
+            None => return (Some(self.clone()), Some(other.clone())),
+            Some(intersection) => intersection,
+        };
+        union.difference(&intersection)
+    }
+
+    /// Returns true if and only if the two ranges are contiguous. Two ranges
+    /// are contiguous if and only if the ranges are either overlapping or
+    /// adjacent.
+    fn is_contiguous(&self, other: &Self) -> bool {
+        let lower1 = self.lower().as_u32();
+        let upper1 = self.upper().as_u32();
+        let lower2 = other.lower().as_u32();
+        let upper2 = other.upper().as_u32();
+        cmp::max(lower1, lower2) <= cmp::min(upper1, upper2).saturating_add(1)
+    }
+
+    /// Returns true if and only if the intersection of this range and the
+    /// other range is empty.
+    fn is_intersection_empty(&self, other: &Self) -> bool {
+        let (lower1, upper1) = (self.lower(), self.upper());
+        let (lower2, upper2) = (other.lower(), other.upper());
+        cmp::max(lower1, lower2) > cmp::min(upper1, upper2)
+    }
+
+    /// Returns true if and only if this range is a subset of the other range.
+    fn is_subset(&self, other: &Self) -> bool {
+        let (lower1, upper1) = (self.lower(), self.upper());
+        let (lower2, upper2) = (other.lower(), other.upper());
+        (lower2 <= lower1 && lower1 <= upper2)
+            && (lower2 <= upper1 && upper1 <= upper2)
+    }
+}
+
+pub trait Bound:
+    Copy + Clone + Debug + Eq + PartialEq + PartialOrd + Ord
+{
+    fn min_value() -> Self; // smallest value a bound can take
+    fn max_value() -> Self; // largest value a bound can take
+    fn as_u32(self) -> u32;
+    fn increment(self) -> Self; // next value; both impls here panic at max
+    fn decrement(self) -> Self; // previous value; both impls here panic at min
+}
+
+impl Bound for u8 {
+    fn min_value() -> Self {
+        u8::MIN
+    }
+    fn max_value() -> Self {
+        u8::MAX
+    }
+    fn as_u32(self) -> u32 {
+        u32::from(self)
+    }
+    fn increment(self) -> Self {
+        self.checked_add(1).unwrap() // panics on u8::MAX
+    }
+    fn decrement(self) -> Self {
+        self.checked_sub(1).unwrap() // panics on 0
+    }
+}
+
+impl Bound for char {
+    fn min_value() -> Self {
+        '\x00'
+    }
+    fn max_value() -> Self {
+        '\u{10FFFF}'
+    }
+    fn as_u32(self) -> u32 {
+        u32::from(self)
+    }
+
+    fn increment(self) -> Self {
+        match self {
+            '\u{D7FF}' => '\u{E000}', // step over the surrogate gap
+            c => char::from_u32(u32::from(c).checked_add(1).unwrap()).unwrap(),
+        }
+    }
+
+    fn decrement(self) -> Self {
+        match self {
+            '\u{E000}' => '\u{D7FF}', // step over the surrogate gap
+            c => char::from_u32(u32::from(c).checked_sub(1).unwrap()).unwrap(),
+        }
+    }
+}
+
+// Tests for interval sets are written in src/hir.rs against the public API.
diff --git a/vendor/regex-syntax/src/hir/literal.rs b/vendor/regex-syntax/src/hir/literal.rs
new file mode 100644
index 0000000..a5a3737
--- /dev/null
+++ b/vendor/regex-syntax/src/hir/literal.rs
@@ -0,0 +1,3214 @@
+/*!
+Provides literal extraction from `Hir` expressions.
+
+An [`Extractor`] pulls literals out of [`Hir`] expressions and returns a
+[`Seq`] of [`Literal`]s.
+
+The purpose of literal extraction is generally to provide avenues for
+optimizing regex searches. The main idea is that substring searches can be an
+order of magnitude faster than a regex search. Therefore, if one can execute
+a substring search to find candidate match locations and only run the regex
+search at those locations, then it is possible for huge improvements in
+performance to be realized.
+
+With that said, literal optimizations are generally a black art because even
+though substring search is generally faster, if the number of candidates
+produced is high, then it can create a lot of overhead by ping-ponging between
+the substring search and the regex search.
+
+Here are some heuristics that might be used to help increase the chances of
+effective literal optimizations:
+
+* Stick to small [`Seq`]s. If you search for too many literals, it's likely
+to lead to substring search that is only a little faster than a regex search,
+and thus the overhead of using literal optimizations in the first place might
+make things slower overall.
+* The literals in your [`Seq`] shouldn't be too short. In general, longer is
+better. A sequence corresponding to single bytes that occur frequently in the
+haystack, for example, is probably a bad literal optimization because it's
+likely to produce many false positive candidates. Longer literals are less
+likely to match, and thus probably produce fewer false positives.
+* If it's possible to estimate the approximate frequency of each byte according
+to some pre-computed background distribution, it is possible to compute a score
+of how "good" a `Seq` is. If a `Seq` isn't good enough, you might consider
+skipping the literal optimization and just use the regex engine.
+
+(It should be noted that there are always pathological cases that can make
+any kind of literal optimization be a net slower result. This is why it
+might be a good idea to be conservative, or to even provide a means for
+literal optimizations to be dynamically disabled if they are determined to be
+ineffective according to some measure.)
+
+You're encouraged to explore the methods on [`Seq`], which permit shrinking
+the size of sequences in a preference-order preserving fashion.
+
+Finally, note that it isn't strictly necessary to use an [`Extractor`]. Namely,
+an `Extractor` only uses public APIs of the [`Seq`] and [`Literal`] types,
+so it is possible to implement your own extractor. For example, for n-grams
+or "inner" literals (i.e., not prefix or suffix literals). The `Extractor`
+is mostly responsible for the case analysis over `Hir` expressions. Much of
+the "trickier" parts are how to combine literal sequences, and that is all
+implemented on [`Seq`].
+*/
+
+use core::{cmp, mem, num::NonZeroUsize};
+
+use alloc::{vec, vec::Vec};
+
+use crate::hir::{self, Hir};
+
+/// Extracts prefix or suffix literal sequences from [`Hir`] expressions.
+///
+/// Literal extraction is based on the following observations:
+///
+/// * Many regexes start with one or a small number of literals.
+/// * Substring search for literals is often much faster (sometimes by an order
+/// of magnitude) than a regex search.
+///
+/// Thus, in many cases, one can search for literals to find candidate starting
+/// locations of a match, and then only run the full regex engine at each such
+/// location instead of over the full haystack.
+///
+/// The main downside of literal extraction is that it can wind up causing a
+/// search to be slower overall. For example, if there are many matches or if
+/// there are many candidates that don't ultimately lead to a match, then a
+/// lot of overhead will be spent in shuffling back-and-forth between substring
+/// search and the regex engine. This is the fundamental reason why literal
+/// optimizations for regex patterns is sometimes considered a "black art."
+///
+/// # Look-around assertions
+///
+/// Literal extraction treats all look-around assertions as-if they match every
+/// empty string. So for example, the regex `\bquux\b` will yield a sequence
+/// containing a single exact literal `quux`. However, not all occurrences
+/// of `quux` correspond to a match of the regex. For example, `\bquux\b`
+/// does not match `ZquuxZ` anywhere because `quux` does not fall on a word
+/// boundary.
+///
+/// In effect, if your regex contains look-around assertions, then a match of
+/// an exact literal does not necessarily mean the regex overall matches. So
+/// you may still need to run the regex engine in such cases to confirm the
+/// match.
+///
+/// The precise guarantee you get from a literal sequence is: if every literal
+/// in the sequence is exact and the original regex contains zero look-around
+/// assertions, then a preference-order multi-substring search of those
+/// literals will precisely match a preference-order search of the original
+/// regex.
+///
+/// # Example
+///
+/// This shows how to extract prefixes:
+///
+/// ```
+/// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse};
+///
+/// let hir = parse(r"(a|b|c)(x|y|z)[A-Z]+foo")?;
+/// let got = Extractor::new().extract(&hir);
+/// // All literals returned are "inexact" because none of them reach the
+/// // match state.
+/// let expected = Seq::from_iter([
+///     Literal::inexact("ax"),
+///     Literal::inexact("ay"),
+///     Literal::inexact("az"),
+///     Literal::inexact("bx"),
+///     Literal::inexact("by"),
+///     Literal::inexact("bz"),
+///     Literal::inexact("cx"),
+///     Literal::inexact("cy"),
+///     Literal::inexact("cz"),
+/// ]);
+/// assert_eq!(expected, got);
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+///
+/// This shows how to extract suffixes:
+///
+/// ```
+/// use regex_syntax::{
+///     hir::literal::{Extractor, ExtractKind, Literal, Seq},
+///     parse,
+/// };
+///
+/// let hir = parse(r"foo|[A-Z]+bar")?;
+/// let got = Extractor::new().kind(ExtractKind::Suffix).extract(&hir);
+/// // Since 'foo' gets to a match state, it is considered exact. But 'bar'
+/// // does not because of the '[A-Z]+', and thus is marked inexact.
+/// let expected = Seq::from_iter([
+///     Literal::exact("foo"),
+///     Literal::inexact("bar"),
+/// ]);
+/// assert_eq!(expected, got);
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone, Debug)]
+pub struct Extractor {
+    kind: ExtractKind, // whether prefixes or suffixes are extracted
+    limit_class: usize,
+    limit_repeat: usize,
+    limit_literal_len: usize,
+    limit_total: usize,
+}
+
+impl Extractor {
+    /// Create a new extractor with a default configuration.
+    ///
+    /// The extractor can be optionally configured before calling
+    /// [`Extractor::extract`] to get a literal sequence.
+    pub fn new() -> Extractor {
+        Extractor {
+            kind: ExtractKind::Prefix, // prefixes are extracted by default
+            limit_class: 10,
+            limit_repeat: 10,
+            limit_literal_len: 100,
+            limit_total: 250,
+        }
+    }
+
+    /// Execute the extractor on `hir` and return a sequence of literals.
+    pub fn extract(&self, hir: &Hir) -> Seq {
+        use crate::hir::HirKind::*;
+
+        match *hir.kind() {
+            Empty | Look(_) => Seq::singleton(self::Literal::exact(vec![])),
+            Literal(hir::Literal(ref bytes)) => {
+                let mut seq =
+                    Seq::singleton(self::Literal::exact(bytes.to_vec()));
+                self.enforce_literal_len(&mut seq);
+                seq
+            }
+            Class(hir::Class::Unicode(ref cls)) => {
+                self.extract_class_unicode(cls)
+            }
+            Class(hir::Class::Bytes(ref cls)) => self.extract_class_bytes(cls),
+            Repetition(ref rep) => self.extract_repetition(rep),
+            Capture(hir::Capture { ref sub, .. }) => self.extract(sub),
+            Concat(ref hirs) => match self.kind {
+                ExtractKind::Prefix => self.extract_concat(hirs.iter()),
+                ExtractKind::Suffix => self.extract_concat(hirs.iter().rev()),
+            },
+            Alternation(ref hirs) => {
+                // Unlike concat, we always union starting from the beginning,
+                // since the beginning corresponds to the highest preference,
+                // which doesn't change based on forwards vs reverse.
+                self.extract_alternation(hirs.iter())
+            }
+        }
+    }
+
+    /// Set the kind of literal sequence to extract from an [`Hir`] expression.
+    ///
+    /// The default is to extract prefixes, but suffixes can be selected
+    /// instead. The contract for prefixes is that every match of the
+    /// corresponding `Hir` must start with one of the literals in the sequence
+    /// returned. Moreover, the _order_ of the sequence returned corresponds to
+    /// the preference order.
+    ///
+    /// Suffixes satisfy a similar contract in that every match of the
+    /// corresponding `Hir` must end with one of the literals in the sequence
+    /// returned. However, there is no guarantee that the literals are in
+    /// preference order.
+    ///
+    /// Remember that a sequence can be infinite. For example, unless the
+    /// limits are configured to be impractically large, attempting to extract
+    /// prefixes (or suffixes) for the pattern `[A-Z]` will return an infinite
+    /// sequence. Generally speaking, if the sequence returned is infinite,
+    /// then it is presumed to be unwise to do prefix (or suffix) optimizations
+    /// for the pattern.
+    pub fn kind(&mut self, kind: ExtractKind) -> &mut Extractor {
+        self.kind = kind;
+        self // hand the extractor back so further calls can be chained
+    }
+
+    /// Configure a limit on the length of the sequence that is permitted for
+    /// a character class. If a character class exceeds this limit, then the
+    /// sequence returned for it is infinite.
+    ///
+    /// This prevents classes like `[A-Z]` or `\pL` from getting turned into
+    /// huge and likely unproductive sequences of literals.
+    ///
+    /// # Example
+    ///
+    /// This example shows how this limit can be lowered to decrease the tolerance
+    /// for character classes being turned into literal sequences.
+    ///
+    /// ```
+    /// use regex_syntax::{hir::literal::{Extractor, Seq}, parse};
+    ///
+    /// let hir = parse(r"[0-9]")?;
+    ///
+    /// let got = Extractor::new().extract(&hir);
+    /// let expected = Seq::new([
+    ///     "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
+    /// ]);
+    /// assert_eq!(expected, got);
+    ///
+    /// // Now let's shrink the limit and see how that changes things.
+    /// let got = Extractor::new().limit_class(4).extract(&hir);
+    /// let expected = Seq::infinite();
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn limit_class(&mut self, limit: usize) -> &mut Extractor {
+        self.limit_class = limit;
+        self // hand the extractor back so further calls can be chained
+    }
+
+    /// Configure a limit on the total number of repetitions that is permitted
+    /// before literal extraction is stopped.
+    ///
+    /// This is useful for limiting things like `(abcde){50}`, or more
+    /// insidiously, `(?:){1000000000}`. This limit prevents any one single
+    /// repetition from adding too much to a literal sequence.
+    ///
+    /// With this limit set, repetitions that exceed it will be stopped and any
+    /// literals extracted up to that point will be made inexact.
+    ///
+    /// # Example
+    ///
+    /// This shows how to decrease the limit and compares it with the default.
+    ///
+    /// ```
+    /// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse};
+    ///
+    /// let hir = parse(r"(abc){8}")?;
+    ///
+    /// let got = Extractor::new().extract(&hir);
+    /// let expected = Seq::new(["abcabcabcabcabcabcabcabc"]);
+    /// assert_eq!(expected, got);
+    ///
+    /// // Now let's shrink the limit and see how that changes things.
+    /// let got = Extractor::new().limit_repeat(4).extract(&hir);
+    /// let expected = Seq::from_iter([
+    ///     Literal::inexact("abcabcabcabc"),
+    /// ]);
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn limit_repeat(&mut self, limit: usize) -> &mut Extractor {
+        self.limit_repeat = limit;
+        self // hand the extractor back so further calls can be chained
+    }
+
+    /// Configure a limit on the maximum length of any literal in a sequence.
+    ///
+    /// This is useful for limiting things like `(abcde){5}{5}{5}{5}`. While
+    /// each repetition or literal in that regex is small, when all the
+    /// repetitions are applied, one ends up with a literal of length `5^5 =
+    /// 3125`.
+    ///
+    /// With this limit set, literals that exceed it will be made inexact and
+    /// thus prevented from growing.
+    ///
+    /// # Example
+    ///
+    /// This shows how to decrease the limit and compares it with the default.
+    ///
+    /// ```
+    /// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse};
+    ///
+    /// let hir = parse(r"(abc){2}{2}{2}")?;
+    ///
+    /// let got = Extractor::new().extract(&hir);
+    /// let expected = Seq::new(["abcabcabcabcabcabcabcabc"]);
+    /// assert_eq!(expected, got);
+    ///
+    /// // Now let's shrink the limit and see how that changes things.
+    /// let got = Extractor::new().limit_literal_len(14).extract(&hir);
+    /// let expected = Seq::from_iter([
+    ///     Literal::inexact("abcabcabcabcab"),
+    /// ]);
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn limit_literal_len(&mut self, limit: usize) -> &mut Extractor {
+        self.limit_literal_len = limit;
+        self // hand the extractor back so further calls can be chained
+    }
+
+    /// Configure a limit on the total number of literals that will be
+    /// returned.
+    ///
+    /// This is useful as a practical measure for avoiding the creation of
+    /// large sequences of literals. While the extractor will automatically
+    /// handle local creations of large sequences (for example, `[A-Z]` yields
+    /// an infinite sequence by default), large sequences can be created
+    /// through non-local means as well.
+    ///
+    /// For example, `[ab]{3}{3}` would yield a sequence of length `512 = 2^9`
+    /// despite each of the repetitions being small on their own. This limit
+    /// thus represents a "catch all" for avoiding locally small sequences from
+    /// combining into large sequences.
+    ///
+    /// # Example
+    ///
+    /// This example shows how reducing the limit will change the literal
+    /// sequence returned.
+    ///
+    /// ```
+    /// use regex_syntax::{hir::literal::{Extractor, Literal, Seq}, parse};
+    ///
+    /// let hir = parse(r"[ab]{2}{2}")?;
+    ///
+    /// let got = Extractor::new().extract(&hir);
+    /// let expected = Seq::new([
+    ///     "aaaa", "aaab", "aaba", "aabb",
+    ///     "abaa", "abab", "abba", "abbb",
+    ///     "baaa", "baab", "baba", "babb",
+    ///     "bbaa", "bbab", "bbba", "bbbb",
+    /// ]);
+    /// assert_eq!(expected, got);
+    ///
+    /// // The default limit is not too big, but big enough to extract all
+    /// // literals from '[ab]{2}{2}'. If we shrink the limit to less than 16,
+    /// // then we'll get a truncated set. Notice that it returns a sequence of
+    /// // length 4 even though our limit was 10. This is because the sequence
+    /// // is difficult to increase without blowing the limit. Notice also
+    /// // that every literal in the sequence is now inexact because they were
+    /// // stripped of some suffix.
+    /// let got = Extractor::new().limit_total(10).extract(&hir);
+    /// let expected = Seq::from_iter([
+    ///     Literal::inexact("aa"),
+    ///     Literal::inexact("ab"),
+    ///     Literal::inexact("ba"),
+    ///     Literal::inexact("bb"),
+    /// ]);
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn limit_total(&mut self, limit: usize) -> &mut Extractor {
+        self.limit_total = limit;
+        self // hand the extractor back so further calls can be chained
+    }
+
+    /// Extract a sequence from the given concatenation. Sequences from each of
+    /// the child HIR expressions are combined via cross product.
+    ///
+    /// This short circuits once the cross product turns into a sequence
+    /// containing only inexact literals.
+    fn extract_concat<'a, I: Iterator<Item = &'a Hir>>(&self, it: I) -> Seq {
+        let start = Seq::singleton(self::Literal::exact(vec![]));
+        let mut children = it;
+        // Fold the children together with a cross product. Once every
+        // literal in the accumulator is inexact (an infinite sequence counts
+        // as well), no further cross product can change it, so the fold is
+        // cut short through the 'Err' arm.
+        let folded = children.try_fold(start, |acc, child| {
+            if acc.is_inexact() {
+                return Err(acc);
+            }
+            // 'cross' itself picks forward or reverse from the extract kind.
+            Ok(self.cross(acc, &mut self.extract(child)))
+        });
+        folded.unwrap_or_else(|seq| seq)
+    }
+
+    /// Extract a sequence from the given alternation.
+    ///
+    /// This short circuits once the union turns into an infinite sequence.
+    fn extract_alternation<'a, I: Iterator<Item = &'a Hir>>(
+        &self,
+        it: I,
+    ) -> Seq {
+        let mut branches = it;
+        // A union with an infinite sequence is always infinite, so once the
+        // accumulator stops being finite the remaining branches cannot
+        // change it and the fold bails out through the 'Err' arm.
+        let unioned = branches.try_fold(Seq::empty(), |acc, branch| {
+            if acc.is_finite() {
+                Ok(self.union(acc, &mut self.extract(branch)))
+            } else {
+                Err(acc)
+            }
+        });
+        unioned.unwrap_or_else(|seq| seq)
+    }
+
+    /// Extract a sequence of literals from the given repetition. We do our
+    /// best. Some examples:
+    ///
+    ///   'a*'    => [inexact(a), exact("")]
+    ///   'a*?'   => [exact(""), inexact(a)]
+    ///   'a+'    => [inexact(a)]
+    ///   'a{3}'  => [exact(aaa)]
+    ///   'a{3,5}' => [inexact(aaa)]
+    ///
+    /// The key here really is making sure we get the 'inexact' vs 'exact'
+    /// attributes correct on each of the literals we add. For example, the
+    /// fact that 'a*' gives us an inexact 'a' and an exact empty string means
+    /// that a regex like 'ab*c' will result in [inexact(ab), exact(ac)]
+    /// literals being extracted, which might actually be a better prefilter
+    /// than just 'a'.
+    fn extract_repetition(&self, rep: &hir::Repetition) -> Seq {
+        // Literals of the repeated sub-expression; every case below is
+        // assembled from these.
+        let mut subseq = self.extract(&rep.sub);
+        let empty = || Seq::singleton(Literal::exact(vec![]));
+
+        // Case 1: zero iterations are allowed ('a*', 'a?', 'a{0,5}').
+        if rep.min == 0 {
+            // Only 'max=1' keeps the literals exact, since 'a?' is
+            // equivalent to 'a|' (and 'a??' to '|a'). With any other
+            // maximum, more input may follow each literal.
+            if rep.max != Some(1) {
+                subseq.make_inexact();
+            }
+            // Preference order: a greedy repetition tries the sub-expression
+            // first, a lazy one tries the empty match first.
+            let (first, mut second) = if rep.greedy {
+                (subseq, empty())
+            } else {
+                (empty(), subseq)
+            };
+            return self.union(first, &mut second);
+        }
+
+        // Case 2: at least one iteration is required. Cross the
+        // sub-expression with itself 'min' times, but never more often than
+        // the configured repeat limit allows.
+        let limit = u32::try_from(self.limit_repeat).unwrap_or(u32::MAX);
+        let mut seq = empty();
+        for _ in 0..cmp::min(rep.min, limit) {
+            // Once nothing in 'seq' is exact, crossing cannot extend it.
+            if seq.is_inexact() {
+                break;
+            }
+            seq = self.cross(seq, &mut subseq.clone());
+        }
+
+        // The result stays exact only for a fixed count ('a{3}') that was
+        // expanded in full. A different upper bound ('a+', 'a{3,5}'), or a
+        // count that was cut short by the repeat limit, forces every
+        // literal to be inexact.
+        let fixed_count = rep.max == Some(rep.min);
+        let truncated = usize::try_from(rep.min).is_err() || rep.min > limit;
+        if !fixed_count || truncated {
+            seq.make_inexact();
+        }
+        seq
+    }
+
+    /// Convert the given Unicode class into a sequence of literals if the
+    /// class is small enough. If the class is too big, return an infinite
+    /// sequence.
+    fn extract_class_unicode(&self, cls: &hir::ClassUnicode) -> Seq {
+        if self.class_over_limit_unicode(cls) {
+            return Seq::infinite();
+        }
+        // The class is within the limit: emit one literal per codepoint,
+        // walking the ranges in the order the class yields them.
+        let mut seq = Seq::empty();
+        cls.iter()
+            .flat_map(|range| range.start()..=range.end())
+            .for_each(|ch| seq.push(Literal::from(ch)));
+        self.enforce_literal_len(&mut seq);
+        seq
+    }
+
+    /// Convert the given byte class into a sequence of literals if the class
+    /// is small enough. If the class is too big, return an infinite sequence.
+    fn extract_class_bytes(&self, cls: &hir::ClassBytes) -> Seq {
+        if self.class_over_limit_bytes(cls) {
+            return Seq::infinite();
+        }
+        // The class is within the limit: emit one literal per byte,
+        // walking the ranges in the order the class yields them.
+        let mut seq = Seq::empty();
+        cls.iter()
+            .flat_map(|range| range.start()..=range.end())
+            .for_each(|byte| seq.push(Literal::from(byte)));
+        self.enforce_literal_len(&mut seq);
+        seq
+    }
+
+    /// Returns true if the given Unicode class exceeds the configured limits
+    /// on this extractor.
+    fn class_over_limit_unicode(&self, cls: &hir::ClassUnicode) -> bool {
+        let (mut total, mut ranges) = (0, cls.iter());
+        while total <= self.limit_class {
+            match ranges.next() {
+                Some(range) => total += range.len(),
+                None => break, // every range has been counted
+            }
+        }
+        total > self.limit_class
+    }
+
+    /// Returns true if the given byte class exceeds the configured limits on
+    /// this extractor.
+    fn class_over_limit_bytes(&self, cls: &hir::ClassBytes) -> bool {
+        let (mut total, mut ranges) = (0, cls.iter());
+        while total <= self.limit_class {
+            match ranges.next() {
+                Some(range) => total += range.len(),
+                None => break, // every range has been counted
+            }
+        }
+        total > self.limit_class
+    }
+
+    /// Compute the cross product of the two sequences if the result would be
+    /// within configured limits. Otherwise, make `seq2` infinite and cross the
+    /// infinite sequence with `seq1`.
+    fn cross(&self, mut seq1: Seq, seq2: &mut Seq) -> Seq {
+        // A product that could exceed the total limit is never built.
+        match seq1.max_cross_len(seq2) {
+            Some(len) if len > self.limit_total => seq2.make_infinite(),
+            _ => {}
+        }
+        match self.kind {
+            ExtractKind::Suffix => seq1.cross_reverse(seq2),
+            ExtractKind::Prefix => seq1.cross_forward(seq2),
+        }
+        assert!(seq1.len().map_or(true, |x| x <= self.limit_total));
+        self.enforce_literal_len(&mut seq1);
+        seq1
+    }
+
+    /// Union the two sequences if the result would be within configured
+    /// limits. Otherwise, make `seq2` infinite and union the infinite sequence
+    /// with `seq1`.
+    fn union(&self, mut seq1: Seq, seq2: &mut Seq) -> Seq {
+        let limit = self.limit_total;
+        if seq1.max_union_len(seq2).map_or(false, |len| len > limit) {
+            // The union could end up too big. Rather than giving up right
+            // away, first shorten the literals on both sides and
+            // deduplicate them, in the hope that this makes enough room to
+            // add a few more literals and keep the sequence finite. The
+            // alternative is a union with an infinite sequence, which
+            // infects everything and effectively stops literal extraction
+            // in its tracks.
+            //
+            // Why keep 4 bytes? It is a bit of an abstraction leak:
+            // downstream, the literals may wind up being fed to the Teddy
+            // algorithm, which supports searching literals up to length 4.
+            // Arguably this should be a tunable parameter, but it seems a
+            // little tricky to describe, and it is still unclear whether
+            // this is the right way to cull literal sequences.
+            let keep = 4;
+            if self.kind.is_prefix() {
+                seq1.keep_first_bytes(keep);
+                seq2.keep_first_bytes(keep);
+            } else {
+                seq1.keep_last_bytes(keep);
+                seq2.keep_last_bytes(keep);
+            }
+            seq1.dedup();
+            seq2.dedup();
+            // Still too big after trimming: fall back to an infinite
+            // sequence on the right-hand side.
+            match seq1.max_union_len(seq2) {
+                Some(len) if len > limit => seq2.make_infinite(),
+                _ => {}
+            }
+        }
+        // If 'seq2' was made infinite above, this is the union with an
+        // infinite sequence described in the method documentation.
+        seq1.union(seq2);
+        assert!(seq1.len().map_or(true, |x| x <= self.limit_total));
+        seq1
+    }
+
+    /// Applies the literal length limit to the given sequence. If none of the
+    /// literals in the sequence exceed the limit, then this is a no-op.
+    fn enforce_literal_len(&self, seq: &mut Seq) {
+        if self.kind.is_suffix() {
+            seq.keep_last_bytes(self.limit_literal_len);
+        } else {
+            seq.keep_first_bytes(self.limit_literal_len);
+        }
+    }
+}
+
+impl Default for Extractor {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+/// The kind of literals to extract from an [`Hir`] expression.
+///
+/// The default extraction kind is [`ExtractKind::Prefix`].
+#[non_exhaustive]
+#[derive(Clone, Debug)]
+pub enum ExtractKind {
+    /// Extracts only prefix literals from a regex.
+    Prefix,
+    /// Extracts only suffix literals from a regex.
+    ///
+    /// Note that the sequence returned by suffix literals currently may
+    /// not correctly represent leftmost-first or "preference" order match
+    /// semantics.
+    Suffix,
+}
+
+impl ExtractKind {
+    /// Reports whether this is the `Prefix` kind.
+    pub fn is_prefix(&self) -> bool {
+        matches!(self, Self::Prefix)
+    }
+
+    /// Reports whether this is the `Suffix` kind.
+    pub fn is_suffix(&self) -> bool {
+        matches!(self, Self::Suffix)
+    }
+}
+
+impl Default for ExtractKind {
+    fn default() -> Self {
+        Self::Prefix
+    }
+}
+
+/// A sequence of literals.
+///
+/// A `Seq` is very much like a set in that it represents a union of its
+/// members. That is, it corresponds to a set of literals where at least one
+/// must match in order for a particular [`Hir`] expression to match. (Whether
+/// this corresponds to the entire `Hir` expression, a prefix of it or a suffix
+/// of it depends on how the `Seq` was extracted from the `Hir`.)
+///
+/// It is also unlike a set in that multiple identical literals may appear,
+/// and that the order of the literals in the `Seq` matters. For example, if
+/// the sequence is `[sam, samwise]` and leftmost-first matching is used, then
+/// `samwise` can never match and the sequence is equivalent to `[sam]`.
+///
+/// # States of a sequence
+///
+/// A `Seq` has a few different logical states to consider:
+///
+/// * The sequence can represent "any" literal. When this happens, the set does
+/// not have a finite size. The purpose of this state is to inhibit callers
+/// from making assumptions about what literals are required in order to match
+/// a particular [`Hir`] expression. Generally speaking, when a set is in this
+/// state, literal optimizations are inhibited. A good example of a regex that
+/// will cause this sort of set to appear is `[A-Za-z]`. The character class
+/// is just too big (and also too narrow) to be usefully expanded into 52
+/// different literals. (Note that the decision for when a seq should become
+/// infinite is determined by the caller. A seq itself has no hard-coded
+/// limits.)
+/// * The sequence can be empty, in which case, it is an affirmative statement
+/// that there are no literals that can match the corresponding `Hir`.
+/// Consequently, the `Hir` never matches any input. For example, `[a&&b]`.
+/// * The sequence can be non-empty, in which case, at least one of the
+/// literals must match in order for the corresponding `Hir` to match.
+///
+/// # Example
+///
+/// This example shows how literal sequences can be simplified by stripping
+/// suffixes and minimizing while maintaining preference order.
+///
+/// ```
+/// use regex_syntax::hir::literal::{Literal, Seq};
+///
+/// let mut seq = Seq::new(&[
+///     "farm",
+///     "appliance",
+///     "faraway",
+///     "apple",
+///     "fare",
+///     "gap",
+///     "applicant",
+///     "applaud",
+/// ]);
+/// seq.keep_first_bytes(3);
+/// seq.minimize_by_preference();
+/// // Notice that 'far' comes before 'app', which matches the order in the
+/// // original sequence. This guarantees that leftmost-first semantics are
+/// // not altered by simplifying the set.
+/// let expected = Seq::from_iter([
+///     Literal::inexact("far"),
+///     Literal::inexact("app"),
+///     Literal::exact("gap"),
+/// ]);
+/// assert_eq!(expected, seq);
+/// ```
+#[derive(Clone, Eq, PartialEq)]
+pub struct Seq {
+    /// The members of this seq. Their order is significant.
+    ///
+    /// When `None`, the seq represents all possible literals. That is, it
+    /// prevents one from making assumptions about specific literals in the
+    /// seq, and forces one to treat it as if any literal might be in the seq.
+    ///
+    /// Note that `Some(vec![])` is valid and corresponds to the empty seq of
+    /// literals, i.e., a regex that can never match. For example, `[a&&b]`.
+    /// It is distinct from `Some(vec![""])`, which corresponds to the seq
+    /// containing an empty string, which matches at every position.
+    literals: Option<Vec<Literal>>,
+}
+
+impl Seq {
+    /// Returns an empty sequence.
+    ///
+    /// An empty sequence matches zero literals, and thus corresponds to a
+    /// regex that itself can never match.
+    #[inline]
+    pub fn empty() -> Seq {
+        Self { literals: Some(Vec::new()) }
+    }
+
+    /// Returns a sequence of literals without a finite size and may contain
+    /// any literal.
+    ///
+    /// A sequence without finite size does not reveal anything about the
+    /// characteristics of the literals in its set. There are no fixed prefixes
+    /// or suffixes, nor are lower or upper bounds on the length of the literals
+    /// in the set known.
+    ///
+    /// This is useful to represent constructs in a regex that are "too big"
+    /// to usefully represent as a sequence of literals. For example, `[A-Za-z]`.
+    /// When sequences get too big, they lose their discriminating nature and
+    /// are more likely to produce false positives, which in turn makes them
+    /// less likely to speed up searches.
+    ///
+    /// More pragmatically, for many regexes, enumerating all possible literals
+    /// is itself not possible or might otherwise use too many resources. So
+    /// constraining the size of sets during extraction is a practical trade
+    /// off to make.
+    #[inline]
+    pub fn infinite() -> Seq {
+        Self { literals: None }
+    }
+
+    /// Returns a sequence containing a single literal.
+    #[inline]
+    pub fn singleton(lit: Literal) -> Seq {
+        Self { literals: Some(Vec::from([lit])) }
+    }
+
+    /// Returns a sequence of exact literals from the given byte strings.
+    #[inline]
+    pub fn new<I, B>(it: I) -> Seq
+    where
+        I: IntoIterator<Item = B>,
+        B: AsRef<[u8]>,
+    {
+        Seq::from_iter(it.into_iter().map(|b| Literal::exact(b.as_ref())))
+    }
+
+    /// If this is a finite sequence, return its members as a slice of
+    /// literals.
+    ///
+    /// The slice returned may be empty, in which case, there are no literals
+    /// that can match this sequence.
+    #[inline]
+    pub fn literals(&self) -> Option<&[Literal]> {
+        self.literals.as_deref() // `None` stands for the infinite sequence
+    }
+
+    /// Push a literal to the end of this sequence.
+    ///
+    /// If this sequence is not finite, then this is a no-op.
+    ///
+    /// Similarly, if the most recently added item of this sequence is
+    /// equivalent to the literal given, then it is not added. This reflects
+    /// a `Seq`'s "set like" behavior, and represents a practical trade off.
+    /// Namely, there is never any need to have two adjacent and equivalent
+    /// literals in the same sequence, _and_ it is easy to detect in some
+    /// cases.
+    #[inline]
+    pub fn push(&mut self, lit: Literal) {
+        // An infinite sequence stores nothing, so there is nothing to add.
+        if let Some(members) = self.literals.as_mut() {
+            // Skip a literal that merely repeats the current last member;
+            // only adjacent duplicates are detected here.
+            if members.last() != Some(&lit) {
+                members.push(lit);
+            }
+        }
+    }
+
+    /// Make all of the literals in this sequence inexact.
+    ///
+    /// This is a no-op if this sequence is not finite.
+    #[inline]
+    pub fn make_inexact(&mut self) {
+        // An infinite sequence has no stored members, so it is left
+        // untouched.
+        if let Some(members) = self.literals.as_mut() {
+            for member in members {
+                member.make_inexact();
+            }
+        }
+    }
+
+    /// Converts this sequence to an infinite sequence.
+    ///
+    /// This is a no-op if the sequence is already infinite.
+    #[inline]
+    pub fn make_infinite(&mut self) {
+        *self = Seq::infinite();
+    }
+
+    /// Modify this sequence to contain the cross product between it and the
+    /// sequence given.
+    ///
+    /// The cross product only considers literals in this sequence that are
+    /// exact. That is, inexact literals are not extended.
+    ///
+    /// The literals are always drained from `other`, even if none are used.
+    /// This permits callers to reuse the sequence allocation elsewhere.
+    ///
+    /// If this sequence is infinite, then this is a no-op, regardless of what
+    /// `other` contains (and in this case, the literals are still drained from
+    /// `other`). If `other` is infinite and this sequence is finite, then this
+    /// is a no-op, unless this sequence contains a zero-length literal. In
+    /// which case, the infiniteness of `other` infects this sequence, and this
+    /// sequence is itself made infinite.
+    ///
+    /// Like [`Seq::union`], this may attempt to deduplicate literals. See
+    /// [`Seq::dedup`] for how deduplication deals with exact and inexact
+    /// literals.
+    ///
+    /// # Example
+    ///
+    /// This example shows basic usage and how exact and inexact literals
+    /// interact.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// let mut seq1 = Seq::from_iter([
+    ///     Literal::exact("foo"),
+    ///     Literal::inexact("bar"),
+    /// ]);
+    /// let mut seq2 = Seq::from_iter([
+    ///     Literal::inexact("quux"),
+    ///     Literal::exact("baz"),
+    /// ]);
+    /// seq1.cross_forward(&mut seq2);
+    ///
+    /// // The literals are pulled out of seq2.
+    /// assert_eq!(Some(0), seq2.len());
+    ///
+    /// let expected = Seq::from_iter([
+    ///     Literal::inexact("fooquux"),
+    ///     Literal::exact("foobaz"),
+    ///     Literal::inexact("bar"),
+    /// ]);
+    /// assert_eq!(expected, seq1);
+    /// ```
+    ///
+    /// This example shows the behavior of when `other` is an infinite
+    /// sequence.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// let mut seq1 = Seq::from_iter([
+    ///     Literal::exact("foo"),
+    ///     Literal::inexact("bar"),
+    /// ]);
+    /// let mut seq2 = Seq::infinite();
+    /// seq1.cross_forward(&mut seq2);
+    ///
+    /// // When seq2 is infinite, cross product doesn't add anything, but
+    /// // ensures all members of seq1 are inexact.
+    /// let expected = Seq::from_iter([
+    ///     Literal::inexact("foo"),
+    ///     Literal::inexact("bar"),
+    /// ]);
+    /// assert_eq!(expected, seq1);
+    /// ```
+    ///
+    /// This example is like the one above, but shows what happens when this
+    /// sequence contains an empty string. In this case, an infinite `other`
+    /// sequence infects this sequence (because the empty string means that
+    /// there are no finite prefixes):
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// let mut seq1 = Seq::from_iter([
+    ///     Literal::exact("foo"),
+    ///     Literal::exact(""), // inexact provokes same behavior
+    ///     Literal::inexact("bar"),
+    /// ]);
+    /// let mut seq2 = Seq::infinite();
+    /// seq1.cross_forward(&mut seq2);
+    ///
+    /// // seq1 is now infinite!
+    /// assert!(!seq1.is_finite());
+    /// ```
+    ///
+    /// This example shows the behavior when this sequence is infinite.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// let mut seq1 = Seq::infinite();
+    /// let mut seq2 = Seq::from_iter([
+    ///     Literal::exact("foo"),
+    ///     Literal::inexact("bar"),
+    /// ]);
+    /// seq1.cross_forward(&mut seq2);
+    ///
+    /// // seq1 remains unchanged.
+    /// assert!(!seq1.is_finite());
+    /// // Even though the literals in seq2 weren't used, it was still drained.
+    /// assert_eq!(Some(0), seq2.len());
+    /// ```
+    #[inline]
+    pub fn cross_forward(&mut self, other: &mut Seq) {
+        let (lits1, lits2) = match self.cross_preamble(other) {
+            None => return, // NOTE(review): no product needed; see cross_preamble
+            Some((lits1, lits2)) => (lits1, lits2),
+        };
+        let newcap = lits1.len().saturating_mul(lits2.len());
+        for selflit in mem::replace(lits1, Vec::with_capacity(newcap)) {
+            if !selflit.is_exact() {
+                lits1.push(selflit); // inexact literals are kept, never extended
+                continue;
+            }
+            for otherlit in lits2.iter() {
+                let mut newlit = Literal::exact(Vec::with_capacity(
+                    selflit.len() + otherlit.len(),
+                ));
+                newlit.extend(&selflit);
+                newlit.extend(&otherlit);
+                if !otherlit.is_exact() {
+                    newlit.make_inexact(); // an inexact tail taints the result
+                }
+                lits1.push(newlit);
+            }
+        }
+        lits2.drain(..); // `other` is always left empty for the caller
+        self.dedup();
+    }
+
+    /// Modify this sequence to contain the cross product between it and
+    /// the sequence given, where the sequences are treated as suffixes
+    /// instead of prefixes. Namely, the sequence `other` is *prepended*
+    /// to `self` (as opposed to `other` being *appended* to `self` in
+    /// [`Seq::cross_forward`]).
+    ///
+    /// The cross product only considers literals in this sequence that are
+    /// exact. That is, inexact literals are not extended.
+    ///
+    /// The literals are always drained from `other`, even if none are used.
+    /// This permits callers to reuse the sequence allocation elsewhere.
+    ///
+    /// If this sequence is infinite, then this is a no-op, regardless of what
+    /// `other` contains (and in this case, the literals are still drained from
+    /// `other`). If `other` is infinite and this sequence is finite, then this
+    /// is a no-op, unless this sequence contains a zero-length literal. In
+    /// which case, the infiniteness of `other` infects this sequence, and this
+    /// sequence is itself made infinite.
+    ///
+    /// Like [`Seq::union`], this may attempt to deduplicate literals. See
+    /// [`Seq::dedup`] for how deduplication deals with exact and inexact
+    /// literals.
+    ///
+    /// # Example
+    ///
+    /// This example shows basic usage and how exact and inexact literals
+    /// interact.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// let mut seq1 = Seq::from_iter([
+    ///     Literal::exact("foo"),
+    ///     Literal::inexact("bar"),
+    /// ]);
+    /// let mut seq2 = Seq::from_iter([
+    ///     Literal::inexact("quux"),
+    ///     Literal::exact("baz"),
+    /// ]);
+    /// seq1.cross_reverse(&mut seq2);
+    ///
+    /// // The literals are pulled out of seq2.
+    /// assert_eq!(Some(0), seq2.len());
+    ///
+    /// let expected = Seq::from_iter([
+    ///     Literal::inexact("quuxfoo"),
+    ///     Literal::inexact("bar"),
+    ///     Literal::exact("bazfoo"),
+    /// ]);
+    /// assert_eq!(expected, seq1);
+    /// ```
+    ///
+    /// This example shows the behavior of when `other` is an infinite
+    /// sequence.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// let mut seq1 = Seq::from_iter([
+    ///     Literal::exact("foo"),
+    ///     Literal::inexact("bar"),
+    /// ]);
+    /// let mut seq2 = Seq::infinite();
+    /// seq1.cross_reverse(&mut seq2);
+    ///
+    /// // When seq2 is infinite, cross product doesn't add anything, but
+    /// // ensures all members of seq1 are inexact.
+    /// let expected = Seq::from_iter([
+    ///     Literal::inexact("foo"),
+    ///     Literal::inexact("bar"),
+    /// ]);
+    /// assert_eq!(expected, seq1);
+    /// ```
+    ///
+    /// This example is like the one above, but shows what happens when this
+    /// sequence contains an empty string. In this case, an infinite `other`
+    /// sequence infects this sequence (because the empty string means that
+    /// there are no finite suffixes):
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// let mut seq1 = Seq::from_iter([
+    ///     Literal::exact("foo"),
+    ///     Literal::exact(""), // inexact provokes same behavior
+    ///     Literal::inexact("bar"),
+    /// ]);
+    /// let mut seq2 = Seq::infinite();
+    /// seq1.cross_reverse(&mut seq2);
+    ///
+    /// // seq1 is now infinite!
+    /// assert!(!seq1.is_finite());
+    /// ```
+    ///
+    /// This example shows the behavior when this sequence is infinite.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// let mut seq1 = Seq::infinite();
+    /// let mut seq2 = Seq::from_iter([
+    ///     Literal::exact("foo"),
+    ///     Literal::inexact("bar"),
+    /// ]);
+    /// seq1.cross_reverse(&mut seq2);
+    ///
+    /// // seq1 remains unchanged.
+    /// assert!(!seq1.is_finite());
+    /// // Even though the literals in seq2 weren't used, it was still drained.
+    /// assert_eq!(Some(0), seq2.len());
+    /// ```
+    #[inline]
+    pub fn cross_reverse(&mut self, other: &mut Seq) {
+        let (suffixes, prefixes) = match self.cross_preamble(other) {
+            Some(pair) => pair,
+            None => return,
+        };
+        // Same idea as 'cross_forward', but with the loops swapped: 'other'
+        // drives the outer loop because its literals get *prepended* to the
+        // suffixes held by 'self'. The old suffixes are moved out first so
+        // that the results can be collected into a fresh allocation.
+        let capacity = suffixes.len().saturating_mul(prefixes.len());
+        let old = mem::replace(suffixes, Vec::with_capacity(capacity));
+        for (round, head) in prefixes.drain(..).enumerate() {
+            for tail in old.iter() {
+                if tail.is_exact() {
+                    let mut joined = Literal::exact(Vec::with_capacity(
+                        head.len() + tail.len(),
+                    ));
+                    joined.extend(&head);
+                    joined.extend(&tail);
+                    // An inexact piece in front taints the whole literal.
+                    if !head.is_exact() {
+                        joined.make_inexact();
+                    }
+                    suffixes.push(joined);
+                } else if round == 0 {
+                    // Nothing can be prepended to an inexact suffix, but
+                    // it still has to be kept. Copying it over during
+                    // the first round only avoids emitting it once per
+                    // literal in 'other'. (Such repeats would be correct,
+                    // just wasteful.)
+                    suffixes.push(tail.clone());
+                }
+            }
+        }
+        // Collapse any adjacent duplicates the cross product produced.
+        self.dedup();
+    }
+
+    /// The subtle preamble shared by `cross_forward` and `cross_reverse`. It
+    /// deals with `self` and/or `other` being infinite, draining `other` if
+    /// its literals go unused, and returns `None` when no cross product is
+    /// left for the caller to compute; otherwise both literal lists.
+    fn cross_preamble<'a>(
+        &'a mut self,
+        other: &'a mut Seq,
+    ) -> Option<(&'a mut Vec<Literal>, &'a mut Vec<Literal>)> {
+        let lits2 = match other.literals {
+            None => {
+                // If our current seq contains the empty string and the seq
+                // we're adding matches any literal, then it follows that the
+                // current seq must now also match any literal.
+                //
+                // Otherwise, we just have to make sure everything in this
+                // sequence is inexact.
+                if self.min_literal_len() == Some(0) {
+                    *self = Seq::infinite();
+                } else {
+                    self.make_inexact();
+                }
+                return None;
+            }
+            Some(ref mut lits) => lits,
+        };
+        let lits1 = match self.literals {
+            None => {
+                // If we aren't going to make it to the end of this routine
+                // where lits2 is drained, then we need to do it now.
+                lits2.drain(..);
+                return None;
+            }
+            Some(ref mut lits) => lits,
+        };
+        Some((lits1, lits2))
+    }
+
+    /// Unions the `other` sequence into this one.
+    ///
+    /// The literals are always drained out of the given `other` sequence,
+    /// even if they are being unioned into an infinite sequence. This permits
+    /// the caller to reuse the `other` sequence in another context.
+    ///
+    /// Some literal deduping may be performed. If any deduping happens,
+    /// any leftmost-first or "preference" order match semantics will be
+    /// preserved.
+    ///
+    /// # Example
+    ///
+    /// This example shows basic usage.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::Seq;
+    ///
+    /// let mut seq1 = Seq::new(&["foo", "bar"]);
+    /// let mut seq2 = Seq::new(&["bar", "quux", "foo"]);
+    /// seq1.union(&mut seq2);
+    ///
+    /// // The literals are pulled out of seq2.
+    /// assert_eq!(Some(0), seq2.len());
+    ///
+    /// // Adjacent literals are deduped, but non-adjacent literals may not be.
+    /// assert_eq!(Seq::new(&["foo", "bar", "quux", "foo"]), seq1);
+    /// ```
+    ///
+    /// This example shows that literals are drained from `other` even when
+    /// they aren't necessarily used.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::Seq;
+    ///
+    /// let mut seq1 = Seq::infinite();
+    /// // Infinite sequences have no finite length.
+    /// assert_eq!(None, seq1.len());
+    ///
+    /// let mut seq2 = Seq::new(&["bar", "quux", "foo"]);
+    /// seq1.union(&mut seq2);
+    ///
+    /// // seq1 is still infinite and seq2 has been drained.
+    /// assert_eq!(None, seq1.len());
+    /// assert_eq!(Some(0), seq2.len());
+    /// ```
+    #[inline]
+    pub fn union(&mut self, other: &mut Seq) {
+        let incoming = match other.literals.as_mut() {
+            Some(lits) => lits.drain(..),
+            None => {
+                // An infinite sequence absorbs whatever it is unioned
+                // with, so the result must be infinite as well.
+                self.make_infinite();
+                return;
+            }
+        };
+        // If 'self' is infinite, 'incoming' is simply dropped unused.
+        match self.literals.as_mut() {
+            Some(lits) => lits.extend(incoming),
+            None => return,
+        }
+        self.dedup();
+    }
+
+    /// Unions the `other` sequence into this one by splicing the `other`
+    /// sequence at the position of the first zero-length literal.
+    ///
+    /// This is useful for preserving preference order semantics when combining
+    /// two literal sequences. For example, in the regex `(a||f)+foo`, the
+    /// correct preference order prefix sequence is `[a, foo, f]`.
+    ///
+    /// The literals are always drained out of the given `other` sequence,
+    /// even if they are being unioned into an infinite sequence. This permits
+    /// the caller to reuse the `other` sequence in another context. Note that
+    /// the literals are drained even if no union is performed as well, i.e.,
+    /// when this sequence does not contain a zero-length literal.
+    ///
+    /// Some literal deduping may be performed. If any deduping happens,
+    /// any leftmost-first or "preference" order match semantics will be
+    /// preserved.
+    ///
+    /// # Example
+    ///
+    /// This example shows basic usage.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::Seq;
+    ///
+    /// let mut seq1 = Seq::new(&["a", "", "f", ""]);
+    /// let mut seq2 = Seq::new(&["foo"]);
+    /// seq1.union_into_empty(&mut seq2);
+    ///
+    /// // The literals are pulled out of seq2.
+    /// assert_eq!(Some(0), seq2.len());
+    /// // 'foo' gets spliced into seq1 where the first empty string occurs.
+    /// assert_eq!(Seq::new(&["a", "foo", "f"]), seq1);
+    /// ```
+    ///
+    /// This example shows that literals are drained from `other` even when
+    /// they aren't necessarily used.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::Seq;
+    ///
+    /// let mut seq1 = Seq::new(&["foo", "bar"]);
+    /// let mut seq2 = Seq::new(&["bar", "quux", "foo"]);
+    /// seq1.union_into_empty(&mut seq2);
+    ///
+    /// // seq1 has no zero length literals, so no splicing happens.
+    /// assert_eq!(Seq::new(&["foo", "bar"]), seq1);
+    /// // Even though no splicing happens, seq2 is still drained.
+    /// assert_eq!(Some(0), seq2.len());
+    /// ```
+    #[inline]
+    pub fn union_into_empty(&mut self, other: &mut Seq) {
+        let incoming = other.literals.as_mut().map(|src| src.drain(..));
+        let mine = match self.literals.as_mut() {
+            Some(lits) => lits,
+            None => return,
+        };
+        let slot = match mine.iter().position(|lit| lit.is_empty()) {
+            Some(slot) => slot,
+            None => return,
+        };
+        let incoming = match incoming {
+            Some(drain) => drain,
+            None => {
+                // Reaching this point means an empty literal was found
+                // above, and an infinite 'other' spliced into its place
+                // infects this seq, turning it infinite too.
+                self.literals = None;
+                return;
+            }
+        };
+        // The empties must be dropped *before* the splice, since the
+        // splice may bring in empties of its own that have to survive.
+        // Everything ahead of the first empty is non-empty and so stays
+        // put, which keeps 'slot' pointing at the right position.
+        mine.retain(|lit| !lit.is_empty());
+        mine.splice(slot..slot, incoming);
+        self.dedup();
+    }
+
+    /// Deduplicate adjacent equivalent literals in this sequence.
+    ///
+    /// If adjacent literals are equivalent strings but one is exact and the
+    /// other inexact, the inexact literal is kept and the exact one is
+    /// removed.
+    ///
+    /// Deduping an infinite sequence is a no-op.
+    ///
+    /// # Example
+    ///
+    /// This example shows how literals that are duplicate byte strings but
+    /// are not equivalent with respect to exactness are resolved.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// let mut seq = Seq::from_iter([
+    ///     Literal::exact("foo"),
+    ///     Literal::inexact("foo"),
+    /// ]);
+    /// seq.dedup();
+    ///
+    /// assert_eq!(Seq::from_iter([Literal::inexact("foo")]), seq);
+    /// ```
+    #[inline]
+    pub fn dedup(&mut self) {
+        if let Some(lits) = self.literals.as_mut() {
+            lits.dedup_by(|later, earlier| {
+                let same = later.as_bytes() == earlier.as_bytes();
+                // Equal bytes but differing exactness: both are marked
+                // inexact, so whichever one survives ends up inexact.
+                if same && later.is_exact() != earlier.is_exact() {
+                    later.make_inexact();
+                    earlier.make_inexact();
+                }
+                same
+            });
+        }
+    }
+
+    /// Sorts this sequence of literals lexicographically.
+    ///
+    /// Note that if, before sorting, a literal that is a prefix of another
+    /// literal appears after it, then after sorting, the sequence will not
+    /// represent the same preference order match semantics. For example,
+    /// sorting the sequence `[samwise, sam]` yields the sequence `[sam,
+    /// samwise]`. Under preference order semantics, the latter sequence will
+    /// never match `samwise` whereas the first sequence can.
+    ///
+    /// # Example
+    ///
+    /// This example shows basic usage.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::Seq;
+    ///
+    /// let mut seq = Seq::new(&["foo", "quux", "bar"]);
+    /// seq.sort();
+    ///
+    /// assert_eq!(Seq::new(&["bar", "foo", "quux"]), seq);
+    /// ```
+    #[inline]
+    pub fn sort(&mut self) {
+        // An infinite sequence stores no literals, which makes this a
+        // no-op for it.
+        self.literals.iter_mut().for_each(|lits| lits.sort());
+    }
+
+    /// Reverses all of the literals in this sequence.
+    ///
+    /// The order of the sequence itself is preserved.
+    ///
+    /// # Example
+    ///
+    /// This example shows basic usage.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::Seq;
+    ///
+    /// let mut seq = Seq::new(&["oof", "rab"]);
+    /// seq.reverse_literals();
+    /// assert_eq!(Seq::new(&["foo", "bar"]), seq);
+    /// ```
+    #[inline]
+    pub fn reverse_literals(&mut self) {
+        // Flattening the option covers both cases with a single loop: an
+        // infinite sequence just yields no literals to reverse.
+        for lit in self.literals.iter_mut().flatten() {
+            lit.reverse();
+        }
+    }
+
+    /// Shrinks this seq to its minimal size while respecting the preference
+    /// order of its literals.
+    ///
+    /// While this routine will remove duplicate literals from this seq, it
+    /// will also remove literals that can never match in a leftmost-first or
+    /// "preference order" search. Similar to [`Seq::dedup`], if a literal is
+    /// deduped, then the one that remains is made inexact.
+    ///
+    /// This is a no-op on seqs that are empty or not finite.
+    ///
+    /// # Example
+    ///
+    /// This example shows the difference between `{sam, samwise}` and
+    /// `{samwise, sam}`.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// // If 'sam' comes before 'samwise' and a preference order search is
+    /// // executed, then 'samwise' can never match.
+    /// let mut seq = Seq::new(&["sam", "samwise"]);
+    /// seq.minimize_by_preference();
+    /// assert_eq!(Seq::from_iter([Literal::inexact("sam")]), seq);
+    ///
+    /// // But if they are reversed, then it's possible for 'samwise' to match
+    /// // since it is given higher preference.
+    /// let mut seq = Seq::new(&["samwise", "sam"]);
+    /// seq.minimize_by_preference();
+    /// assert_eq!(Seq::new(&["samwise", "sam"]), seq);
+    /// ```
+    ///
+    /// This example shows that if an empty string is in this seq, then
+    /// anything that comes after it can never match.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// // An empty string is a prefix of all strings, so it automatically
+    /// // inhibits any subsequent strings from matching.
+    /// let mut seq = Seq::new(&["foo", "bar", "", "quux", "fox"]);
+    /// seq.minimize_by_preference();
+    /// let expected = Seq::from_iter([
+    ///     Literal::exact("foo"),
+    ///     Literal::exact("bar"),
+    ///     Literal::inexact(""),
+    /// ]);
+    /// assert_eq!(expected, seq);
+    ///
+    /// // And of course, if it's at the beginning, then it makes it impossible
+    /// // for anything else to match.
+    /// let mut seq = Seq::new(&["", "foo", "quux", "fox"]);
+    /// seq.minimize_by_preference();
+    /// assert_eq!(Seq::from_iter([Literal::inexact("")]), seq);
+    /// ```
+    #[inline]
+    pub fn minimize_by_preference(&mut self) {
+        if let Some(lits) = self.literals.as_mut() {
+            PreferenceTrie::minimize(lits, false);
+        }
+    }
+
+    /// Trims all literals in this seq such that only the first `len` bytes
+    /// remain. If a literal has less than or equal to `len` bytes, then it
+    /// remains unchanged. Otherwise, it is trimmed and made inexact.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// let mut seq = Seq::new(&["a", "foo", "quux"]);
+    /// seq.keep_first_bytes(2);
+    ///
+    /// let expected = Seq::from_iter([
+    ///     Literal::exact("a"),
+    ///     Literal::inexact("fo"),
+    ///     Literal::inexact("qu"),
+    /// ]);
+    /// assert_eq!(expected, seq);
+    /// ```
+    #[inline]
+    pub fn keep_first_bytes(&mut self, len: usize) {
+        // An infinite sequence yields nothing here, so it is untouched;
+        // otherwise every literal gets trimmed in turn.
+        for lit in self.literals.iter_mut().flatten() {
+            lit.keep_first_bytes(len);
+        }
+    }
+
+    /// Trims all literals in this seq such that only the last `len` bytes
+    /// remain. If a literal has less than or equal to `len` bytes, then it
+    /// remains unchanged. Otherwise, it is trimmed and made inexact.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Seq};
+    ///
+    /// let mut seq = Seq::new(&["a", "foo", "quux"]);
+    /// seq.keep_last_bytes(2);
+    ///
+    /// let expected = Seq::from_iter([
+    ///     Literal::exact("a"),
+    ///     Literal::inexact("oo"),
+    ///     Literal::inexact("ux"),
+    /// ]);
+    /// assert_eq!(expected, seq);
+    /// ```
+    #[inline]
+    pub fn keep_last_bytes(&mut self, len: usize) {
+        // An infinite sequence yields nothing here, so it is untouched;
+        // otherwise every literal gets trimmed in turn.
+        for lit in self.literals.iter_mut().flatten() {
+            lit.keep_last_bytes(len);
+        }
+    }
+
+    /// Reports whether this sequence holds a finite set of literals.
+    ///
+    /// A `false` result means the sequence is infinite, in which case it
+    /// has to be treated as though it contained every possible literal.
+    #[inline]
+    pub fn is_finite(&self) -> bool {
+        self.len().is_some()
+    }
+
+    /// Reports whether this sequence is finite and holds zero literals.
+    ///
+    /// Such a sequence never matches anything. Literal extraction only
+    /// produces one when the regex it came from cannot match at all.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.literals.as_ref().map_or(false, |lits| lits.is_empty())
+    }
+
+    /// Gives the number of literals held by this sequence, provided it is
+    /// finite. An infinite sequence has no length, so it yields `None`.
+    #[inline]
+    pub fn len(&self) -> Option<usize> {
+        self.literals.as_ref().map(Vec::len)
+    }
+
+    /// Returns true if and only if every literal in this sequence is exact.
+    /// An infinite sequence is never exact, so it yields false.
+    #[inline]
+    pub fn is_exact(&self) -> bool {
+        let lits = self.literals();
+        lits.map_or(false, |lits| lits.iter().all(|lit| lit.is_exact()))
+    }
+
+    /// Returns true if and only if no literal in this sequence is exact.
+    /// An infinite sequence is always inexact, so it yields true.
+    #[inline]
+    pub fn is_inexact(&self) -> bool {
+        let lits = self.literals();
+        lits.map_or(true, |lits| !lits.iter().any(|lit| lit.is_exact()))
+    }
+
+    /// Gives an upper bound on the number of literals that unioning `self`
+    /// with `other` could produce, or `None` when either one is infinite.
+    #[inline]
+    pub fn max_union_len(&self, other: &Seq) -> Option<usize> {
+        match (self.len(), other.len()) {
+            (Some(mine), Some(theirs)) => Some(mine.saturating_add(theirs)),
+            _ => None,
+        }
+    }
+
+    /// Gives an upper bound on the number of literals that crossing `self`
+    /// with `other` could produce, or `None` when either one is infinite.
+    #[inline]
+    pub fn max_cross_len(&self, other: &Seq) -> Option<usize> {
+        match (self.len(), other.len()) {
+            (Some(mine), Some(theirs)) => Some(mine.saturating_mul(theirs)),
+            _ => None,
+        }
+    }
+
+    /// Gives the length of the shortest literal in this sequence, or `None`
+    /// when the sequence is infinite or holds no literals at all.
+    #[inline]
+    pub fn min_literal_len(&self) -> Option<usize> {
+        let lits = self.literals.as_ref()?;
+        lits.iter().map(|lit| lit.len()).min()
+    }
+
+    /// Gives the length of the longest literal in this sequence, or `None`
+    /// when the sequence is infinite or holds no literals at all.
+    #[inline]
+    pub fn max_literal_len(&self) -> Option<usize> {
+        let lits = self.literals.as_ref()?;
+        lits.iter().map(|lit| lit.len()).max()
+    }
+
+    /// Returns the longest common prefix from this seq.
+    ///
+    /// If the seq matches any literal or otherwise contains no literals, then
+    /// there is no meaningful prefix and this returns `None`.
+    ///
+    /// # Example
+    ///
+    /// This shows some example seqs and their longest common prefix.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::Seq;
+    ///
+    /// let seq = Seq::new(&["foo", "foobar", "fo"]);
+    /// assert_eq!(Some(&b"fo"[..]), seq.longest_common_prefix());
+    /// let seq = Seq::new(&["foo", "foo"]);
+    /// assert_eq!(Some(&b"foo"[..]), seq.longest_common_prefix());
+    /// let seq = Seq::new(&["foo", "bar"]);
+    /// assert_eq!(Some(&b""[..]), seq.longest_common_prefix());
+    /// let seq = Seq::new(&[""]);
+    /// assert_eq!(Some(&b""[..]), seq.longest_common_prefix());
+    ///
+    /// let seq = Seq::infinite();
+    /// assert_eq!(None, seq.longest_common_prefix());
+    /// let seq = Seq::empty();
+    /// assert_eq!(None, seq.longest_common_prefix());
+    /// ```
+    #[inline]
+    pub fn longest_common_prefix(&self) -> Option<&[u8]> {
+        // An infinite seq (matches everything) and an empty seq (matches
+        // nothing) both lack a meaningful common prefix, and both bail out
+        // of the line below with 'None'.
+        let (first, rest) = self.literals.as_ref()?.split_first()?;
+        // The answer is always a prefix of the first literal, so all that
+        // needs tracking is how many of its leading bytes are still
+        // shared with every literal seen so far.
+        let base = first.as_bytes();
+        let mut len = base.len();
+        for lit in rest {
+            let shared = lit
+                .as_bytes()
+                .iter()
+                .zip(&base[..len])
+                .take_while(|(x, y)| x == y)
+                .count();
+            if shared == 0 {
+                // Nothing in common; later literals can't change that.
+                return Some(&[]);
+            }
+            len = shared;
+        }
+        Some(&base[..len])
+    }
+
+    /// Returns the longest common suffix from this seq.
+    ///
+    /// If the seq matches any literal or otherwise contains no literals, then
+    /// there is no meaningful suffix and this returns `None`.
+    ///
+    /// # Example
+    ///
+    /// This shows some example seqs and their longest common suffix.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::Seq;
+    ///
+    /// let seq = Seq::new(&["oof", "raboof", "of"]);
+    /// assert_eq!(Some(&b"of"[..]), seq.longest_common_suffix());
+    /// let seq = Seq::new(&["foo", "foo"]);
+    /// assert_eq!(Some(&b"foo"[..]), seq.longest_common_suffix());
+    /// let seq = Seq::new(&["foo", "bar"]);
+    /// assert_eq!(Some(&b""[..]), seq.longest_common_suffix());
+    /// let seq = Seq::new(&[""]);
+    /// assert_eq!(Some(&b""[..]), seq.longest_common_suffix());
+    ///
+    /// let seq = Seq::infinite();
+    /// assert_eq!(None, seq.longest_common_suffix());
+    /// let seq = Seq::empty();
+    /// assert_eq!(None, seq.longest_common_suffix());
+    /// ```
+    #[inline]
+    pub fn longest_common_suffix(&self) -> Option<&[u8]> {
+        // An infinite seq (matches everything) and an empty seq (matches
+        // nothing) both lack a meaningful common suffix, and both bail out
+        // of the line below with 'None'.
+        let (first, rest) = self.literals.as_ref()?.split_first()?;
+        // The answer is always a suffix of the first literal, so all that
+        // needs tracking is how many of its trailing bytes are still
+        // shared with every literal seen so far.
+        let base = first.as_bytes();
+        let mut len = base.len();
+        for lit in rest {
+            let shared = lit
+                .as_bytes()
+                .iter()
+                .rev()
+                .zip(base.iter().rev().take(len))
+                .take_while(|(x, y)| x == y)
+                .count();
+            if shared == 0 {
+                // Nothing in common; later literals can't change that.
+                return Some(&[]);
+            }
+            len = shared;
+        }
+        Some(&base[base.len() - len..])
+    }
+
+    /// Optimizes this seq while treating its literals as prefixes and
+    /// respecting the preference order of its literals.
+    ///
+    /// The specific way "optimization" works is meant to be an implementation
+    /// detail, as it essentially represents a set of heuristics. The goal
+    /// that optimization tries to accomplish is to make the literals in this
+    /// set reflect inputs that will result in a more effective prefilter.
+    /// Principally by reducing the false positive rate of candidates found by
+    /// the literals in this sequence. That is, when a match of a literal is
+    /// found, we would like it to be a strong predictor of the overall match
+    /// of the regex. If it isn't, then much time will be spent starting and
+    /// stopping the prefilter search and attempting to confirm the match only
+    /// to have it fail.
+    ///
+    /// Some of those heuristics might be:
+    ///
+    /// * Identifying a common prefix from a larger sequence of literals, and
+    /// shrinking the sequence down to that single common prefix.
+    /// * Rejecting the sequence entirely if it is believed to result in very
+    /// high false positive rate. When this happens, the sequence is made
+    /// infinite.
+    /// * Shrinking the sequence to a smaller number of literals representing
+    /// prefixes, but not shrinking it so much as to make literals too short.
+    /// (A sequence with very short literals, of 1 or 2 bytes, will typically
+    /// result in a higher false positive rate.)
+    ///
+    /// Optimization should only be run once extraction is complete. Namely,
+    /// optimization may make assumptions that do not compose with other
+    /// operations in the middle of extraction. For example, optimization will
+    /// reduce `[E(sam), E(samwise)]` to `[E(sam)]`, but such a transformation
+    /// is only valid if no other extraction will occur. If other extraction
+    /// may occur, then the correct transformation would be to `[I(sam)]`.
+    ///
+    /// The [`Seq::optimize_for_suffix_by_preference`] does the same thing, but
+    /// for suffixes.
+    ///
+    /// # Example
+    ///
+    /// This shows how optimization might transform a sequence. Note that
+    /// the specific behavior is not a documented guarantee. The heuristics
+    /// used are an implementation detail and may change over time in semver
+    /// compatible releases.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Seq, Literal};
+    ///
+    /// let mut seq = Seq::new(&[
+    ///     "samantha",
+    ///     "sam",
+    ///     "samwise",
+    ///     "frodo",
+    /// ]);
+    /// seq.optimize_for_prefix_by_preference();
+    /// assert_eq!(Seq::from_iter([
+    ///     Literal::exact("samantha"),
+    ///     // Kept exact even though 'samwise' got pruned
+    ///     // because optimization assumes literal extraction
+    ///     // has finished.
+    ///     Literal::exact("sam"),
+    ///     Literal::exact("frodo"),
+    /// ]), seq);
+    /// ```
+    ///
+    /// # Example: optimization may make the sequence infinite
+    ///
+    /// If the heuristics deem that the sequence could cause a very high false
+    /// positive rate, then it may make the sequence infinite, effectively
+    /// disabling its use as a prefilter.
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Seq, Literal};
+    ///
+    /// let mut seq = Seq::new(&[
+    ///     "samantha",
+    ///     // An empty string matches at every position,
+    ///     // thus rendering the prefilter completely
+    ///     // ineffective.
+    ///     "",
+    ///     "sam",
+    ///     "samwise",
+    ///     "frodo",
+    /// ]);
+    /// seq.optimize_for_prefix_by_preference();
+    /// assert!(!seq.is_finite());
+    /// ```
+    ///
+    /// Do note that just because there is a `" "` in the sequence, that
+    /// doesn't mean the sequence will always be made infinite after it is
+    /// optimized. Namely, if the sequence is considered exact (any match
+    /// corresponds to an overall match of the original regex), then any match
+    /// is an overall match, and so the false positive rate is always `0`.
+    ///
+    /// To demonstrate this, we remove `samwise` from our sequence. This
+    /// results in no optimization happening and all literals remain exact.
+    /// Thus the entire sequence is exact, and it is kept as-is, even though
+    /// one is an ASCII space:
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Seq, Literal};
+    ///
+    /// let mut seq = Seq::new(&[
+    ///     "samantha",
+    ///     " ",
+    ///     "sam",
+    ///     "frodo",
+    /// ]);
+    /// seq.optimize_for_prefix_by_preference();
+    /// assert!(seq.is_finite());
+    /// ```
+    #[inline]
+    pub fn optimize_for_prefix_by_preference(&mut self) {
+        self.optimize_by_preference(true); // 'true' = treat as prefixes
+    }
+
+    /// Optimizes this seq while treating its literals as suffixes and
+    /// respecting the preference order of its literals.
+    ///
+    /// Optimization should only be run once extraction is complete.
+    ///
+    /// The [`Seq::optimize_for_prefix_by_preference`] does the same thing, but
+    /// for prefixes. See its documentation for more explanation.
+    #[inline]
+    pub fn optimize_for_suffix_by_preference(&mut self) {
+        self.optimize_by_preference(false); // 'false' = treat as suffixes
+    }
+
+    fn optimize_by_preference(&mut self, prefix: bool) {
+        let origlen = match self.len() {
+            None => return, // no literal count to work with; leave as-is
+            Some(len) => len,
+        };
+        // Just give up now if our sequence contains an empty string.
+        if self.min_literal_len().map_or(false, |len| len == 0) {
+            // We squash the sequence so that nobody else gets any bright
+            // ideas to try and use it. An empty string implies a match at
+            // every position. A prefilter cannot help you here.
+            self.make_infinite();
+            return;
+        }
+        // For prefixes only, make sure we start with the smallest sequence
+        // possible. We use a special version of preference minimization that
+        // retains exactness. This is legal because optimization is only
+        // expected to occur once extraction is complete.
+        if prefix {
+            if let Some(ref mut lits) = self.literals {
+                PreferenceTrie::minimize(lits, true);
+            }
+        }
+
+        // Look for a common prefix (or suffix). If we found one of those and
+        // it's long enough, then it's a good bet that it will be our fastest
+        // possible prefilter since single-substring search is so fast.
+        let fix = if prefix {
+            self.longest_common_prefix()
+        } else {
+            self.longest_common_suffix()
+        };
+        if let Some(fix) = fix {
+            // As a special case, if we have a common prefix and the leading
+            // byte of that prefix is one that we think probably occurs rarely,
+            // then strip everything down to just that single byte. This should
+            // promote the use of memchr.
+            //
+            // ... we only do this though if our sequence has more than one
+            // literal. Otherwise, we'd rather just stick with a single literal
+            // scan. That is, using memchr is probably better than looking
+            // for 2 or more literals, but probably not as good as a straight
+            // memmem search.
+            //
+            // ... and also only do this when the prefix is short and probably
+            // not too discriminatory anyway. If it's longer, then it's
+            // probably quite discriminatory and thus is likely to have a low
+            // false positive rate.
+            if prefix
+                && origlen > 1
+                && fix.len() >= 1
+                && fix.len() <= 3
+                && rank(fix[0]) < 200
+            {
+                self.keep_first_bytes(1);
+                self.dedup();
+                return;
+            }
+            // We only strip down to the common prefix/suffix if we think
+            // the existing set of literals isn't great, or if the common
+            // prefix/suffix is expected to be particularly discriminatory.
+            let isfast =
+                self.is_exact() && self.len().map_or(false, |len| len <= 16);
+            let usefix = fix.len() > 4 || (fix.len() > 1 && !isfast);
+            if usefix {
+                // If we keep exactly the number of bytes equal to the length
+                // of the prefix (or suffix), then by the definition of a
+                // prefix, every literal in the sequence will be equivalent.
+                // Thus, 'dedup' will leave us with one literal.
+                //
+                // We do it this way to avoid an alloc, but also to make sure
+                // the exactness of literals is kept (or not).
+                if prefix {
+                    self.keep_first_bytes(fix.len());
+                } else {
+                    self.keep_last_bytes(fix.len());
+                }
+                self.dedup();
+                assert_eq!(Some(1), self.len());
+                // We still fall through here. In particular, we want our
+                // longest common prefix to be subject to the poison check.
+            }
+        }
+        // If we have an exact sequence, we *probably* just want to keep it
+        // as-is. But there are some cases where we don't. So we save a copy of
+        // the exact sequence now, and then try to do some more optimizations
+        // below. If those don't work out, we go back to this exact sequence.
+        //
+        // The specific motivation for this is that we sometimes wind up with
+        // an exact sequence with a hefty number of literals. Say, 100. If we
+        // stuck with that, it would be too big for Teddy and would result in
+        // using Aho-Corasick. Which is fine... but the lazy DFA is plenty
+        // suitable in such cases. The real issue is that we will wind up not
+        // using a fast prefilter at all. So in cases like this, even though
+        // we have an exact sequence, it would be better to try and shrink the
+        // sequence (which we do below) and use it as a prefilter that can
+        // produce false positive matches.
+        //
+        // But if the shrinking below results in a sequence that "sucks," then
+        // we don't want to use that because we already have an exact sequence
+        // in hand.
+        let exact: Option<Seq> =
+            if self.is_exact() { Some(self.clone()) } else { None };
+        // Now we attempt to shorten the sequence. The idea here is that we
+        // don't want to look for too many literals, but we want to shorten
+        // our sequence enough to improve our odds of using better algorithms
+        // downstream (such as Teddy).
+        //
+        // Each pair in this list is the maximal number of bytes to keep for
+        // every literal (from the front for prefixes, from the back for
+        // suffixes) and the sequence length above which to do it.
+        //
+        // So for example, the pair (3, 500) would mean, "if we have more than
+        // 500 literals in our sequence, then truncate all of our literals
+        // such that they are at most 3 bytes in length and then minimize the
+        // sequence."
+        const ATTEMPTS: [(usize, usize); 5] =
+            [(5, 10), (4, 10), (3, 64), (2, 64), (1, 10)];
+        for (keep, limit) in ATTEMPTS {
+            let len = match self.len() {
+                None => break,
+                Some(len) => len,
+            };
+            if len <= limit {
+                break; // within this attempt's limit: stop trimming
+            }
+            if prefix {
+                self.keep_first_bytes(keep);
+            } else {
+                self.keep_last_bytes(keep);
+            }
+            if prefix {
+                if let Some(ref mut lits) = self.literals {
+                    PreferenceTrie::minimize(lits, true);
+                }
+            }
+        }
+        // Check for a poison literal. A poison literal is one that is short
+        // and is believed to have a very high match count. These poisons
+        // generally lead to a prefilter with a very high false positive rate,
+        // and thus overall worse performance.
+        //
+        // We do this last because we could have gone from a non-poisonous
+        // sequence to a poisonous one. Perhaps we should add some code to
+        // prevent such transitions in the first place, but then again, we
+        // likely only made the transition in the first place if the sequence
+        // was itself huge. And huge sequences are themselves poisonous. So...
+        if let Some(lits) = self.literals() {
+            if lits.iter().any(|lit| lit.is_poisonous()) {
+                self.make_infinite();
+            }
+        }
+        // OK, if we had an exact sequence before attempting more optimizations
+        // above and our post-optimized sequence sucks for some reason or
+        // another, then we go back to the exact sequence.
+        if let Some(exact) = exact {
+            // If optimizing resulted in dropping our literals, then certainly
+            // backup and use the exact sequence that we had.
+            if !self.is_finite() {
+                *self = exact;
+                return;
+            }
+            // If our optimized sequence contains a short literal, then it's
+            // *probably* not so great. So throw it away and revert to the
+            // exact sequence.
+            if self.min_literal_len().map_or(true, |len| len <= 2) {
+                *self = exact;
+                return;
+            }
+            // Finally, if our optimized sequence is "big" (i.e., can't use
+            // Teddy), then also don't use it and rely on the exact sequence.
+            if self.len().map_or(true, |len| len > 64) {
+                *self = exact;
+                return;
+            }
+        }
+    }
+}
+
+impl core::fmt::Debug for Seq {
+    /// Writes `Seq` then the literal list, or `[∞]` if there is none.
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        f.write_str("Seq")?;
+        match self.literals() {
+            Some(lits) => f.debug_list().entries(lits.iter()).finish(),
+            None => f.write_str("[∞]"),
+        }
+    }
+}
+
+impl FromIterator<Literal> for Seq {
+    /// Pushes every yielded literal, in order, onto an empty sequence.
+    fn from_iter<T: IntoIterator<Item = Literal>>(it: T) -> Seq {
+        it.into_iter().fold(Seq::empty(), |mut acc, lit| {
+            acc.push(lit);
+            acc
+        })
+    }
+}
+
+/// A single literal extracted from an [`Hir`] expression.
+///
+/// A literal is composed of two things:
+///
+/// * A sequence of bytes. No guarantees with respect to UTF-8 are provided.
+///   In particular, even if the regex a literal is extracted from is UTF-8,
+///   the literal extracted may not be valid UTF-8. (For example, if an
+///   [`Extractor`] limit trimmed a literal in a way that splits a codepoint.)
+/// * Whether the literal is "exact" or not. An "exact" literal means that it
+///   has not been trimmed, and may continue to be extended. If a literal is
+///   "exact" after visiting the entire `Hir` expression, then this implies
+///   that the literal leads to a match state. (Although it doesn't imply that
+///   all occurrences of the literal correspond to a match of the regex, since
+///   literal extraction ignores look-around assertions.)
+#[derive(Clone, Eq, PartialEq, PartialOrd, Ord)]
+pub struct Literal {
+    bytes: Vec<u8>, // the literal's bytes; not necessarily valid UTF-8
+    exact: bool, // cleared by trimming and by `make_inexact`
+}
+
+impl Literal {
+    /// Builds a literal from the given bytes and flags it as exact.
+    #[inline]
+    pub fn exact<B: Into<Vec<u8>>>(bytes: B) -> Literal {
+        Literal { exact: true, bytes: bytes.into() }
+    }
+
+    /// Builds a literal from the given bytes and flags it as inexact.
+    #[inline]
+    pub fn inexact<B: Into<Vec<u8>>>(bytes: B) -> Literal {
+        Literal { exact: false, bytes: bytes.into() }
+    }
+
+    /// Borrows the bytes that make up this literal.
+    #[inline]
+    pub fn as_bytes(&self) -> &[u8] {
+        self.bytes.as_slice()
+    }
+
+    /// Consumes this literal and returns its bytes.
+    ///
+    /// Whether the literal was "exact" or not is discarded.
+    #[inline]
+    pub fn into_bytes(self) -> Vec<u8> {
+        self.bytes
+    }
+
+    /// Reports how many bytes this literal holds.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.bytes.len()
+    }
+
+    /// Reports whether this literal holds no bytes at all.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.bytes.is_empty()
+    }
+
+    /// Reports whether this literal is currently flagged as exact.
+    #[inline]
+    pub fn is_exact(&self) -> bool {
+        self.exact
+    }
+
+    /// Flags this literal as inexact.
+    ///
+    /// An inexact literal is never extended; [`Seq::cross_forward`], for
+    /// example, leaves inexact literals as they are.
+    #[inline]
+    pub fn make_inexact(&mut self) {
+        self.exact = false;
+    }
+
+    /// Reverses the order of the bytes in this literal, in place.
+    #[inline]
+    pub fn reverse(&mut self) {
+        self.bytes.as_mut_slice().reverse();
+    }
+
+    /// Appends the bytes of `lit` to this literal.
+    ///
+    /// Does nothing when this literal is inexact. The exactness of `lit` is
+    /// not consulted.
+    #[inline]
+    pub fn extend(&mut self, lit: &Literal) {
+        if self.exact {
+            self.bytes.extend_from_slice(lit.as_bytes());
+        }
+    }
+
+    /// Truncates this literal to its first `len` bytes and marks it inexact.
+    ///
+    /// A literal that is already at most `len` bytes long is left untouched,
+    /// exactness included.
+    #[inline]
+    pub fn keep_first_bytes(&mut self, len: usize) {
+        if self.len() > len {
+            self.exact = false;
+            self.bytes.truncate(len);
+        }
+    }
+
+    /// Keeps only the last `len` bytes of this literal and marks it inexact.
+    ///
+    /// A literal that is already at most `len` bytes long is left untouched.
+    #[inline]
+    pub fn keep_last_bytes(&mut self, len: usize) {
+        let total = self.len();
+        if total > len {
+            self.exact = false;
+            self.bytes.drain(..total - len);
+        }
+    }
+
+    /// Whether this literal likely matches too often to be a good prefilter.
+    fn is_poisonous(&self) -> bool {
+        let only = self.len() == 1;
+        self.bytes.first().map_or(true, |&b| only && rank(b) >= 250)
+    }
+}
+
+impl From<u8> for Literal {
+    fn from(byte: u8) -> Literal {
+        Literal { bytes: vec![byte], exact: true } // one exact byte
+    }
+}
+
+impl From<char> for Literal {
+    fn from(ch: char) -> Literal {
+        let mut utf8 = [0u8; 4]; // a `char` encodes to at most 4 UTF-8 bytes
+        Literal::exact(ch.encode_utf8(&mut utf8).as_bytes())
+    }
+}
+
+impl AsRef<[u8]> for Literal {
+    fn as_ref(&self) -> &[u8] {
+        &self.bytes // the same bytes that `as_bytes` exposes
+    }
+}
+
+impl core::fmt::Debug for Literal {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        // The tuple name encodes exactness: `E` is exact, `I` is inexact.
+        let name = if self.is_exact() { "E" } else { "I" };
+        let payload = crate::debug::Bytes(self.as_bytes());
+        f.debug_tuple(name).field(&payload).finish()
+    }
+}
+
+/// A "preference" trie that rejects literals that will never match when
+/// executing a leftmost first or "preference" search.
+///
+/// For example, if 'sam' is inserted, then trying to insert 'samwise' will be
+/// rejected because 'samwise' can never match since 'sam' will always take
+/// priority. However, if 'samwise' is inserted first, then inserting 'sam'
+/// after it is accepted. In this case, either 'samwise' or 'sam' can match in
+/// a "preference" search.
+///
+/// Note that we only use this trie as a "set." That is, given a sequence of
+/// literals, we insert each one in order. An `insert` will reject a literal
+/// if an already inserted literal is a prefix of it (an exact duplicate
+/// included). Thus, to rebuild the "minimal" sequence, we simply only keep
+/// literals that were successfully inserted. (Since we don't need traversal,
+/// one wonders whether we can make some simplifications here, but I haven't
+/// given it a ton of thought and I've never seen this show up on a profile.
+/// Because of the heuristic limits imposed on literal extractions, the size
+/// of the inputs here is usually very small.)
+#[derive(Debug)]
+struct PreferenceTrie {
+    /// The states in this trie. The index of a state in this vector is its ID.
+    states: Vec<State>,
+    /// This vec indicates which states are match states. It always has
+    /// the same length as `states` and is indexed by the same state ID.
+    /// A state with identifier `sid` is a match state if and only if
+    /// `matches[sid].is_some()`. The option contains the index of the literal
+    /// corresponding to the match. The index is offset by 1 so that it fits in
+    /// a NonZeroUsize.
+    matches: Vec<Option<NonZeroUsize>>,
+    /// The index to allocate to the next literal added to this trie. Starts at
+    /// 1 and increments by 1 for every literal successfully added to the trie.
+    next_literal_index: usize,
+}
+
+/// A single state in a trie. Uses a sparse representation for its transitions.
+#[derive(Debug, Default)]
+struct State {
+    /// Sparse representation of the transitions out of this state. Transitions
+    /// are sorted by byte. There is at most one such transition for any
+    /// particular byte.
+    trans: Vec<(u8, usize)>, // (input byte, ID of the state it leads to)
+}
+
+impl PreferenceTrie {
+    /// Shrinks `literals` in place to the literals that can still match under
+    /// preference order, keeping them in their original relative order.
+    ///
+    /// A literal is dropped when a literal retained before it is a prefix of
+    /// it. With `keep_exact` set, every retained literal keeps whatever
+    /// exactness it had. Otherwise, each retained literal that caused a drop
+    /// is marked inexact. Keeping exactness is only appropriate for a fully
+    /// extracted `Seq` of exact literals: nothing needs to match after them,
+    /// and the dropped literals could never have matched anyway.
+    fn minimize(literals: &mut Vec<Literal>, keep_exact: bool) {
+        let mut trie = PreferenceTrie {
+            states: Vec::new(),
+            matches: Vec::new(),
+            next_literal_index: 1,
+        };
+        // Positions (in the retained list) of literals that blocked another.
+        let mut blockers: Vec<usize> = Vec::new();
+        literals.retain_mut(|lit| {
+            let outcome = trie.insert(lit.as_bytes());
+            if let (Err(blocker), false) = (&outcome, keep_exact) {
+                blockers.push(blocker.checked_sub(1).unwrap());
+            }
+            outcome.is_ok()
+        });
+        // Nothing is recorded above when `keep_exact` is true.
+        for pos in blockers {
+            literals[pos].make_inexact();
+        }
+    }
+
+    /// Tries to add `bytes` to this trie.
+    ///
+    /// `Ok` carries the one-based index assigned to the newly added byte
+    /// string. `Err` means the byte string was rejected because a byte string
+    /// already in the trie is a prefix of it (or equal to it), and carries
+    /// that earlier byte string's one-based index. In other words, `bytes` is
+    /// accepted only if it could still match in a preference order search.
+    fn insert(&mut self, bytes: &[u8]) -> Result<usize, usize> {
+        let mut sid = self.root();
+        if let Some(blocker) = self.matches[sid] {
+            return Err(blocker.get());
+        }
+        for &byte in bytes {
+            let trans = &self.states[sid].trans;
+            let found = trans.binary_search_by_key(&byte, |&(b, _)| b);
+            sid = match found {
+                Ok(at) => {
+                    let target = self.states[sid].trans[at].1;
+                    if let Some(blocker) = self.matches[target] {
+                        return Err(blocker.get());
+                    }
+                    target
+                }
+                Err(at) => {
+                    let fresh = self.create_state();
+                    self.states[sid].trans.insert(at, (byte, fresh));
+                    fresh
+                }
+            };
+        }
+        let assigned = self.next_literal_index;
+        self.next_literal_index = assigned + 1;
+        self.matches[sid] = NonZeroUsize::new(assigned);
+        Ok(assigned)
+    }
+
+    /// Returns the ID of the root state, creating the root on first use.
+    fn root(&mut self) -> usize {
+        if self.states.is_empty() {
+            self.create_state()
+        } else {
+            0
+        }
+    }
+
+    /// Appends a new state with no transitions, not marked as a match, and
+    /// returns its ID.
+    fn create_state(&mut self) -> usize {
+        self.states.push(State::default());
+        self.matches.push(None);
+        self.states.len() - 1
+    }
+}
+
+/// Looks up the "rank" of the given byte.
+///
+/// The result is always between `0` and `255`, inclusive. Ranks come from a
+/// heuristic background distribution of relative byte frequencies: the lower
+/// the rank of a byte, the less likely that byte is believed to appear in an
+/// arbitrary haystack.
+pub fn rank(byte: u8) -> u8 {
+    let index = usize::from(byte);
+    crate::rank::BYTE_FREQUENCIES[index]
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn parse(pattern: &str) -> Hir { // `utf8(false)`; panics on bad input
+        crate::ParserBuilder::new().utf8(false).build().parse(pattern).unwrap()
+    }
+
+    fn prefixes(pattern: &str) -> Seq { // prefix literals of `pattern`
+        Extractor::new().kind(ExtractKind::Prefix).extract(&parse(pattern))
+    }
+
+    fn suffixes(pattern: &str) -> Seq { // suffix literals of `pattern`
+        Extractor::new().kind(ExtractKind::Suffix).extract(&parse(pattern))
+    }
+
+    fn e(pattern: &str) -> (Seq, Seq) {
+        (prefixes(pattern), suffixes(pattern)) // (prefix seq, suffix seq)
+    }
+
+    #[allow(non_snake_case)]
+    fn E(x: &str) -> Literal {
+        Literal::exact(x) // exact literal holding the bytes of `x`
+    }
+
+    #[allow(non_snake_case)]
+    fn I(x: &str) -> Literal {
+        Literal::inexact(x) // inexact literal holding the bytes of `x`
+    }
+
+    fn seq<I: IntoIterator<Item = Literal>>(it: I) -> Seq {
+        it.into_iter().collect() // goes through `FromIterator<Literal>`
+    }
+
+    fn infinite() -> (Seq, Seq) {
+        (Seq::infinite(), Seq::infinite()) // same pair shape as `e` returns
+    }
+
+    fn inexact<I1, I2>(it1: I1, it2: I2) -> (Seq, Seq)
+    where
+        I1: IntoIterator<Item = Literal>,
+        I2: IntoIterator<Item = Literal>,
+    {
+        (seq(it1), seq(it2)) // literals are taken exactly as given
+    }
+
+    fn exact<B: AsRef<[u8]>, I: IntoIterator<Item = B>>(it: I) -> (Seq, Seq) {
+        // The same sequence serves as both the prefix and suffix expectation.
+        let both = Seq::new(it);
+        (both.clone(), both)
+    }
+
+    fn opt<B: AsRef<[u8]>, I: IntoIterator<Item = B>>(it: I) -> (Seq, Seq) {
+        let (mut pre, mut suf) = exact(it);
+        pre.optimize_for_prefix_by_preference();
+        suf.optimize_for_suffix_by_preference();
+        (pre, suf)
+    }
+
+    #[test]
+    fn literal() {
+        assert_eq!(exact(["a"]), e("a"));
+        assert_eq!(exact(["aaaaa"]), e("aaaaa"));
+        assert_eq!(exact(["A", "a"]), e("(?i-u)a"));
+        assert_eq!(exact(["AB", "Ab", "aB", "ab"]), e("(?i-u)ab"));
+        assert_eq!(exact(["abC", "abc"]), e("ab(?i-u)c"));
+        // A byte that is not valid UTF-8 on its own is extracted as well.
+        assert_eq!(exact([b"\xFF"]), e(r"(?-u:\xFF)"));
+
+        #[cfg(feature = "unicode-case")]
+        {
+            assert_eq!(exact(["☃"]), e("☃"));
+            assert_eq!(exact(["☃"]), e("(?i)☃"));
+            assert_eq!(exact(["☃☃☃☃☃"]), e("☃☃☃☃☃"));
+
+            assert_eq!(exact(["Δ"]), e("Δ"));
+            assert_eq!(exact(["δ"]), e("δ"));
+            assert_eq!(exact(["Δ", "δ"]), e("(?i)Δ"));
+            assert_eq!(exact(["Δ", "δ"]), e("(?i)δ"));
+            // Case folding also pulls in `ſ` alongside `S` and `s`.
+            assert_eq!(exact(["S", "s", "ſ"]), e("(?i)S"));
+            assert_eq!(exact(["S", "s", "ſ"]), e("(?i)s"));
+            assert_eq!(exact(["S", "s", "ſ"]), e("(?i)ſ"));
+        }
+        // A long multi-character literal is still extracted as one literal.
+        let letters = "ͱͳͷΐάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋ";
+        assert_eq!(exact([letters]), e(letters));
+    }
+
+    #[test]
+    fn class() {
+        assert_eq!(exact(["a", "b", "c"]), e("[abc]"));
+        assert_eq!(exact(["a1b", "a2b", "a3b"]), e("a[123]b"));
+        assert_eq!(exact(["δ", "ε"]), e("[εδ]")); // δ first despite `[εδ]`
+        #[cfg(feature = "unicode-case")]
+        {
+            assert_eq!(exact(["Δ", "Ε", "δ", "ε", "ϵ"]), e(r"(?i)[εδ]"));
+        }
+    }
+
+    #[test]
+    fn look() {
+        assert_eq!(exact(["ab"]), e(r"a\Ab")); // look-around in the middle
+        assert_eq!(exact(["ab"]), e(r"a\zb"));
+        assert_eq!(exact(["ab"]), e(r"a(?m:^)b"));
+        assert_eq!(exact(["ab"]), e(r"a(?m:$)b"));
+        assert_eq!(exact(["ab"]), e(r"a\bb"));
+        assert_eq!(exact(["ab"]), e(r"a\Bb"));
+        assert_eq!(exact(["ab"]), e(r"a(?-u:\b)b"));
+        assert_eq!(exact(["ab"]), e(r"a(?-u:\B)b"));
+        // Look-around at the start of the pattern.
+        assert_eq!(exact(["ab"]), e(r"^ab"));
+        assert_eq!(exact(["ab"]), e(r"$ab"));
+        assert_eq!(exact(["ab"]), e(r"(?m:^)ab"));
+        assert_eq!(exact(["ab"]), e(r"(?m:$)ab"));
+        assert_eq!(exact(["ab"]), e(r"\bab"));
+        assert_eq!(exact(["ab"]), e(r"\Bab"));
+        assert_eq!(exact(["ab"]), e(r"(?-u:\b)ab"));
+        assert_eq!(exact(["ab"]), e(r"(?-u:\B)ab"));
+        // Look-around at the end of the pattern.
+        assert_eq!(exact(["ab"]), e(r"ab^"));
+        assert_eq!(exact(["ab"]), e(r"ab$"));
+        assert_eq!(exact(["ab"]), e(r"ab(?m:^)"));
+        assert_eq!(exact(["ab"]), e(r"ab(?m:$)"));
+        assert_eq!(exact(["ab"]), e(r"ab\b"));
+        assert_eq!(exact(["ab"]), e(r"ab\B"));
+        assert_eq!(exact(["ab"]), e(r"ab(?-u:\b)"));
+        assert_eq!(exact(["ab"]), e(r"ab(?-u:\B)"));
+        // A leading `^` does not change what is extracted from `aZ*b`.
+        let expected = (seq([I("aZ"), E("ab")]), seq([I("Zb"), E("ab")]));
+        assert_eq!(expected, e(r"^aZ*b"));
+    }
+
+    #[test]
+    fn repetition() {
+        assert_eq!(exact(["a", ""]), e(r"a?"));
+        assert_eq!(exact(["", "a"]), e(r"a??"));
+        assert_eq!(inexact([I("a"), E("")], [I("a"), E("")]), e(r"a*"));
+        assert_eq!(inexact([E(""), I("a")], [E(""), I("a")]), e(r"a*?"));
+        assert_eq!(inexact([I("a")], [I("a")]), e(r"a+"));
+        assert_eq!(inexact([I("a")], [I("a")]), e(r"(a+)+"));
+        // The same repetition operators inside a concatenation.
+        assert_eq!(exact(["ab"]), e(r"aZ{0}b"));
+        assert_eq!(exact(["aZb", "ab"]), e(r"aZ?b"));
+        assert_eq!(exact(["ab", "aZb"]), e(r"aZ??b"));
+        assert_eq!(
+            inexact([I("aZ"), E("ab")], [I("Zb"), E("ab")]),
+            e(r"aZ*b")
+        );
+        assert_eq!(
+            inexact([E("ab"), I("aZ")], [E("ab"), I("Zb")]),
+            e(r"aZ*?b")
+        );
+        assert_eq!(inexact([I("aZ")], [I("Zb")]), e(r"aZ+b"));
+        assert_eq!(inexact([I("aZ")], [I("Zb")]), e(r"aZ+?b"));
+
+        assert_eq!(exact(["aZZb"]), e(r"aZ{2}b"));
+        assert_eq!(inexact([I("aZZ")], [I("ZZb")]), e(r"aZ{2,3}b"));
+
+        assert_eq!(exact(["abc", ""]), e(r"(abc)?"));
+        assert_eq!(exact(["", "abc"]), e(r"(abc)??"));
+
+        assert_eq!(inexact([I("a"), E("b")], [I("ab"), E("b")]), e(r"a*b"));
+        assert_eq!(inexact([E("b"), I("a")], [E("b"), I("ab")]), e(r"a*?b"));
+        assert_eq!(inexact([I("ab")], [I("b")]), e(r"ab+"));
+        assert_eq!(inexact([I("a"), I("b")], [I("b")]), e(r"a*b+"));
+
+        // FIXME: The suffixes for this don't look quite right to me. I think
+        // the right suffixes would be: [I(ac), I(bc), E(c)]. The main issue I
+        // think is that suffixes are computed by iterating over concatenations
+        // in reverse, and then [bc, ac, c] ordering is indeed correct from
+        // that perspective. We also test a few more equivalent regexes, and
+        // we get the same result, so it is consistent at least I suppose.
+        //
+        // The reason why this isn't an issue is that it only messes up
+        // preference order, and currently, suffixes are never used in a
+        // context where preference order matters. For prefixes it matters
+        // because we sometimes want to use prefilters without confirmation
+        // when all of the literals are exact (and there's no look-around). But
+        // we never do that for suffixes. Any time we use suffixes, we always
+        // include a confirmation step. If that ever changes, then it's likely
+        // this bug will need to be fixed, but last time I looked, it appears
+        // hard to do so.
+        assert_eq!(
+            inexact([I("a"), I("b"), E("c")], [I("bc"), I("ac"), E("c")]),
+            e(r"a*b*c")
+        );
+        assert_eq!(
+            inexact([I("a"), I("b"), E("c")], [I("bc"), I("ac"), E("c")]),
+            e(r"(a+)?(b+)?c")
+        );
+        assert_eq!(
+            inexact([I("a"), I("b"), E("c")], [I("bc"), I("ac"), E("c")]),
+            e(r"(a+|)(b+|)c")
+        );
+        // A few more similarish but not identical regexes. These may have a
+        // similar problem as above.
+        assert_eq!(
+            inexact(
+                [I("a"), I("b"), I("c"), E("")],
+                [I("c"), I("b"), I("a"), E("")]
+            ),
+            e(r"a*b*c*")
+        );
+        assert_eq!(inexact([I("a"), I("b"), I("c")], [I("c")]), e(r"a*b*c+"));
+        assert_eq!(inexact([I("a"), I("b")], [I("bc")]), e(r"a*b+c"));
+        assert_eq!(inexact([I("a"), I("b")], [I("c"), I("b")]), e(r"a*b+c*"));
+        assert_eq!(inexact([I("ab"), E("a")], [I("b"), E("a")]), e(r"ab*"));
+        assert_eq!(
+            inexact([I("ab"), E("ac")], [I("bc"), E("ac")]),
+            e(r"ab*c")
+        );
+        assert_eq!(inexact([I("ab")], [I("b")]), e(r"ab+"));
+        assert_eq!(inexact([I("ab")], [I("bc")]), e(r"ab+c"));
+
+        assert_eq!(
+            inexact([I("z"), E("azb")], [I("zazb"), E("azb")]),
+            e(r"z*azb")
+        );
+        // Counted repetition of a class: only the fixed count stays exact.
+        let expected =
+            exact(["aaa", "aab", "aba", "abb", "baa", "bab", "bba", "bbb"]);
+        assert_eq!(expected, e(r"[ab]{3}"));
+        let expected = inexact(
+            [
+                I("aaa"),
+                I("aab"),
+                I("aba"),
+                I("abb"),
+                I("baa"),
+                I("bab"),
+                I("bba"),
+                I("bbb"),
+            ],
+            [
+                I("aaa"),
+                I("aab"),
+                I("aba"),
+                I("abb"),
+                I("baa"),
+                I("bab"),
+                I("bba"),
+                I("bbb"),
+            ],
+        );
+        assert_eq!(expected, e(r"[ab]{3,4}"));
+    }
+
+    #[test]
+    fn concat() {
+        let empty: [&str; 0] = [];
+        // `()` adds nothing; requiring `[a&&b]` leaves no literals at all.
+        assert_eq!(exact(["abcxyz"]), e(r"abc()xyz"));
+        assert_eq!(exact(["abcxyz"]), e(r"(abc)(xyz)"));
+        assert_eq!(exact(["abcmnoxyz"]), e(r"abc()mno()xyz"));
+        assert_eq!(exact(empty), e(r"abc[a&&b]xyz"));
+        assert_eq!(exact(["abcxyz"]), e(r"abc[a&&b]*xyz"));
+    }
+
+    #[test]
+    fn alternation() {
+        assert_eq!(exact(["abc", "mno", "xyz"]), e(r"abc|mno|xyz"));
+        assert_eq!(
+            inexact(
+                [E("abc"), I("mZ"), E("mo"), E("xyz")],
+                [E("abc"), I("Zo"), E("mo"), E("xyz")]
+            ),
+            e(r"abc|mZ*o|xyz")
+        );
+        assert_eq!(exact(["abc", "xyz"]), e(r"abc|M[a&&b]N|xyz"));
+        assert_eq!(exact(["abc", "MN", "xyz"]), e(r"abc|M[a&&b]*N|xyz"));
+
+        assert_eq!(exact(["aaa", "aaaaa"]), e(r"(?:|aa)aaa"));
+        assert_eq!(
+            inexact(
+                [I("aaa"), E(""), I("aaaaa"), E("aa")],
+                [I("aaa"), E(""), E("aa")]
+            ),
+            e(r"(?:|aa)(?:aaa)*")
+        );
+        assert_eq!(
+            inexact(
+                [E(""), I("aaa"), E("aa"), I("aaaaa")],
+                [E(""), I("aaa"), E("aa")]
+            ),
+            e(r"(?:|aa)(?:aaa)*?")
+        );
+
+        assert_eq!(
+            inexact([E("a"), I("b"), E("")], [E("a"), I("b"), E("")]),
+            e(r"a|b*")
+        );
+        assert_eq!(inexact([E("a"), I("b")], [E("a"), I("b")]), e(r"a|b+"));
+
+        assert_eq!(
+            inexact([I("a"), E("b"), E("c")], [I("ab"), E("b"), E("c")]),
+            e(r"a*b|c")
+        );
+
+        assert_eq!(
+            inexact(
+                [E("a"), E("b"), I("c"), E("")],
+                [E("a"), E("b"), I("c"), E("")]
+            ),
+            e(r"a|(?:b|c*)")
+        );
+
+        assert_eq!(
+            inexact(
+                [I("a"), I("b"), E("c"), I("a"), I("ab"), E("c")],
+                [I("ac"), I("bc"), E("c"), I("ac"), I("abc"), E("c")],
+            ),
+            e(r"(a|b)*c|(a|ab)*c")
+        );
+
+        assert_eq!(
+            exact(["abef", "abgh", "cdef", "cdgh"]),
+            e(r"(ab|cd)(ef|gh)")
+        );
+        assert_eq!(
+            exact([
+                "abefij", "abefkl", "abghij", "abghkl", "cdefij", "cdefkl",
+                "cdghij", "cdghkl",
+            ]),
+            e(r"(ab|cd)(ef|gh)(ij|kl)")
+        );
+        // Counted repetition of a group: only the fixed count stays exact.
+        assert_eq!(inexact([E("abab")], [E("abab")]), e(r"(ab){2}"));
+
+        assert_eq!(inexact([I("abab")], [I("abab")]), e(r"(ab){2,3}"));
+
+        assert_eq!(inexact([I("abab")], [I("abab")]), e(r"(ab){2,}"));
+    }
+
+    #[test]
+    fn impossible() {
+        let empty: [&str; 0] = [];
+        // Requiring `[a&&b]` yields no literals; such branches are dropped.
+        assert_eq!(exact(empty), e(r"[a&&b]"));
+        assert_eq!(exact(empty), e(r"a[a&&b]"));
+        assert_eq!(exact(empty), e(r"[a&&b]b"));
+        assert_eq!(exact(empty), e(r"a[a&&b]b"));
+        assert_eq!(exact(["a", "b"]), e(r"a|[a&&b]|b"));
+        assert_eq!(exact(["a", "b"]), e(r"a|c[a&&b]|b"));
+        assert_eq!(exact(["a", "b"]), e(r"a|[a&&b]d|b"));
+        assert_eq!(exact(["a", "b"]), e(r"a|c[a&&b]d|b"));
+        assert_eq!(exact([""]), e(r"[a&&b]*"));
+        assert_eq!(exact(["MN"]), e(r"M[a&&b]*N"));
+    }
+
+    // This tests patterns that contain something that defeats literal
+    // detection, usually because it would blow some limit on the total number
+    // of literals that can be returned.
+    //
+    // The main idea is that when literal extraction sees something that
+    // it knows will blow a limit, it replaces it with a marker that says
+    // "any literal will match here." While not necessarily true, the
+    // over-estimation is just fine for the purposes of literal extraction,
+    // because the imprecision doesn't matter: too big is too big.
+    //
+    // This is one of the trickier parts of literal extraction, since we need
+    // to make sure all of our literal extraction operations correctly compose
+    // with the markers.
+    #[test]
+    fn anything() {
+        assert_eq!(infinite(), e(r"."));
+        assert_eq!(infinite(), e(r"(?s)."));
+        assert_eq!(infinite(), e(r"[A-Za-z]"));
+        assert_eq!(infinite(), e(r"[A-Z]"));
+        assert_eq!(exact([""]), e(r"[A-Z]{0}")); // zero repetitions: just ""
+        assert_eq!(infinite(), e(r"[A-Z]?"));
+        assert_eq!(infinite(), e(r"[A-Z]*"));
+        assert_eq!(infinite(), e(r"[A-Z]+"));
+        assert_eq!((seq([I("1")]), Seq::infinite()), e(r"1[A-Z]"));
+        assert_eq!((seq([I("1")]), seq([I("2")])), e(r"1[A-Z]2"));
+        assert_eq!((Seq::infinite(), seq([I("123")])), e(r"[A-Z]+123"));
+        assert_eq!(infinite(), e(r"[A-Z]+123[A-Z]+"));
+        assert_eq!(infinite(), e(r"1|[A-Z]|3"));
+        assert_eq!(
+            (seq([E("1"), I("2"), E("3")]), Seq::infinite()),
+            e(r"1|2[A-Z]|3"),
+        );
+        assert_eq!(
+            (Seq::infinite(), seq([E("1"), I("2"), E("3")])),
+            e(r"1|[A-Z]2|3"),
+        );
+        assert_eq!(
+            (seq([E("1"), I("2"), E("4")]), seq([E("1"), I("3"), E("4")])),
+            e(r"1|2[A-Z]3|4"),
+        );
+        assert_eq!((Seq::infinite(), seq([I("2")])), e(r"(?:|1)[A-Z]2"));
+        assert_eq!(inexact([I("a")], [I("z")]), e(r"a.z"));
+    }
+
+    // Like the 'anything' test, but it uses smaller limits in order to test
+    // the logic for effectively aborting literal extraction when the seqs get
+    // too big.
+    #[test]
+    fn anything_small_limits() {
+        fn prefixes(pattern: &str) -> Seq {
+            Extractor::new()
+                .kind(ExtractKind::Prefix)
+                .limit_total(10) // deliberately tiny for this test
+                .extract(&parse(pattern))
+        }
+
+        fn suffixes(pattern: &str) -> Seq {
+            Extractor::new()
+                .kind(ExtractKind::Suffix)
+                .limit_total(10) // deliberately tiny for this test
+                .extract(&parse(pattern))
+        }
+
+        fn e(pattern: &str) -> (Seq, Seq) {
+            (prefixes(pattern), suffixes(pattern))
+        }
+
+        assert_eq!(
+            (
+                seq([
+                    I("aaa"),
+                    I("aab"),
+                    I("aba"),
+                    I("abb"),
+                    I("baa"),
+                    I("bab"),
+                    I("bba"),
+                    I("bbb")
+                ]),
+                seq([
+                    I("aaa"),
+                    I("aab"),
+                    I("aba"),
+                    I("abb"),
+                    I("baa"),
+                    I("bab"),
+                    I("bba"),
+                    I("bbb")
+                ])
+            ),
+            e(r"[ab]{3}{3}") // 8 inexact literals fit under limit_total(10)
+        );
+
+        assert_eq!(infinite(), e(r"ab|cd|ef|gh|ij|kl|mn|op|qr|st|uv|wx|yz"));
+    }
+
+    #[test]
+    fn empty() {
+        assert_eq!(exact([""]), e(r"")); // the empty pattern itself
+        assert_eq!(exact([""]), e(r"^")); // anchors/boundaries also yield ""
+        assert_eq!(exact([""]), e(r"$"));
+        assert_eq!(exact([""]), e(r"(?m:^)"));
+        assert_eq!(exact([""]), e(r"(?m:$)"));
+        assert_eq!(exact([""]), e(r"\b"));
+        assert_eq!(exact([""]), e(r"\B"));
+        assert_eq!(exact([""]), e(r"(?-u:\b)"));
+        assert_eq!(exact([""]), e(r"(?-u:\B)"));
+    }
+
+    #[test]
+    fn odds_and_ends() {
+        assert_eq!((Seq::infinite(), seq([I("a")])), e(r".a"));
+        assert_eq!((seq([I("a")]), Seq::infinite()), e(r"a."));
+        assert_eq!(infinite(), e(r"a|."));
+        assert_eq!(infinite(), e(r".|a"));
+
+        let pat = r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]";
+        let expected = inexact(
+            ["Mo'am", "Moam", "Mu'am", "Muam"].map(I),
+            [
+                "ddafi", "ddafy", "dhafi", "dhafy", "dzafi", "dzafy", "dafi",
+                "dafy", "tdafi", "tdafy", "thafi", "thafy", "tzafi", "tzafy",
+                "tafi", "tafy", "zdafi", "zdafy", "zhafi", "zhafy", "zzafi",
+                "zzafy", "zafi", "zafy",
+            ]
+            .map(I),
+        );
+        assert_eq!(expected, e(pat));
+
+        assert_eq!(
+            (seq(["fn is_", "fn as_"].map(I)), Seq::infinite()),
+            e(r"fn is_([A-Z]+)|fn as_([A-Z]+)"),
+        );
+        assert_eq!(
+            inexact([I("foo")], [I("quux")]),
+            e(r"foo[A-Z]+bar[A-Z]+quux")
+        );
+        assert_eq!(infinite(), e(r"[A-Z]+bar[A-Z]+"));
+        assert_eq!(
+            exact(["Sherlock Holmes"]),
+            e(r"(?m)^Sherlock Holmes|Sherlock Holmes$")
+        );
+        // A leading `\b` does not make the extracted literals inexact.
+        assert_eq!(exact(["sa", "sb"]), e(r"\bs(?:[ab])"));
+    }
+
+    // This tests a specific regex along with some heuristic steps to reduce
+    // the sequences extracted. This is meant to roughly correspond to the
+    // types of heuristics used to shrink literal sets in practice. (Shrinking
+    // is done because you want to balance "spend too much work looking for
+    // too many literals" and "spend too much work processing false positive
+    // matches from short literals.")
+    #[test]
+    #[cfg(feature = "unicode-case")]
+    fn holmes() {
+        let expected = inexact(
+            ["HOL", "HOl", "HoL", "Hol", "hOL", "hOl", "hoL", "hol"].map(I),
+            [
+                "MES", "MEs", "Eſ", "MeS", "Mes", "eſ", "mES", "mEs", "meS",
+                "mes",
+            ]
+            .map(I),
+        );
+        let (mut prefixes, mut suffixes) = e(r"(?i)Holmes");
+        prefixes.keep_first_bytes(3);
+        suffixes.keep_last_bytes(3); // bytes, not chars: "Eſ" is 3 bytes
+        prefixes.minimize_by_preference();
+        suffixes.minimize_by_preference();
+        assert_eq!(expected, (prefixes, suffixes));
+    }
+
+    // This tests that we get some kind of literals extracted for a beefier
+    // alternation with case insensitive mode enabled. At one point during
+    // development, this returned nothing, and motivated some special case
+    // code in Extractor::union to try and trim down the literal sequences
+    // if the union would blow the limits set.
+    #[test]
+    #[cfg(feature = "unicode-case")]
+    fn holmes_alt() {
+        let mut pre =
+            prefixes(r"(?i)Sherlock|Holmes|Watson|Irene|Adler|John|Baker");
+        assert!(pre.len().unwrap() > 0);
+        pre.optimize_for_prefix_by_preference();
+        assert!(pre.len().unwrap() > 0); // still non-empty after optimizing
+    }
+
+    // See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8
+    // See: CVE-2022-24713
+    //
+    // We test this here to ensure literal extraction completes in reasonable
+    // time and isn't materially impacted by these sorts of pathological
+    // repeats.
+    #[test]
+    fn crazy_repeats() {
+        assert_eq!(inexact([E("")], [E("")]), e(r"(?:){4294967295}"));
+        assert_eq!(
+            inexact([E("")], [E("")]),
+            e(r"(?:){64}{64}{64}{64}{64}{64}")
+        );
+        assert_eq!(inexact([E("")], [E("")]), e(r"x{0}{4294967295}"));
+        assert_eq!(inexact([E("")], [E("")]), e(r"(?:|){4294967295}"));
+
+        assert_eq!(
+            inexact([E("")], [E("")]),
+            e(r"(?:){8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}")
+        );
+        let repa = "a".repeat(100); // expected: cut off at 100 bytes
+        assert_eq!(
+            inexact([I(&repa)], [I(&repa)]),
+            e(r"a{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}{8}")
+        );
+    }
+
+    #[test]
+    fn huge() {
+        let pat = r#"(?-u)
+        2(?:
+          [45]\d{3}|
+          7(?:
+            1[0-267]|
+            2[0-289]|
+            3[0-29]|
+            4[01]|
+            5[1-3]|
+            6[013]|
+            7[0178]|
+            91
+          )|
+          8(?:
+            0[125]|
+            [139][1-6]|
+            2[0157-9]|
+            41|
+            6[1-35]|
+            7[1-5]|
+            8[1-8]|
+            90
+          )|
+          9(?:
+            0[0-2]|
+            1[0-4]|
+            2[568]|
+            3[3-6]|
+            5[5-7]|
+            6[0167]|
+            7[15]|
+            8[0146-9]
+          )
+        )\d{4}|
+        3(?:
+          12?[5-7]\d{2}|
+          0(?:
+            2(?:
+              [025-79]\d|
+              [348]\d{1,2}
+            )|
+            3(?:
+              [2-4]\d|
+              [56]\d?
+            )
+          )|
+          2(?:
+            1\d{2}|
+            2(?:
+              [12]\d|
+              [35]\d{1,2}|
+              4\d?
+            )
+          )|
+          3(?:
+            1\d{2}|
+            2(?:
+              [2356]\d|
+              4\d{1,2}
+            )
+          )|
+          4(?:
+            1\d{2}|
+            2(?:
+              2\d{1,2}|
+              [47]|
+              5\d{2}
+            )
+          )|
+          5(?:
+            1\d{2}|
+            29
+          )|
+          [67]1\d{2}|
+          8(?:
+            1\d{2}|
+            2(?:
+              2\d{2}|
+              3|
+              4\d
+            )
+          )
+        )\d{3}|
+        4(?:
+          0(?:
+            2(?:
+              [09]\d|
+              7
+            )|
+            33\d{2}
+          )|
+          1\d{3}|
+          2(?:
+            1\d{2}|
+            2(?:
+              [25]\d?|
+              [348]\d|
+              [67]\d{1,2}
+            )
+          )|
+          3(?:
+            1\d{2}(?:
+              \d{2}
+            )?|
+            2(?:
+              [045]\d|
+              [236-9]\d{1,2}
+            )|
+            32\d{2}
+          )|
+          4(?:
+            [18]\d{2}|
+            2(?:
+              [2-46]\d{2}|
+              3
+            )|
+            5[25]\d{2}
+          )|
+          5(?:
+            1\d{2}|
+            2(?:
+              3\d|
+              5
+            )
+          )|
+          6(?:
+            [18]\d{2}|
+            2(?:
+              3(?:
+                \d{2}
+              )?|
+              [46]\d{1,2}|
+              5\d{2}|
+              7\d
+            )|
+            5(?:
+              3\d?|
+              4\d|
+              [57]\d{1,2}|
+              6\d{2}|
+              8
+            )
+          )|
+          71\d{2}|
+          8(?:
+            [18]\d{2}|
+            23\d{2}|
+            54\d{2}
+          )|
+          9(?:
+            [18]\d{2}|
+            2[2-5]\d{2}|
+            53\d{1,2}
+          )
+        )\d{3}|
+        5(?:
+          02[03489]\d{2}|
+          1\d{2}|
+          2(?:
+            1\d{2}|
+            2(?:
+              2(?:
+                \d{2}
+              )?|
+              [457]\d{2}
+            )
+          )|
+          3(?:
+            1\d{2}|
+            2(?:
+              [37](?:
+                \d{2}
+              )?|
+              [569]\d{2}
+            )
+          )|
+          4(?:
+            1\d{2}|
+            2[46]\d{2}
+          )|
+          5(?:
+            1\d{2}|
+            26\d{1,2}
+          )|
+          6(?:
+            [18]\d{2}|
+            2|
+            53\d{2}
+          )|
+          7(?:
+            1|
+            24
+          )\d{2}|
+          8(?:
+            1|
+            26
+          )\d{2}|
+          91\d{2}
+        )\d{3}|
+        6(?:
+          0(?:
+            1\d{2}|
+            2(?:
+              3\d{2}|
+              4\d{1,2}
+            )
+          )|
+          2(?:
+            2[2-5]\d{2}|
+            5(?:
+              [3-5]\d{2}|
+              7
+            )|
+            8\d{2}
+          )|
+          3(?:
+            1|
+            2[3478]
+          )\d{2}|
+          4(?:
+            1|
+            2[34]
+          )\d{2}|
+          5(?:
+            1|
+            2[47]
+          )\d{2}|
+          6(?:
+            [18]\d{2}|
+            6(?:
+              2(?:
+                2\d|
+                [34]\d{2}
+              )|
+              5(?:
+                [24]\d{2}|
+                3\d|
+                5\d{1,2}
+              )
+            )
+          )|
+          72[2-5]\d{2}|
+          8(?:
+            1\d{2}|
+            2[2-5]\d{2}
+          )|
+          9(?:
+            1\d{2}|
+            2[2-6]\d{2}
+          )
+        )\d{3}|
+        7(?:
+          (?:
+            02|
+            [3-589]1|
+            6[12]|
+            72[24]
+          )\d{2}|
+          21\d{3}|
+          32
+        )\d{3}|
+        8(?:
+          (?:
+            4[12]|
+            [5-7]2|
+            1\d?
+          )|
+          (?:
+            0|
+            3[12]|
+            [5-7]1|
+            217
+          )\d
+        )\d{4}|
+        9(?:
+          [35]1|
+          (?:
+            [024]2|
+            81
+          )\d|
+          (?:
+            1|
+            [24]1
+          )\d{2}
+        )\d{3}
+        "#;
+        // TODO: This is a good candidate of a seq of literals that could be
+        // shrunk quite a bit and still be very productive with respect to
+        // literal optimizations.
+        let (prefixes, suffixes) = e(pat);
+        assert!(!suffixes.is_finite()); // suffix side yields no finite seq
+        assert_eq!(Some(243), prefixes.len()); // pins the current prefix count
+    }
+
+    #[test]
+    fn optimize() {
+        // This gets a common prefix that isn't too short.
+        let (p, s) =
+            opt(["foobarfoobar", "foobar", "foobarzfoobar", "foobarfoobar"]);
+        assert_eq!(seq([I("foobar")]), p);
+        assert_eq!(seq([I("foobar")]), s);
+
+        // This also finds a common prefix, but since it's only one byte, it
+        // prefers the multiple literals.
+        let (p, s) = opt(["abba", "akka", "abccba"]);
+        assert_eq!(exact(["abba", "akka", "abccba"]), (p, s));
+        // The first seq collapses to just "sam"; the second keeps both.
+        let (p, s) = opt(["sam", "samwise"]);
+        assert_eq!((seq([E("sam")]), seq([E("sam"), E("samwise")])), (p, s));
+
+        // The empty string is poisonous, so our seq becomes infinite, even
+        // though all literals are exact.
+        let (p, s) = opt(["foobarfoo", "foo", "", "foozfoo", "foofoo"]);
+        assert!(!p.is_finite());
+        assert!(!s.is_finite());
+
+        // A space is also poisonous, so our seq becomes infinite. But this
+        // only gets triggered when we don't have a completely exact sequence.
+        // When the sequence is exact, spaces are okay, since we presume that
+        // any prefilter will match a space more quickly than the regex engine.
+        // (When the sequence is exact, there's a chance of the prefilter being
+        // used without needing the regex engine at all.)
+        let mut p = seq([E("foobarfoo"), I("foo"), E(" "), E("foofoo")]);
+        p.optimize_for_prefix_by_preference();
+        assert!(!p.is_finite());
+    }
+}
diff --git a/vendor/regex-syntax/src/hir/mod.rs b/vendor/regex-syntax/src/hir/mod.rs
new file mode 100644
index 0000000..ce38ead
--- /dev/null
+++ b/vendor/regex-syntax/src/hir/mod.rs
@@ -0,0 +1,3861 @@
+/*!
+Defines a high-level intermediate (HIR) representation for regular expressions.
+
+The HIR is represented by the [`Hir`] type, and is principally constructed via
+[translation](translate) from an [`Ast`](crate::ast::Ast). Alternatively, users
+may use the smart constructors defined on `Hir` to build their own by hand. The
+smart constructors simultaneously simplify and "optimize" the HIR, and are also
+the same routines used by translation.
+
+Most regex engines only have an HIR like this, and usually construct it
+directly from the concrete syntax. This crate however first parses the
+concrete syntax into an `Ast`, and only then creates the HIR from the `Ast`,
+as mentioned above. It's done this way to facilitate better error reporting,
+and to have a structured representation of a regex that faithfully represents
+its concrete syntax. Namely, while an `Hir` value can be converted back to an
+equivalent regex pattern string, it is unlikely to look like the original due
+to its simplified structure.
+*/
+
+use core::{char, cmp};
+
+use alloc::{
+    boxed::Box,
+    format,
+    string::{String, ToString},
+    vec,
+    vec::Vec,
+};
+
+use crate::{
+    ast::Span,
+    hir::interval::{Interval, IntervalSet, IntervalSetIter},
+    unicode,
+};
+
+pub use crate::{
+    hir::visitor::{visit, Visitor},
+    unicode::CaseFoldError,
+};
+
+mod interval;
+pub mod literal;
+pub mod print;
+pub mod translate;
+mod visitor;
+
+/// An error that can occur while translating an `Ast` to a `Hir`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Error {
+    /// The kind of error; exposed to callers via [`Error::kind`].
+    kind: ErrorKind,
+    /// The original pattern that the translator's Ast was parsed from. Every
+    /// span in an error is a valid range into this string.
+    pattern: String,
+    /// The span of this error, derived from the Ast given to the translator.
+    span: Span,
+}
+
+impl Error {
+    /// Return the kind of this error, as a borrowed [`ErrorKind`].
+    pub fn kind(&self) -> &ErrorKind {
+        &self.kind
+    }
+
+    /// The original pattern string in which this error occurred.
+    ///
+    /// Every span reported by this error is reported in terms of this string.
+    pub fn pattern(&self) -> &str {
+        &self.pattern
+    }
+
+    /// Return the span at which this error occurred, borrowed from `self`.
+    pub fn span(&self) -> &Span {
+        &self.span
+    }
+}
+
+/// The kind of an error that occurred while translating an `Ast` to an `Hir`.
+///
+/// This error type is marked as `non_exhaustive`. This means that adding a
+/// new variant is not considered a breaking change.
+#[non_exhaustive]
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ErrorKind {
+    /// This error occurs when a Unicode feature is used when Unicode
+    /// support is disabled. For example `(?-u:\pL)` would trigger this error.
+    UnicodeNotAllowed,
+    /// This error occurs when translating a pattern that could match a byte
+    /// sequence that isn't UTF-8 and `utf8` was enabled.
+    InvalidUtf8,
+    /// This error occurs when one uses a non-ASCII byte for a line terminator,
+    /// but where Unicode mode is enabled and UTF-8 mode is disabled.
+    InvalidLineTerminator,
+    /// This occurs when an unrecognized Unicode property name could not
+    /// be found.
+    UnicodePropertyNotFound,
+    /// This occurs when an unrecognized Unicode property value could not
+    /// be found.
+    UnicodePropertyValueNotFound,
+    /// This occurs when a Unicode-aware Perl character class (`\w`, `\s` or
+    /// `\d`) could not be found. This can occur when the `unicode-perl`
+    /// crate feature is not enabled.
+    UnicodePerlClassNotFound,
+    /// This occurs when the Unicode simple case mapping tables are not
+    /// available, and the regular expression required Unicode aware case
+    /// insensitivity.
+    UnicodeCaseUnavailable,
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for Error {} // no overrides: trait defaults only
+
+impl core::fmt::Display for Error {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        crate::error::Formatter::from(self).fmt(f) // rendering lives there
+    }
+}
+
+impl core::fmt::Display for ErrorKind {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        use self::ErrorKind::*;
+
+        let msg = match *self {
+            UnicodeNotAllowed => "Unicode not allowed here",
+            InvalidUtf8 => "pattern can match invalid UTF-8",
+            InvalidLineTerminator => "invalid line terminator, must be ASCII",
+            UnicodePropertyNotFound => "Unicode property not found",
+            UnicodePropertyValueNotFound => "Unicode property value not found",
+            UnicodePerlClassNotFound => {
+                "Unicode-aware Perl class not found \
+                 (make sure the unicode-perl feature is enabled)"
+            }
+            UnicodeCaseUnavailable => {
+                "Unicode-aware case insensitivity matching is not available \
+                 (make sure the unicode-case feature is enabled)"
+            }
+        };
+        f.write_str(msg) // each variant maps to one fixed message
+    }
+}
+
+/// A high-level intermediate representation (HIR) for a regular expression.
+///
+/// An HIR value is a combination of a [`HirKind`] and a set of [`Properties`].
+/// An `HirKind` indicates what kind of regular expression it is (a literal,
+/// a repetition, a look-around assertion, etc.), whereas a `Properties`
+/// describes various facts about the regular expression. For example, whether
+/// it matches UTF-8 or if it matches the empty string.
+///
+/// The HIR of a regular expression represents an intermediate step between
+/// its abstract syntax (a structured description of the concrete syntax) and
+/// an actual regex matcher. The purpose of HIR is to make regular expressions
+/// easier to analyze. In particular, the AST is much more complex than the
+/// HIR. For example, while an AST supports arbitrarily nested character
+/// classes, the HIR will flatten all nested classes into a single set. The HIR
+/// will also "compile away" every flag present in the concrete syntax. For
+/// example, users of HIR expressions never need to worry about case folding;
+/// it is handled automatically by the translator (e.g., by translating
+/// `(?i:A)` to `[aA]`).
+///
+/// The specific type of an HIR expression can be accessed via its `kind`
+/// or `into_kind` methods. This extra level of indirection exists for two
+/// reasons:
+///
+/// 1. Construction of an HIR expression *must* use the constructor methods on
+/// this `Hir` type instead of building the `HirKind` values directly. This
+/// permits construction to enforce invariants like "concatenations always
+/// consist of two or more sub-expressions."
+/// 2. Every HIR expression contains attributes that are defined inductively,
+/// and can be computed cheaply during the construction process. For example,
+/// one such attribute is whether the expression must match at the beginning of
+/// the haystack.
+///
+/// In particular, if you have an `HirKind` value, then there is intentionally
+/// no way to build an `Hir` value from it. You instead need to do case
+/// analysis on the `HirKind` value and build the `Hir` value using its smart
+/// constructors.
+///
+/// # UTF-8
+///
+/// If the HIR was produced by a translator with
+/// [`TranslatorBuilder::utf8`](translate::TranslatorBuilder::utf8) enabled,
+/// then the HIR is guaranteed to match UTF-8 exclusively for all non-empty
+/// matches.
+///
+/// For empty matches, those can occur at any position. It is the
+/// responsibility of the regex engine to determine whether empty matches are
+/// permitted between the code units of a single codepoint.
+///
+/// # Stack space
+///
+/// This type defines its own destructor that uses constant stack space and
+/// heap space proportional to the size of the HIR.
+///
+/// Also, an `Hir`'s `fmt::Display` implementation prints an HIR as a regular
+/// expression pattern string, and uses constant stack space and heap space
+/// proportional to the size of the `Hir`. The regex it prints is guaranteed to
+/// be _semantically_ equivalent to the original concrete syntax, but it may
+/// look very different. (And potentially not practically readable by a human.)
+///
+/// An `Hir`'s `fmt::Debug` implementation currently does not use constant
+/// stack space. The implementation will also suppress some details (such as
+/// the `Properties` inlined into every `Hir` value to make it less noisy).
+#[derive(Clone, Eq, PartialEq)]
+pub struct Hir {
+    /// The underlying HIR kind.
+    kind: HirKind,
+    /// Analysis info about this HIR, computed during construction.
+    props: Properties,
+}
+
+/// Methods for accessing the underlying `HirKind` and `Properties`.
+impl Hir {
+    /// Returns a reference to the underlying HIR kind.
+    pub fn kind(&self) -> &HirKind {
+        &self.kind
+    }
+
+    /// Consumes this HIR expression and returns its underlying `HirKind`,
+    /// leaving `HirKind::Empty` behind in the value that is then dropped.
+    pub fn into_kind(mut self) -> HirKind {
+        core::mem::replace(&mut self.kind, HirKind::Empty)
+    }
+
+    /// Returns the properties computed for this `Hir`.
+    pub fn properties(&self) -> &Properties {
+        &self.props
+    }
+
+    /// Splits this HIR into its constituent parts.
+    ///
+    /// This is useful because `let Hir { kind, props } = hir;` does not work
+    /// because of `Hir`'s custom `Drop` implementation.
+    fn into_parts(mut self) -> (HirKind, Properties) {
+        (
+            core::mem::replace(&mut self.kind, HirKind::Empty),
+            core::mem::replace(&mut self.props, Properties::empty()),
+        )
+    }
+}
+
+/// Smart constructors for HIR values.
+///
+/// These constructors are called "smart" because they do inductive work or
+/// simplifications. For example, calling `Hir::repetition` with a repetition
+/// like `a{0}` will actually return a `Hir` with a `HirKind::Empty` kind
+/// since it is equivalent to an empty regex. Another example is calling
+/// `Hir::concat(vec![expr])`. Instead of getting a `HirKind::Concat`, you'll
+/// just get back the original `expr` since it's precisely equivalent.
+///
+/// Smart constructors enable maintaining invariants about the HIR data type
+/// while also simultaneously keeping the representation as simple as possible.
+impl Hir {
+    /// Returns an empty HIR expression, i.e. one of kind `HirKind::Empty`.
+    ///
+    /// An empty HIR expression always matches, including the empty string.
+    #[inline]
+    pub fn empty() -> Hir {
+        let props = Properties::empty();
+        Hir { kind: HirKind::Empty, props }
+    }
+
+    /// Returns an HIR expression that can never match anything. That is,
+    /// the size of the set of strings in the language described by the HIR
+    /// returned is `0`.
+    ///
+    /// This is distinct from [`Hir::empty`] in that the empty string matches
+    /// the HIR returned by `Hir::empty`. That is, the set of strings in the
+    /// language described by `Hir::empty` is non-empty.
+    ///
+    /// Note that currently, the HIR returned uses an empty character class to
+    /// indicate that nothing can match. An equivalent expression that cannot
+    /// match is an empty alternation, but all such "fail" expressions are
+    /// normalized (via smart constructors) to empty character classes. This is
+    /// because empty character classes can be spelled in the concrete syntax
+    /// of a regex (e.g., `\P{any}` or `(?-u:[^\x00-\xFF])` or `[a&&b]`), but
+    /// empty alternations cannot.
+    #[inline]
+    pub fn fail() -> Hir {
+        let class = Class::Bytes(ClassBytes::empty());
+        let props = Properties::class(&class);
+        // We can't just call Hir::class here because it defers to Hir::fail
+        // in order to canonicalize the Hir value used to represent "cannot
+        // match."
+        Hir { kind: HirKind::Class(class), props }
+    }
+
+    /// Creates a literal HIR expression.
+    ///
+    /// This accepts anything that can be converted into a `Box<[u8]>`.
+    ///
+    /// Note that there is no mechanism for storing a `char` or a `Box<str>`
+    /// in an HIR. Everything is "just bytes." Whether a `Literal` (or
+    /// any HIR node) matches valid UTF-8 exclusively can be queried via
+    /// [`Properties::is_utf8`].
+    ///
+    /// # Example
+    ///
+    /// This example shows that concatenations of `Literal` HIR values will
+    /// automatically get flattened and combined together. So for example, even
+    /// if you concat multiple `Literal` values that are themselves not valid
+    /// UTF-8, they might add up to valid UTF-8. This also demonstrates just
+    /// how "smart" Hir's smart constructors are.
+    ///
+    /// ```
+    /// use regex_syntax::hir::{Hir, HirKind, Literal};
+    ///
+    /// let literals = vec![
+    ///     Hir::literal([0xE2]),
+    ///     Hir::literal([0x98]),
+    ///     Hir::literal([0x83]),
+    /// ];
+    /// // Each literal, on its own, is invalid UTF-8.
+    /// assert!(literals.iter().all(|hir| !hir.properties().is_utf8()));
+    ///
+    /// let concat = Hir::concat(literals);
+    /// // But the concatenation is valid UTF-8!
+    /// assert!(concat.properties().is_utf8());
+    ///
+    /// // And also notice that the literals have been concatenated into a
+    /// // single `Literal`, to the point where there is no explicit `Concat`!
+    /// let expected = HirKind::Literal(Literal(Box::from("☃".as_bytes())));
+    /// assert_eq!(&expected, concat.kind());
+    /// ```
+    #[inline]
+    pub fn literal<B: Into<Box<[u8]>>>(lit: B) -> Hir {
+        let bytes = lit.into();
+        if bytes.is_empty() {
+            return Hir::empty(); // a zero-length literal is the empty regex
+        }
+
+        let lit = Literal(bytes);
+        let props = Properties::literal(&lit);
+        Hir { kind: HirKind::Literal(lit), props }
+    }
+
+    /// Creates a class HIR expression. The class may either be defined over
+    /// ranges of Unicode codepoints or ranges of raw byte values.
+    ///
+    /// Note that an empty class is permitted. An empty class is equivalent to
+    /// `Hir::fail()`.
+    #[inline]
+    pub fn class(class: Class) -> Hir {
+        if class.is_empty() {
+            return Hir::fail();
+        } else if let Some(bytes) = class.literal() {
+            return Hir::literal(bytes); // class collapses to a literal
+        }
+        let props = Properties::class(&class);
+        Hir { kind: HirKind::Class(class), props }
+    }
+
+    /// Creates a look-around assertion HIR expression of kind `HirKind::Look`.
+    #[inline]
+    pub fn look(look: Look) -> Hir {
+        let props = Properties::look(look);
+        Hir { kind: HirKind::Look(look), props }
+    }
+
+    /// Creates a repetition HIR expression.
+    #[inline]
+    pub fn repetition(mut rep: Repetition) -> Hir {
+        // If the sub-expression of a repetition can only match the empty
+        // string, then we force its maximum to be at most 1.
+        if rep.sub.properties().maximum_len() == Some(0) {
+            rep.min = cmp::min(rep.min, 1);
+            rep.max = rep.max.map(|n| cmp::min(n, 1)).or(Some(1));
+        }
+        // The regex 'a{0}' is always equivalent to the empty regex. This is
+        // true even when 'a' is an expression that never matches anything
+        // (like '\P{any}').
+        //
+        // Additionally, the regex 'a{1}' is always equivalent to 'a'.
+        if rep.min == 0 && rep.max == Some(0) {
+            return Hir::empty();
+        } else if rep.min == 1 && rep.max == Some(1) {
+            return *rep.sub; // hand back the sub-expression unchanged
+        }
+        let props = Properties::repetition(&rep);
+        Hir { kind: HirKind::Repetition(rep), props }
+    }
+
+    /// Creates a capture HIR expression of kind `HirKind::Capture`.
+    ///
+    /// Note that there is no explicit HIR value for a non-capturing group.
+    /// Since a non-capturing group only exists to override precedence in the
+    /// concrete syntax and since an HIR already does its own grouping based on
+    /// what is parsed, there is no need to explicitly represent non-capturing
+    /// groups in the HIR.
+    #[inline]
+    pub fn capture(capture: Capture) -> Hir {
+        let props = Properties::capture(&capture);
+        Hir { kind: HirKind::Capture(capture), props }
+    }
+
+    /// Returns the concatenation of the given expressions.
+    ///
+    /// This attempts to flatten and simplify the concatenation as appropriate.
+    ///
+    /// # Example
+    ///
+    /// This shows a simple example of basic flattening of both concatenations
+    /// and literals.
+    ///
+    /// ```
+    /// use regex_syntax::hir::Hir;
+    ///
+    /// let hir = Hir::concat(vec![
+    ///     Hir::concat(vec![
+    ///         Hir::literal([b'a']),
+    ///         Hir::literal([b'b']),
+    ///         Hir::literal([b'c']),
+    ///     ]),
+    ///     Hir::concat(vec![
+    ///         Hir::literal([b'x']),
+    ///         Hir::literal([b'y']),
+    ///         Hir::literal([b'z']),
+    ///     ]),
+    /// ]);
+    /// let expected = Hir::literal("abcxyz".as_bytes());
+    /// assert_eq!(expected, hir);
+    /// ```
+    pub fn concat(subs: Vec<Hir>) -> Hir {
+        // We rebuild the concatenation by simplifying it. Would be nice to do
+        // it in place, but that seems a little tricky?
+        let mut new = vec![];
+        // This gobbles up any adjacent literals in a concatenation and smushes
+        // them together. Basically, when we see a literal, we add its bytes
+        // to 'prior_lit', and whenever we see anything else, we first take
+        // any bytes in 'prior_lit' and add it to the 'new' concatenation.
+        let mut prior_lit: Option<Vec<u8>> = None;
+        for sub in subs {
+            let (kind, props) = sub.into_parts();
+            match kind {
+                HirKind::Literal(Literal(bytes)) => {
+                    if let Some(ref mut prior_bytes) = prior_lit {
+                        prior_bytes.extend_from_slice(&bytes);
+                    } else {
+                        prior_lit = Some(bytes.to_vec());
+                    }
+                }
+                // We also flatten concats that are direct children of another
+                // concat. We only need to do this one level deep since
+                // Hir::concat is the only way to build concatenations, and so
+                // flattening happens inductively.
+                HirKind::Concat(subs2) => {
+                    for sub2 in subs2 {
+                        let (kind2, props2) = sub2.into_parts();
+                        match kind2 {
+                            HirKind::Literal(Literal(bytes)) => {
+                                if let Some(ref mut prior_bytes) = prior_lit {
+                                    prior_bytes.extend_from_slice(&bytes);
+                                } else {
+                                    prior_lit = Some(bytes.to_vec());
+                                }
+                            }
+                            kind2 => {
+                                if let Some(prior_bytes) = prior_lit.take() {
+                                    new.push(Hir::literal(prior_bytes));
+                                }
+                                new.push(Hir { kind: kind2, props: props2 });
+                            }
+                        }
+                    }
+                }
+                // We can just skip empty HIRs.
+                HirKind::Empty => {}
+                kind => {
+                    if let Some(prior_bytes) = prior_lit.take() {
+                        new.push(Hir::literal(prior_bytes));
+                    }
+                    new.push(Hir { kind, props });
+                }
+            }
+        }
+        if let Some(prior_bytes) = prior_lit.take() {
+            new.push(Hir::literal(prior_bytes));
+        }
+        if new.is_empty() {
+            return Hir::empty();
+        } else if new.len() == 1 {
+            return new.pop().unwrap();
+        }
+        let props = Properties::concat(&new);
+        Hir { kind: HirKind::Concat(new), props }
+    }
+
+    /// Returns the alternation of the given expressions.
+    ///
+    /// This flattens and simplifies the alternation as appropriate. This may
+    /// include factoring out common prefixes or even rewriting the alternation
+    /// as a character class.
+    ///
+    /// Note that an empty alternation is equivalent to `Hir::fail()`. (It
+    /// is not possible for one to write an empty alternation, or even an
+    /// alternation with a single sub-expression, in the concrete syntax of a
+    /// regex.)
+    ///
+    /// # Example
+    ///
+    /// This is a simple example showing how an alternation might get
+    /// simplified.
+    ///
+    /// ```
+    /// use regex_syntax::hir::{Hir, Class, ClassUnicode, ClassUnicodeRange};
+    ///
+    /// let hir = Hir::alternation(vec![
+    ///     Hir::literal([b'a']),
+    ///     Hir::literal([b'b']),
+    ///     Hir::literal([b'c']),
+    ///     Hir::literal([b'd']),
+    ///     Hir::literal([b'e']),
+    ///     Hir::literal([b'f']),
+    /// ]);
+    /// let expected = Hir::class(Class::Unicode(ClassUnicode::new([
+    ///     ClassUnicodeRange::new('a', 'f'),
+    /// ])));
+    /// assert_eq!(expected, hir);
+    /// ```
+    ///
+    /// And another example showing how common prefixes might get factored
+    /// out.
+    ///
+    /// ```
+    /// use regex_syntax::hir::{Hir, Class, ClassUnicode, ClassUnicodeRange};
+    ///
+    /// let hir = Hir::alternation(vec![
+    ///     Hir::concat(vec![
+    ///         Hir::literal("abc".as_bytes()),
+    ///         Hir::class(Class::Unicode(ClassUnicode::new([
+    ///             ClassUnicodeRange::new('A', 'Z'),
+    ///         ]))),
+    ///     ]),
+    ///     Hir::concat(vec![
+    ///         Hir::literal("abc".as_bytes()),
+    ///         Hir::class(Class::Unicode(ClassUnicode::new([
+    ///             ClassUnicodeRange::new('a', 'z'),
+    ///         ]))),
+    ///     ]),
+    /// ]);
+    /// let expected = Hir::concat(vec![
+    ///     Hir::literal("abc".as_bytes()),
+    ///     Hir::alternation(vec![
+    ///         Hir::class(Class::Unicode(ClassUnicode::new([
+    ///             ClassUnicodeRange::new('A', 'Z'),
+    ///         ]))),
+    ///         Hir::class(Class::Unicode(ClassUnicode::new([
+    ///             ClassUnicodeRange::new('a', 'z'),
+    ///         ]))),
+    ///     ]),
+    /// ]);
+    /// assert_eq!(expected, hir);
+    /// ```
+    ///
+    /// Note that these sorts of simplifications are not guaranteed.
+    pub fn alternation(subs: Vec<Hir>) -> Hir {
+        // We rebuild the alternation by simplifying it. We proceed similarly
+        // as the concatenation case. But in this case, there's no literal
+        // simplification happening. We're just flattening alternations.
+        let mut new = Vec::with_capacity(subs.len());
+        for sub in subs {
+            let (kind, props) = sub.into_parts();
+            match kind {
+                HirKind::Alternation(subs2) => {
+                    new.extend(subs2);
+                }
+                kind => {
+                    new.push(Hir { kind, props });
+                }
+            }
+        }
+        if new.is_empty() {
+            return Hir::fail();
+        } else if new.len() == 1 {
+            return new.pop().unwrap();
+        }
+        // Now that it's completely flattened, look for the special case of
+        // 'char1|char2|...|charN' and collapse that into a class. Note that
+        // we look for 'char' first and then bytes. The issue here is that if
+        // we find both non-ASCII codepoints and non-ASCII singleton bytes,
+        // then it isn't actually possible to smush them into a single class.
+        // (Because classes are either "all codepoints" or "all bytes." You
+        // can't have a class that matches both non-ASCII but valid UTF-8 and
+        // invalid UTF-8.) So we look for all chars and then all bytes, and
+        // don't handle anything else.
+        if let Some(singletons) = singleton_chars(&new) {
+            let it = singletons
+                .into_iter()
+                .map(|ch| ClassUnicodeRange { start: ch, end: ch });
+            return Hir::class(Class::Unicode(ClassUnicode::new(it)));
+        }
+        if let Some(singletons) = singleton_bytes(&new) {
+            let it = singletons
+                .into_iter()
+                .map(|b| ClassBytesRange { start: b, end: b });
+            return Hir::class(Class::Bytes(ClassBytes::new(it)));
+        }
+        // Similar to singleton chars, we can also look for alternations of
+        // classes. Those can be smushed into a single class.
+        if let Some(cls) = class_chars(&new) {
+            return Hir::class(cls);
+        }
+        if let Some(cls) = class_bytes(&new) {
+            return Hir::class(cls);
+        }
+        // Factor out a common prefix if we can, which might potentially
+        // simplify the expression and unlock other optimizations downstream.
+        // It also might generally make NFA matching and DFA construction
+        // faster by reducing the scope of branching in the regex.
+        new = match lift_common_prefix(new) {
+            Ok(hir) => return hir,
+            Err(unchanged) => unchanged,
+        };
+        let props = Properties::alternation(&new);
+        Hir { kind: HirKind::Alternation(new), props }
+    }
+
+    /// Returns an HIR expression for `.`.
+    ///
+    /// * [`Dot::AnyChar`] maps to `(?su-R:.)`.
+    /// * [`Dot::AnyByte`] maps to `(?s-Ru:.)`.
+    /// * [`Dot::AnyCharExceptLF`] maps to `(?u-Rs:.)`.
+    /// * [`Dot::AnyCharExceptCRLF`] maps to `(?Ru-s:.)`.
+    /// * [`Dot::AnyByteExceptLF`] maps to `(?-Rsu:.)`.
+    /// * [`Dot::AnyByteExceptCRLF`] maps to `(?R-su:.)`.
+    ///
+    /// # Example
+    ///
+    /// Note that this is a convenience routine for constructing the correct
+    /// character class based on the value of `Dot`. There is no explicit "dot"
+    /// HIR value. It is just an abbreviation for a common character class.
+    ///
+    /// ```
+    /// use regex_syntax::hir::{Hir, Dot, Class, ClassBytes, ClassBytesRange};
+    ///
+    /// let hir = Hir::dot(Dot::AnyByte);
+    /// let expected = Hir::class(Class::Bytes(ClassBytes::new([
+    ///     ClassBytesRange::new(0x00, 0xFF),
+    /// ])));
+    /// assert_eq!(expected, hir);
+    /// ```
+    #[inline]
+    pub fn dot(dot: Dot) -> Hir {
+        match dot {
+            Dot::AnyChar => {
+                let mut cls = ClassUnicode::empty();
+                cls.push(ClassUnicodeRange::new('\0', '\u{10FFFF}'));
+                Hir::class(Class::Unicode(cls))
+            }
+            Dot::AnyByte => {
+                let mut cls = ClassBytes::empty();
+                cls.push(ClassBytesRange::new(b'\0', b'\xFF'));
+                Hir::class(Class::Bytes(cls))
+            }
+            Dot::AnyCharExcept(ch) => {
+                let mut cls =
+                    ClassUnicode::new([ClassUnicodeRange::new(ch, ch)]);
+                cls.negate();
+                Hir::class(Class::Unicode(cls))
+            }
+            Dot::AnyCharExceptLF => {
+                let mut cls = ClassUnicode::empty();
+                cls.push(ClassUnicodeRange::new('\0', '\x09'));
+                cls.push(ClassUnicodeRange::new('\x0B', '\u{10FFFF}'));
+                Hir::class(Class::Unicode(cls))
+            }
+            Dot::AnyCharExceptCRLF => {
+                let mut cls = ClassUnicode::empty();
+                cls.push(ClassUnicodeRange::new('\0', '\x09'));
+                cls.push(ClassUnicodeRange::new('\x0B', '\x0C'));
+                cls.push(ClassUnicodeRange::new('\x0E', '\u{10FFFF}'));
+                Hir::class(Class::Unicode(cls))
+            }
+            Dot::AnyByteExcept(byte) => {
+                let mut cls =
+                    ClassBytes::new([ClassBytesRange::new(byte, byte)]);
+                cls.negate();
+                Hir::class(Class::Bytes(cls))
+            }
+            Dot::AnyByteExceptLF => {
+                let mut cls = ClassBytes::empty();
+                cls.push(ClassBytesRange::new(b'\0', b'\x09'));
+                cls.push(ClassBytesRange::new(b'\x0B', b'\xFF'));
+                Hir::class(Class::Bytes(cls))
+            }
+            Dot::AnyByteExceptCRLF => {
+                let mut cls = ClassBytes::empty();
+                cls.push(ClassBytesRange::new(b'\0', b'\x09'));
+                cls.push(ClassBytesRange::new(b'\x0B', b'\x0C'));
+                cls.push(ClassBytesRange::new(b'\x0E', b'\xFF'));
+                Hir::class(Class::Bytes(cls))
+            }
+        }
+    }
+}
+
+/// The underlying kind of an arbitrary [`Hir`] expression.
+///
+/// An `HirKind` is principally useful for doing case analysis on the type
+/// of a regular expression. If you're looking to build new `Hir` values,
+/// then you _must_ use the smart constructors defined on `Hir`, like
+/// [`Hir::repetition`], to build new `Hir` values. The API intentionally does
+/// not expose any way of building an `Hir` directly from an `HirKind`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum HirKind {
+    /// The empty regular expression, which matches everything, including the
+    /// empty string.
+    Empty,
+    /// A literal string that matches exactly these bytes.
+    Literal(Literal),
+    /// A single character class that matches any of the characters in the
+    /// class. A class can either consist of Unicode scalar values as
+    /// characters, or it can use bytes.
+    ///
+    /// A class may be empty. In which case, it matches nothing.
+    Class(Class),
+    /// A look-around assertion. A look-around match always has zero length.
+    Look(Look),
+    /// A repetition operation applied to a sub-expression.
+    Repetition(Repetition),
+    /// A capturing group, which contains a sub-expression.
+    Capture(Capture),
+    /// A concatenation of expressions.
+    ///
+    /// A concatenation matches only if each of its sub-expressions match one
+    /// after the other.
+    ///
+    /// Concatenations are guaranteed by `Hir`'s smart constructors to always
+    /// have at least two sub-expressions.
+    Concat(Vec<Hir>),
+    /// An alternation of expressions.
+    ///
+    /// An alternation matches only if at least one of its sub-expressions
+    /// match. If multiple sub-expressions match, then the leftmost is
+    /// preferred.
+    ///
+    /// Alternations are guaranteed by `Hir`'s smart constructors to always
+    /// have at least two sub-expressions.
+    Alternation(Vec<Hir>),
+}
+
+impl HirKind {
+    /// Returns a slice of this kind's sub-expressions, if any.
+    pub fn subs(&self) -> &[Hir] {
+        use core::slice::from_ref;
+
+        match *self {
+            HirKind::Empty
+            | HirKind::Literal(_)
+            | HirKind::Class(_)
+            | HirKind::Look(_) => &[],
+            HirKind::Repetition(Repetition { ref sub, .. }) => from_ref(sub),
+            HirKind::Capture(Capture { ref sub, .. }) => from_ref(sub),
+            HirKind::Concat(ref subs) => subs,
+            HirKind::Alternation(ref subs) => subs,
+        }
+    }
+}
+
+impl core::fmt::Debug for Hir {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        self.kind.fmt(f)
+    }
+}
+
+/// Print a display representation of this Hir.
+///
+/// The result of this is a valid regular expression pattern string.
+///
+/// This implementation uses constant stack space and heap space proportional
+/// to the size of the `Hir`.
+impl core::fmt::Display for Hir {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        crate::hir::print::Printer::new().print(self, f)
+    }
+}
+
+/// The high-level intermediate representation of a literal.
+///
+/// A literal corresponds to `0` or more bytes that should be matched
+/// literally. The smart constructors defined on `Hir` will automatically
+/// concatenate adjacent literals into one literal, and will even automatically
+/// replace empty literals with `Hir::empty()`.
+///
+/// Note that despite a literal being represented by a sequence of bytes, its
+/// `Debug` implementation will attempt to print it as a normal string. (That
+/// is, not a sequence of decimal numbers.)
+#[derive(Clone, Eq, PartialEq)]
+pub struct Literal(pub Box<[u8]>);
+
+impl core::fmt::Debug for Literal {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        crate::debug::Bytes(&self.0).fmt(f)
+    }
+}
+
+/// The high-level intermediate representation of a character class.
+///
+/// A character class corresponds to a set of characters. A character is either
+/// defined by a Unicode scalar value or a byte.
+///
+/// A character class, regardless of its character type, is represented by a
+/// sequence of non-overlapping non-adjacent ranges of characters.
+///
+/// There are no guarantees about which class variant is used. Generally
+/// speaking, the Unicode variant is used whenever a class needs to contain
+/// non-ASCII Unicode scalar values. But the Unicode variant can be used even
+/// when Unicode mode is disabled. For example, at the time of writing, the
+/// regex `(?-u:a|\xc2\xa0)` will compile down to HIR for the Unicode class
+/// `[a\u00A0]` due to optimizations.
+///
+/// Note that `Bytes` variant may be produced even when it exclusively matches
+/// valid UTF-8. This is because a `Bytes` variant represents an intention by
+/// the author of the regular expression to disable Unicode mode, which in turn
+/// impacts the semantics of case insensitive matching. For example, `(?i)k`
+/// and `(?i-u)k` will not match the same set of strings.
+#[derive(Clone, Eq, PartialEq)]
+pub enum Class {
+    /// A set of characters represented by Unicode scalar values.
+    Unicode(ClassUnicode),
+    /// A set of characters represented by arbitrary bytes (one byte per
+    /// character).
+    Bytes(ClassBytes),
+}
+
+impl Class {
+    /// Apply Unicode simple case folding to this character class, in place.
+    /// The character class will be expanded to include all simple case folded
+    /// character variants.
+    ///
+    /// If this is a byte oriented character class, then this will be limited
+    /// to the ASCII ranges `A-Z` and `a-z`.
+    ///
+    /// # Panics
+    ///
+    /// This routine panics when the case mapping data necessary for this
+    /// routine to complete is unavailable. This occurs when the `unicode-case`
+    /// feature is not enabled and the underlying class is Unicode oriented.
+    ///
+    /// Callers should prefer using `try_case_fold_simple` instead, which will
+    /// return an error instead of panicking.
+    pub fn case_fold_simple(&mut self) {
+        match *self {
+            Class::Unicode(ref mut x) => x.case_fold_simple(),
+            Class::Bytes(ref mut x) => x.case_fold_simple(),
+        }
+    }
+
+    /// Apply Unicode simple case folding to this character class, in place.
+    /// The character class will be expanded to include all simple case folded
+    /// character variants.
+    ///
+    /// If this is a byte oriented character class, then this will be limited
+    /// to the ASCII ranges `A-Z` and `a-z`.
+    ///
+    /// # Error
+    ///
+    /// This routine returns an error when the case mapping data necessary
+    /// for this routine to complete is unavailable. This occurs when the
+    /// `unicode-case` feature is not enabled and the underlying class is
+    /// Unicode oriented.
+    pub fn try_case_fold_simple(
+        &mut self,
+    ) -> core::result::Result<(), CaseFoldError> {
+        match *self {
+            Class::Unicode(ref mut x) => x.try_case_fold_simple()?,
+            Class::Bytes(ref mut x) => x.case_fold_simple(),
+        }
+        Ok(())
+    }
+
+    /// Negate this character class in place.
+    ///
+    /// After completion, this character class will contain precisely the
+    /// characters that weren't previously in the class.
+    pub fn negate(&mut self) {
+        match *self {
+            Class::Unicode(ref mut x) => x.negate(),
+            Class::Bytes(ref mut x) => x.negate(),
+        }
+    }
+
+    /// Returns true if and only if this character class will only ever match
+    /// valid UTF-8.
+    ///
+    /// A character class can match invalid UTF-8 only when the following
+    /// conditions are met:
+    ///
+    /// 1. The translator was configured to permit generating an expression
+    ///    that can match invalid UTF-8. (By default, this is disabled.)
+    /// 2. Unicode mode (via the `u` flag) was disabled either in the concrete
+    ///    syntax or in the parser builder. By default, Unicode mode is
+    ///    enabled.
+    pub fn is_utf8(&self) -> bool {
+        match *self {
+            Class::Unicode(_) => true,
+            Class::Bytes(ref x) => x.is_ascii(),
+        }
+    }
+
+    /// Returns the length, in bytes, of the smallest string matched by this
+    /// character class.
+    ///
+    /// For non-empty byte oriented classes, this always returns `1`. For
+    /// non-empty Unicode oriented classes, this can return `1`, `2`, `3` or
+    /// `4`. For empty classes, `None` is returned. It is impossible for `0` to
+    /// be returned.
+    ///
+    /// # Example
+    ///
+    /// This example shows some examples of regexes and their corresponding
+    /// minimum length, if any.
+    ///
+    /// ```
+    /// use regex_syntax::{hir::Properties, parse};
+    ///
+    /// // The empty string has a min length of 0.
+    /// let hir = parse(r"")?;
+    /// assert_eq!(Some(0), hir.properties().minimum_len());
+    /// // As do other types of regexes that only match the empty string.
+    /// let hir = parse(r"^$\b\B")?;
+    /// assert_eq!(Some(0), hir.properties().minimum_len());
+    /// // A regex that can match the empty string but match more is still 0.
+    /// let hir = parse(r"a*")?;
+    /// assert_eq!(Some(0), hir.properties().minimum_len());
+    /// // A regex that matches nothing has no minimum defined.
+    /// let hir = parse(r"[a&&b]")?;
+    /// assert_eq!(None, hir.properties().minimum_len());
+    /// // Character classes usually have a minimum length of 1.
+    /// let hir = parse(r"\w")?;
+    /// assert_eq!(Some(1), hir.properties().minimum_len());
+    /// // But sometimes Unicode classes might be bigger!
+    /// let hir = parse(r"\p{Cyrillic}")?;
+    /// assert_eq!(Some(2), hir.properties().minimum_len());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn minimum_len(&self) -> Option<usize> {
+        match *self {
+            Class::Unicode(ref x) => x.minimum_len(),
+            Class::Bytes(ref x) => x.minimum_len(),
+        }
+    }
+
+    /// Returns the length, in bytes, of the longest string matched by this
+    /// character class.
+    ///
+    /// For non-empty byte oriented classes, this always returns `1`. For
+    /// non-empty Unicode oriented classes, this can return `1`, `2`, `3` or
+    /// `4`. For empty classes, `None` is returned. It is impossible for `0` to
+    /// be returned.
+    ///
+    /// # Example
+    ///
+    /// This example shows some examples of regexes and their corresponding
+    /// maximum length, if any.
+    ///
+    /// ```
+    /// use regex_syntax::{hir::Properties, parse};
+    ///
+    /// // The empty string has a max length of 0.
+    /// let hir = parse(r"")?;
+    /// assert_eq!(Some(0), hir.properties().maximum_len());
+    /// // As do other types of regexes that only match the empty string.
+    /// let hir = parse(r"^$\b\B")?;
+    /// assert_eq!(Some(0), hir.properties().maximum_len());
+    /// // A regex that matches nothing has no maximum defined.
+    /// let hir = parse(r"[a&&b]")?;
+    /// assert_eq!(None, hir.properties().maximum_len());
+    /// // Bounded repeats work as you expect.
+    /// let hir = parse(r"x{2,10}")?;
+    /// assert_eq!(Some(10), hir.properties().maximum_len());
+    /// // An unbounded repeat means there is no maximum.
+    /// let hir = parse(r"x{2,}")?;
+    /// assert_eq!(None, hir.properties().maximum_len());
+    /// // With Unicode enabled, \w can match up to 4 bytes!
+    /// let hir = parse(r"\w")?;
+    /// assert_eq!(Some(4), hir.properties().maximum_len());
+    /// // Without Unicode enabled, \w matches at most 1 byte.
+    /// let hir = parse(r"(?-u)\w")?;
+    /// assert_eq!(Some(1), hir.properties().maximum_len());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn maximum_len(&self) -> Option<usize> {
+        match *self {
+            Class::Unicode(ref x) => x.maximum_len(),
+            Class::Bytes(ref x) => x.maximum_len(),
+        }
+    }
+
+    /// Returns true if and only if this character class is empty. That is,
+    /// it has no elements.
+    ///
+    /// An empty class can never match anything, including an empty string.
+    pub fn is_empty(&self) -> bool {
+        match *self {
+            Class::Unicode(ref x) => x.ranges().is_empty(),
+            Class::Bytes(ref x) => x.ranges().is_empty(),
+        }
+    }
+
+    /// If this class consists of exactly one element (whether a codepoint or a
+    /// byte), then return it as a literal byte string.
+    ///
+    /// If this class is empty or contains more than one element, then `None`
+    /// is returned.
+    pub fn literal(&self) -> Option<Vec<u8>> {
+        match *self {
+            Class::Unicode(ref x) => x.literal(),
+            Class::Bytes(ref x) => x.literal(),
+        }
+    }
+}
+
+impl core::fmt::Debug for Class {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        use crate::debug::Byte;
+
+        let mut fmter = f.debug_set();
+        match *self {
+            Class::Unicode(ref cls) => {
+                for r in cls.ranges().iter() {
+                    fmter.entry(&(r.start..=r.end));
+                }
+            }
+            Class::Bytes(ref cls) => {
+                for r in cls.ranges().iter() {
+                    fmter.entry(&(Byte(r.start)..=Byte(r.end)));
+                }
+            }
+        }
+        fmter.finish()
+    }
+}
+
+/// A set of characters represented by Unicode scalar values.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassUnicode {
+    set: IntervalSet<ClassUnicodeRange>,
+}
+
+impl ClassUnicode {
+    /// Create a new class from a sequence of ranges.
+    ///
+    /// The given ranges do not need to be in any specific order, and ranges
+    /// may overlap. Ranges will automatically be sorted into a canonical
+    /// non-overlapping order.
+    pub fn new<I>(ranges: I) -> ClassUnicode
+    where
+        I: IntoIterator<Item = ClassUnicodeRange>,
+    {
+        ClassUnicode { set: IntervalSet::new(ranges) }
+    }
+
+    /// Create a new class with no ranges.
+    ///
+    /// An empty class matches nothing. That is, it is equivalent to
+    /// [`Hir::fail`].
+    pub fn empty() -> ClassUnicode {
+        ClassUnicode::new(vec![])
+    }
+
+    /// Add a new range to this set.
+    pub fn push(&mut self, range: ClassUnicodeRange) {
+        self.set.push(range);
+    }
+
+    /// Return an iterator over all ranges in this class.
+    ///
+    /// The iterator yields ranges in ascending order.
+    pub fn iter(&self) -> ClassUnicodeIter<'_> {
+        ClassUnicodeIter(self.set.iter())
+    }
+
+    /// Return the underlying ranges as a slice.
+    pub fn ranges(&self) -> &[ClassUnicodeRange] {
+        self.set.intervals()
+    }
+
+    /// Expand this character class such that it contains all case folded
+    /// characters, according to Unicode's "simple" mapping. For example, if
+    /// this class consists of the range `a-z`, then applying case folding will
+    /// result in the class containing both the ranges `a-z` and `A-Z`.
+    ///
+    /// # Panics
+    ///
+    /// This routine panics when the case mapping data necessary for this
+    /// routine to complete is unavailable. This occurs when the `unicode-case`
+    /// feature is not enabled.
+    ///
+    /// Callers should prefer using `try_case_fold_simple` instead, which will
+    /// return an error instead of panicking.
+    pub fn case_fold_simple(&mut self) {
+        self.set
+            .case_fold_simple()
+            .expect("unicode-case feature must be enabled");
+    }
+
+    /// Expand this character class such that it contains all case folded
+    /// characters, according to Unicode's "simple" mapping. For example, if
+    /// this class consists of the range `a-z`, then applying case folding will
+    /// result in the class containing both the ranges `a-z` and `A-Z`.
+    ///
+    /// # Error
+    ///
+    /// This routine returns an error when the case mapping data necessary
+    /// for this routine to complete is unavailable. This occurs when the
+    /// `unicode-case` feature is not enabled.
+    pub fn try_case_fold_simple(
+        &mut self,
+    ) -> core::result::Result<(), CaseFoldError> {
+        self.set.case_fold_simple()
+    }
+
+    /// Negate this character class.
+    ///
+    /// For all `c` where `c` is a Unicode scalar value, if `c` was in this
+    /// set, then it will not be in this set after negation.
+    pub fn negate(&mut self) {
+        self.set.negate();
+    }
+
+    /// Union this character class with the given character class, in place.
+    pub fn union(&mut self, other: &ClassUnicode) {
+        self.set.union(&other.set);
+    }
+
+    /// Intersect this character class with the given character class, in
+    /// place.
+    pub fn intersect(&mut self, other: &ClassUnicode) {
+        self.set.intersect(&other.set);
+    }
+
+    /// Subtract the given character class from this character class, in place.
+    pub fn difference(&mut self, other: &ClassUnicode) {
+        self.set.difference(&other.set);
+    }
+
+    /// Compute the symmetric difference of the given character classes, in
+    /// place.
+    ///
+    /// This computes the symmetric difference of two character classes. This
+    /// removes all elements in this class that are also in the given class,
+    /// but also adds all elements from the given class that aren't in this
+    /// class. That is, the class will contain all elements in either class,
+    /// but will not contain any elements that are in both classes.
+    pub fn symmetric_difference(&mut self, other: &ClassUnicode) {
+        self.set.symmetric_difference(&other.set);
+    }
+
+    /// Returns true if and only if this character class will either match
+    /// nothing or only ASCII bytes. Stated differently, this returns false
+    /// if and only if this class contains a non-ASCII codepoint.
+    pub fn is_ascii(&self) -> bool {
+        self.set.intervals().last().map_or(true, |r| r.end <= '\x7F')
+    }
+
+    /// Returns the length, in bytes, of the smallest string matched by this
+    /// character class.
+    ///
+    /// Returns `None` when the class is empty.
+    pub fn minimum_len(&self) -> Option<usize> {
+        let first = self.ranges().get(0)?;
+        // Correct because c1 < c2 implies c1.len_utf8() <= c2.len_utf8().
+        Some(first.start.len_utf8())
+    }
+
+    /// Returns the length, in bytes, of the longest string matched by this
+    /// character class.
+    ///
+    /// Returns `None` when the class is empty.
+    pub fn maximum_len(&self) -> Option<usize> {
+        let last = self.ranges().last()?;
+        // Correct because c1 < c2 implies c1.len_utf8() <= c2.len_utf8().
+        Some(last.end.len_utf8())
+    }
+
+    /// If this class consists of exactly one codepoint, then return it as
+    /// a literal byte string.
+    ///
+    /// If this class is empty or contains more than one codepoint, then `None`
+    /// is returned.
+    pub fn literal(&self) -> Option<Vec<u8>> {
+        let rs = self.ranges();
+        if rs.len() == 1 && rs[0].start == rs[0].end {
+            Some(rs[0].start.encode_utf8(&mut [0; 4]).to_string().into_bytes())
+        } else {
+            None
+        }
+    }
+
+    /// If this class consists of only ASCII ranges, then return its
+    /// corresponding and equivalent byte class.
+    pub fn to_byte_class(&self) -> Option<ClassBytes> {
+        if !self.is_ascii() {
+            return None;
+        }
+        Some(ClassBytes::new(self.ranges().iter().map(|r| {
+            // Since we are guaranteed that our codepoint range is ASCII, the
+            // 'u8::try_from' calls below are guaranteed to be correct.
+            ClassBytesRange {
+                start: u8::try_from(r.start).unwrap(),
+                end: u8::try_from(r.end).unwrap(),
+            }
+        })))
+    }
+}
+
+/// An iterator over all ranges in a Unicode character class.
+///
+/// The lifetime `'a` refers to the lifetime of the underlying class.
+#[derive(Debug)]
+pub struct ClassUnicodeIter<'a>(IntervalSetIter<'a, ClassUnicodeRange>);
+
+impl<'a> Iterator for ClassUnicodeIter<'a> {
+    type Item = &'a ClassUnicodeRange;
+
+    fn next(&mut self) -> Option<&'a ClassUnicodeRange> {
+        self.0.next()
+    }
+}
+
+/// A single range of characters represented by Unicode scalar values.
+///
+/// The range is closed. That is, the start and end of the range are included
+/// in the range.
+#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
+pub struct ClassUnicodeRange {
+    start: char,
+    end: char,
+}
+
+impl core::fmt::Debug for ClassUnicodeRange {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        // Whitespace and control characters are rendered as their hexadecimal
+        // codepoint (e.g., a line feed becomes `0xA`), while every other
+        // character is rendered as itself.
+        let show = |c: char| {
+            if c.is_whitespace() || c.is_control() {
+                format!("0x{:X}", u32::from(c))
+            } else {
+                c.to_string()
+            }
+        };
+        let (start, end) = (show(self.start), show(self.end));
+        f.debug_struct("ClassUnicodeRange")
+            .field("start", &start)
+            .field("end", &end)
+            .finish()
+    }
+}
+
+impl Interval for ClassUnicodeRange {
+    type Bound = char;
+
+    #[inline]
+    fn lower(&self) -> Self::Bound {
+        self.start
+    }
+    #[inline]
+    fn upper(&self) -> Self::Bound {
+        self.end
+    }
+    #[inline]
+    fn set_lower(&mut self, bound: Self::Bound) {
+        self.start = bound;
+    }
+    #[inline]
+    fn set_upper(&mut self, bound: Self::Bound) {
+        self.end = bound;
+    }
+
+    /// Push the simple case folding of every codepoint in this range onto
+    /// the given vector, one single-codepoint range per folded codepoint.
+    ///
+    /// Nothing is done to keep the given vector in canonical order.
+    fn case_fold_simple(
+        &self,
+        ranges: &mut Vec<ClassUnicodeRange>,
+    ) -> Result<(), unicode::CaseFoldError> {
+        let mut folder = unicode::SimpleCaseFolder::new()?;
+        // Only walk the range when the folder reports an overlap with it.
+        if folder.overlaps(self.start, self.end) {
+            let scalars = u32::from(self.start)..=u32::from(self.end);
+            for c in scalars.filter_map(char::from_u32) {
+                for &folded in folder.mapping(c) {
+                    ranges.push(ClassUnicodeRange::new(folded, folded));
+                }
+            }
+        }
+        Ok(())
+    }
+}
+
+impl ClassUnicodeRange {
+    /// Build a Unicode scalar value range for use in a character class.
+    ///
+    /// Construction is delegated to `create`, and the result is always in
+    /// canonical form, i.e., it satisfies the invariant `start <= end`.
+    pub fn new(start: char, end: char) -> ClassUnicodeRange {
+        Self::create(start, end)
+    }
+
+    /// The first scalar value in this range (inclusive).
+    ///
+    /// This is always less than or equal to the last scalar value in the
+    /// range.
+    pub fn start(&self) -> char {
+        self.start
+    }
+
+    /// The last scalar value in this range (inclusive).
+    ///
+    /// This is always greater than or equal to the first scalar value in
+    /// the range.
+    pub fn end(&self) -> char {
+        self.end
+    }
+
+    /// Returns how many codepoints this range contains.
+    pub fn len(&self) -> usize {
+        let (lo, hi) = (u32::from(self.start), u32::from(self.end));
+        // On 16-bit targets this is likely to panic, since a usize there can
+        // only fit 2^16. It isn't clear what else to do, short of returning
+        // an error when building a Unicode class containing a range whose
+        // length overflows usize. (Which is probably quite common on 16-bit
+        // targets: it would imply that neither '.' nor '\p{any}' could be
+        // built.)
+        usize::try_from(1 + hi - lo).expect("char class len fits in usize")
+    }
+}
+
+/// A set of characters represented by arbitrary bytes.
+///
+/// Each byte corresponds to one character. Ranges live in an interval set.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassBytes {
+    set: IntervalSet<ClassBytesRange>,
+}
+
+impl ClassBytes {
+    /// Build a class out of the given ranges.
+    ///
+    /// No particular order is required of the given ranges, and they are
+    /// permitted to overlap. They are automatically sorted into a canonical
+    /// non-overlapping order.
+    pub fn new<I>(ranges: I) -> ClassBytes
+    where
+        I: IntoIterator<Item = ClassBytesRange>,
+    {
+        Self { set: IntervalSet::new(ranges) }
+    }
+
+    /// Build a class that contains no ranges at all.
+    ///
+    /// Such a class matches nothing, which makes it equivalent to
+    /// [`Hir::fail`].
+    pub fn empty() -> ClassBytes {
+        Self::new(Vec::new())
+    }
+
+    /// Insert the given range into this class.
+    pub fn push(&mut self, range: ClassBytesRange) {
+        self.set.push(range);
+    }
+
+    /// Returns an iterator over every range in this class.
+    ///
+    /// Ranges are yielded in ascending order.
+    pub fn iter(&self) -> ClassBytesIter<'_> {
+        ClassBytesIter(self.set.iter())
+    }
+
+    /// Returns the ranges of this class as a slice.
+    pub fn ranges(&self) -> &[ClassBytesRange] {
+        self.set.intervals()
+    }
+
+    /// Grow this class so that it also contains the case folded counterpart
+    /// of every character already in it. For example, a class made up of
+    /// the range `a-z` will contain both of the ranges `a-z` and `A-Z` once
+    /// case folding has been applied.
+    ///
+    /// Only ASCII case folding is applied here, which is limited to the
+    /// characters `a-z` and `A-Z`.
+    pub fn case_fold_simple(&mut self) {
+        self.set.case_fold_simple().expect("ASCII case folding never fails");
+    }
+
+    /// Negate this byte class, in place.
+    ///
+    /// That is, for every byte `b`, if `b` was in this set before negation,
+    /// then it will not be in this set afterwards.
+    pub fn negate(&mut self) {
+        self.set.negate();
+    }
+
+    /// Replace this byte class with its union with the given byte class.
+    pub fn union(&mut self, other: &ClassBytes) {
+        self.set.union(&other.set);
+    }
+
+    /// Replace this byte class with its intersection with the given class.
+    pub fn intersect(&mut self, other: &ClassBytes) {
+        self.set.intersect(&other.set);
+    }
+
+    /// Remove the given byte class from this byte class, in place.
+    pub fn difference(&mut self, other: &ClassBytes) {
+        self.set.difference(&other.set);
+    }
+
+    /// Replace this byte class with its symmetric difference with the given
+    /// byte class, in place.
+    ///
+    /// Every element of this class that is also in the given class is
+    /// removed, and every element of the given class that isn't in this
+    /// class is added. The result therefore contains the elements found in
+    /// exactly one of the two classes.
+    pub fn symmetric_difference(&mut self, other: &ClassBytes) {
+        self.set.symmetric_difference(&other.set);
+    }
+
+    /// Returns true if and only if this class matches either nothing or only
+    /// ASCII bytes. Put another way, false is returned if and only if this
+    /// class contains a non-ASCII byte.
+    pub fn is_ascii(&self) -> bool {
+        self.ranges().last().map_or(true, |last| last.end.is_ascii())
+    }
+
+    /// Returns the length, in bytes, of the shortest string that this class
+    /// can match.
+    ///
+    /// `None` is returned when the class is empty.
+    pub fn minimum_len(&self) -> Option<usize> {
+        // Any class with at least one range is reported as length 1.
+        match self.ranges() {
+            [] => None,
+            _ => Some(1),
+        }
+    }
+
+    /// Returns the length, in bytes, of the longest string that this class
+    /// can match.
+    ///
+    /// `None` is returned when the class is empty.
+    pub fn maximum_len(&self) -> Option<usize> {
+        // Any class with at least one range is reported as length 1.
+        match self.ranges() {
+            [] => None,
+            _ => Some(1),
+        }
+    }
+
+    /// Returns the only byte in this class as a one-byte literal string.
+    ///
+    /// This succeeds only for a class made up of a single range whose start
+    /// and end are equal. An empty class, or one containing more than one
+    /// byte, yields `None`.
+    pub fn literal(&self) -> Option<Vec<u8>> {
+        let rs = self.ranges();
+        if rs.len() != 1 || rs[0].start != rs[0].end {
+            return None;
+        }
+        let only = rs[0].start;
+        Some(vec![only])
+    }
+
+    /// Convert this class into its equivalent Unicode class, which is only
+    /// possible (i.e., `Some` is returned) when this class is ASCII-only.
+    pub fn to_unicode_class(&self) -> Option<ClassUnicode> {
+        if !self.is_ascii() {
+            return None;
+        }
+        // Every bound is known to be ASCII at this point. Therefore, the
+        // 'char::from' calls below are correct: they cannot erroneously
+        // turn a raw non-ASCII byte value into the codepoint with the same
+        // number.
+        let widen = |r: &ClassBytesRange| ClassUnicodeRange {
+            start: char::from(r.start),
+            end: char::from(r.end),
+        };
+        Some(ClassUnicode::new(self.ranges().iter().map(widen)))
+    }
+}
+
+/// An iterator over the ranges of a byte character class.
+///
+/// The lifetime `'a` is the lifetime of the class being iterated over.
+#[derive(Debug)]
+pub struct ClassBytesIter<'a>(IntervalSetIter<'a, ClassBytesRange>);
+
+impl<'a> Iterator for ClassBytesIter<'a> {
+    type Item = &'a ClassBytesRange;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.0.next()
+    }
+}
+
+/// A single range of characters represented by arbitrary bytes.
+///
+/// The range is closed: both the `start` and the `end` bytes are themselves
+/// members of the range.
+#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
+pub struct ClassBytesRange {
+    start: u8,
+    end: u8,
+}
+
+impl Interval for ClassBytesRange {
+    type Bound = u8;
+
+    #[inline]
+    fn lower(&self) -> Self::Bound {
+        self.start
+    }
+    #[inline]
+    fn upper(&self) -> Self::Bound {
+        self.end
+    }
+    #[inline]
+    fn set_lower(&mut self, bound: Self::Bound) {
+        self.start = bound;
+    }
+    #[inline]
+    fn set_upper(&mut self, bound: Self::Bound) {
+        self.end = bound;
+    }
+
+    /// Push the simple case folding of this byte range onto the given
+    /// vector. Only the ASCII letters (`a-z` and `A-Z`) have case mappings.
+    /// Nothing is done to keep the given vector in canonical order.
+    fn case_fold_simple(
+        &self,
+        ranges: &mut Vec<ClassBytesRange>,
+    ) -> Result<(), unicode::CaseFoldError> {
+        let lowercase = ClassBytesRange::new(b'a', b'z');
+        if !lowercase.is_intersection_empty(self) {
+            let first = cmp::max(self.start, b'a') - 32;
+            let last = cmp::min(self.end, b'z') - 32;
+            ranges.push(ClassBytesRange::new(first, last));
+        }
+        let uppercase = ClassBytesRange::new(b'A', b'Z');
+        if !uppercase.is_intersection_empty(self) {
+            let first = cmp::max(self.start, b'A') + 32;
+            let last = cmp::min(self.end, b'Z') + 32;
+            ranges.push(ClassBytesRange::new(first, last));
+        }
+        Ok(())
+    }
+}
+
+impl ClassBytesRange {
+    /// Build a byte range for use in a character class.
+    ///
+    /// Construction is delegated to `create`, and the result is always in
+    /// canonical form, i.e., it satisfies the invariant `start <= end`.
+    pub fn new(start: u8, end: u8) -> ClassBytesRange {
+        Self::create(start, end)
+    }
+
+    /// The first byte in this range (inclusive).
+    ///
+    /// This is always less than or equal to the last byte in the
+    /// range.
+    pub fn start(&self) -> u8 {
+        self.start
+    }
+
+    /// The last byte in this range (inclusive).
+    ///
+    /// This is always greater than or equal to the first byte in the
+    /// range.
+    pub fn end(&self) -> u8 {
+        self.end
+    }
+
+    /// Returns how many bytes this range contains.
+    pub fn len(&self) -> usize {
+        // The subtraction can only fail (and panic) when 'end < start'.
+        let span = self.end.checked_sub(self.start).unwrap();
+        usize::from(span).checked_add(1).unwrap()
+    }
+}
+
+impl core::fmt::Debug for ClassBytesRange {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        let mut out = f.debug_struct("ClassBytesRange");
+        out.field("start", &crate::debug::Byte(self.start));
+        out.field("end", &crate::debug::Byte(self.end));
+        out.finish()
+    }
+}
+
+/// The high-level intermediate representation for a look-around assertion.
+///
+/// An assertion match is always zero-length. Also called an "empty match."
+/// The discriminant of every variant is a distinct single bit.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum Look {
+    /// Match the beginning of text, i.e., the starting position of the input.
+    Start = 1 << 0,
+    /// Match the end of text. Specifically, this matches at the ending
+    /// position of the input.
+    End = 1 << 1,
+    /// Match the beginning of a line or the beginning of text. Specifically,
+    /// this matches at the starting position of the input, or at the position
+    /// immediately following a `\n` character.
+    StartLF = 1 << 2,
+    /// Match the end of a line or the end of text. Specifically, this matches
+    /// at the end position of the input, or at the position immediately
+    /// preceding a `\n` character.
+    EndLF = 1 << 3,
+    /// Match the beginning of a line or the beginning of text. Specifically,
+    /// this matches at the starting position of the input, or at the position
+    /// immediately following either a `\r` or `\n` character, but never after
+    /// a `\r` when a `\n` follows.
+    StartCRLF = 1 << 4,
+    /// Match the end of a line or the end of text. Specifically, this matches
+    /// at the end position of the input, or at the position immediately
+    /// preceding a `\r` or `\n` character, but never before a `\n` when a `\r`
+    /// precedes it.
+    EndCRLF = 1 << 5,
+    /// Match an ASCII-only word boundary. That is, this matches a position
+    /// where the left adjacent character and right adjacent character
+    /// correspond to a word and non-word or a non-word and word character.
+    WordAscii = 1 << 6,
+    /// Match an ASCII-only negation of a word boundary.
+    WordAsciiNegate = 1 << 7,
+    /// Match a Unicode-aware word boundary. That is, this matches a position
+    /// where the left adjacent character and right adjacent character
+    /// correspond to a word and non-word or a non-word and word character.
+    WordUnicode = 1 << 8,
+    /// Match a Unicode-aware negation of a word boundary.
+    WordUnicodeNegate = 1 << 9,
+    /// Match the start of an ASCII-only word boundary. That is, this matches a
+    /// position at either the beginning of the haystack or where the previous
+    /// character is not a word character and the following character is a word
+    /// character.
+    WordStartAscii = 1 << 10,
+    /// Match the end of an ASCII-only word boundary. That is, this matches
+    /// a position at either the end of the haystack or where the previous
+    /// character is a word character and the following character is not a word
+    /// character.
+    WordEndAscii = 1 << 11,
+    /// Match the start of a Unicode word boundary. That is, this matches a
+    /// position at either the beginning of the haystack or where the previous
+    /// character is not a word character and the following character is a word
+    /// character.
+    WordStartUnicode = 1 << 12,
+    /// Match the end of a Unicode word boundary. That is, this matches a
+    /// position at either the end of the haystack or where the previous
+    /// character is a word character and the following character is not a word
+    /// character.
+    WordEndUnicode = 1 << 13,
+    /// Match the start half of an ASCII-only word boundary. That is, this
+    /// matches a position at either the beginning of the haystack or where the
+    /// previous character is not a word character.
+    WordStartHalfAscii = 1 << 14,
+    /// Match the end half of an ASCII-only word boundary. That is, this
+    /// matches a position at either the end of the haystack or where the
+    /// following character is not a word character.
+    WordEndHalfAscii = 1 << 15,
+    /// Match the start half of a Unicode word boundary. That is, this matches
+    /// a position at either the beginning of the haystack or where the
+    /// previous character is not a word character.
+    WordStartHalfUnicode = 1 << 16,
+    /// Match the end half of a Unicode word boundary. That is, this matches
+    /// a position at either the end of the haystack or where the following
+    /// character is not a word character.
+    WordEndHalfUnicode = 1 << 17,
+}
+
+impl Look {
+    /// Returns the assertion to use in place of this one when searching in
+    /// reverse. For example, `StartLF` becomes `EndLF`.
+    ///
+    /// The plain word boundary assertions (e.g., `WordUnicode`) and their
+    /// negations are returned unchanged.
+    #[inline]
+    pub const fn reversed(self) -> Look {
+        match self {
+            // Direction-agnostic assertions are their own reverse.
+            Look::WordAscii | Look::WordAsciiNegate => self,
+            Look::WordUnicode | Look::WordUnicodeNegate => self,
+            // Everything else swaps with its start/end counterpart.
+            Look::Start => Look::End,
+            Look::End => Look::Start,
+            Look::StartLF => Look::EndLF,
+            Look::EndLF => Look::StartLF,
+            Look::StartCRLF => Look::EndCRLF,
+            Look::EndCRLF => Look::StartCRLF,
+            Look::WordStartAscii => Look::WordEndAscii,
+            Look::WordEndAscii => Look::WordStartAscii,
+            Look::WordStartUnicode => Look::WordEndUnicode,
+            Look::WordEndUnicode => Look::WordStartUnicode,
+            Look::WordStartHalfAscii => Look::WordEndHalfAscii,
+            Look::WordEndHalfAscii => Look::WordStartHalfAscii,
+            Look::WordStartHalfUnicode => Look::WordEndHalfUnicode,
+            Look::WordEndHalfUnicode => Look::WordStartHalfUnicode,
+        }
+    }
+
+    /// Returns the integer that underlies this look-around variant. Passing
+    /// that integer to the [`Look::from_repr`] constructor is guaranteed to
+    /// return the same look-around variant that one started with within a
+    /// semver compatible release of this crate.
+    #[inline]
+    pub const fn as_repr(self) -> u32 {
+        // AFAIK, an 'as' cast is the only zero-cost way to convert an int
+        // enum to an actual int.
+        self as u32
+    }
+
+    /// Returns the `Look` variant whose underlying representation is the
+    /// integer given. `None` is returned when the integer given is not the
+    /// representation of any variant.
+    #[inline]
+    pub const fn from_repr(repr: u32) -> Option<Look> {
+        match repr {
+            0b00_0000_0000_0000_0001 => Some(Look::Start),
+            0b00_0000_0000_0000_0010 => Some(Look::End),
+            0b00_0000_0000_0000_0100 => Some(Look::StartLF),
+            0b00_0000_0000_0000_1000 => Some(Look::EndLF),
+            0b00_0000_0000_0001_0000 => Some(Look::StartCRLF),
+            0b00_0000_0000_0010_0000 => Some(Look::EndCRLF),
+            0b00_0000_0000_0100_0000 => Some(Look::WordAscii),
+            0b00_0000_0000_1000_0000 => Some(Look::WordAsciiNegate),
+            0b00_0000_0001_0000_0000 => Some(Look::WordUnicode),
+            0b00_0000_0010_0000_0000 => Some(Look::WordUnicodeNegate),
+            0b00_0000_0100_0000_0000 => Some(Look::WordStartAscii),
+            0b00_0000_1000_0000_0000 => Some(Look::WordEndAscii),
+            0b00_0001_0000_0000_0000 => Some(Look::WordStartUnicode),
+            0b00_0010_0000_0000_0000 => Some(Look::WordEndUnicode),
+            0b00_0100_0000_0000_0000 => Some(Look::WordStartHalfAscii),
+            0b00_1000_0000_0000_0000 => Some(Look::WordEndHalfAscii),
+            0b01_0000_0000_0000_0000 => Some(Look::WordStartHalfUnicode),
+            0b10_0000_0000_0000_0000 => Some(Look::WordEndHalfUnicode),
+            _ => None,
+        }
+    }
+
+    /// Returns a single codepoint standing in for this look-around
+    /// assertion. A distinct character is guaranteed to be used for each
+    /// assertion.
+    ///
+    /// This is meant for representing a look-around assertion compactly in
+    /// human friendly output that is aimed at a programmer working on regex
+    /// internals.
+    #[inline]
+    pub const fn as_char(self) -> char {
+        match self {
+            Look::Start => 'A',
+            Look::End => 'z',
+            Look::StartLF => '^',
+            Look::EndLF => '$',
+            Look::StartCRLF => 'r',
+            Look::EndCRLF => 'R',
+            Look::WordAscii => 'b',
+            Look::WordAsciiNegate => 'B',
+            Look::WordUnicode => '𝛃',
+            Look::WordUnicodeNegate => '𝚩',
+            Look::WordStartAscii => '<',
+            Look::WordEndAscii => '>',
+            Look::WordStartUnicode => '〈',
+            Look::WordEndUnicode => '〉',
+            Look::WordStartHalfAscii => '◁',
+            Look::WordEndHalfAscii => '▷',
+            Look::WordStartHalfUnicode => '◀',
+            Look::WordEndHalfUnicode => '▶',
+        }
+    }
+}
+
+/// The high-level intermediate representation for a capturing group.
+///
+/// A capturing group always has an index and a child expression. It may
+/// also have a name associated with it (e.g., `(?P<foo>\w)`), but a name is
+/// not required.
+///
+/// Note that there is no explicit representation of a non-capturing group
+/// in a `Hir`. Instead, non-capturing grouping is handled automatically by
+/// the recursive structure of the `Hir` itself.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Capture {
+    /// The index of this capturing group.
+    pub index: u32,
+    /// The name of this capturing group, or `None` if it is unnamed.
+    pub name: Option<Box<str>>,
+    /// The expression inside the capturing group, which may be empty.
+    pub sub: Box<Hir>,
+}
+
+/// The high-level intermediate representation of a repetition operator.
+///
+/// A repetition operator permits the repetition of an arbitrary
+/// sub-expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Repetition {
+    /// The lower bound of the repetition range.
+    ///
+    /// Note that special cases like `?`, `+` and `*` all get translated into
+    /// the ranges `{0,1}`, `{1,}` and `{0,}`, respectively.
+    ///
+    /// When `min` is zero, this expression can match the empty string
+    /// regardless of what its sub-expression is.
+    pub min: u32,
+    /// The upper bound of the repetition range, if there is one.
+    ///
+    /// Note that when `max` is `None`, `min` acts as a lower bound but where
+    /// there is no upper bound. For something like `x{5}` where the min and
+    /// max are equivalent, `min` will be set to `5` and `max` will be set to
+    /// `Some(5)`.
+    pub max: Option<u32>,
+    /// Whether this repetition operator is greedy or not. A greedy operator
+    /// will match as much as it can. A non-greedy operator will match as
+    /// little as it can.
+    ///
+    /// Typically, operators are greedy by default and are only non-greedy when
+    /// a `?` suffix is used, e.g., `(expr)*` is greedy while `(expr)*?` is
+    /// not. However, this can be inverted via the `U` "ungreedy" flag.
+    pub greedy: bool,
+    /// The expression being repeated.
+    pub sub: Box<Hir>,
+}
+
+impl Repetition {
+    /// Builds a repetition that repeats the given sub-expression instead of
+    /// this repetition's own one.
+    ///
+    /// The `min`, `max` and `greedy` values of the new repetition are copied
+    /// from this one. This repetition itself is left untouched.
+    pub fn with(&self, sub: Hir) -> Repetition {
+        // Only the `Copy` fields are read here; `self.sub` is ignored.
+        let Repetition { min, max, greedy, .. } = *self;
+        Repetition { min, max, greedy, sub: Box::new(sub) }
+    }
+}
+
+/// A type describing the different flavors of `.`.
+///
+/// This type is meant to be used with [`Hir::dot`], which is a convenience
+/// routine for building HIR values derived from the `.` regex.
+#[non_exhaustive]
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum Dot {
+    /// Matches the UTF-8 encoding of any Unicode scalar value.
+    ///
+    /// This is equivalent to `(?su:.)` and also `\p{any}`.
+    AnyChar,
+    /// Matches any byte value.
+    ///
+    /// This is equivalent to `(?s-u:.)` and also `(?-u:[\x00-\xFF])`.
+    AnyByte,
+    /// Matches the UTF-8 encoding of any Unicode scalar value except for the
+    /// `char` given.
+    ///
+    /// This is equivalent to using `(?u-s:.)` with the line terminator set
+    /// to a particular ASCII byte. (Because of peculiarities in the regex
+    /// engines, a line terminator must be a single byte. It follows that when
+    /// UTF-8 mode is enabled, this single byte must also be a Unicode scalar
+    /// value. That is, it must be ASCII.)
+    ///
+    /// (This and `AnyCharExceptLF` both exist because of legacy reasons.
+    /// `AnyCharExceptLF` will be dropped in the next breaking change release.)
+    AnyCharExcept(char),
+    /// Matches the UTF-8 encoding of any Unicode scalar value except for `\n`.
+    ///
+    /// This is equivalent to `(?u-s:.)` and also `[\p{any}--\n]`.
+    AnyCharExceptLF,
+    /// Matches the UTF-8 encoding of any Unicode scalar value except for `\r`
+    /// and `\n`.
+    ///
+    /// This is equivalent to `(?uR-s:.)` and also `[\p{any}--\r\n]`.
+    AnyCharExceptCRLF,
+    /// Matches any byte value except for the `u8` given.
+    ///
+    /// This is equivalent to using `(?-us:.)` with the line terminator set
+    /// to a particular ASCII byte. (Because of peculiarities in the regex
+    /// engines, a line terminator must be a single byte. It follows that when
+    /// UTF-8 mode is enabled, this single byte must also be a Unicode scalar
+    /// value. That is, it must be ASCII.)
+    ///
+    /// (This and `AnyByteExceptLF` both exist because of legacy reasons.
+    /// `AnyByteExceptLF` will be dropped in the next breaking change release.)
+    AnyByteExcept(u8),
+    /// Matches any byte value except for `\n`.
+    ///
+    /// This is equivalent to `(?-su:.)` and also `(?-u:[[\x00-\xFF]--\n])`.
+    AnyByteExceptLF,
+    /// Matches any byte value except for `\r` and `\n`.
+    ///
+    /// This is equivalent to `(?R-su:.)` and also `(?-u:[[\x00-\xFF]--\r\n])`.
+    AnyByteExceptCRLF,
+}
+
+/// A custom `Drop` impl is used for `Hir` such that it uses constant stack
+/// space but heap space proportional to the depth of the total `Hir`.
+impl Drop for Hir {
+    fn drop(&mut self) {
+        use core::mem;
+
+        match *self.kind() {
+            HirKind::Empty
+            | HirKind::Literal(_)
+            | HirKind::Class(_)
+            | HirKind::Look(_) => return,
+            HirKind::Capture(ref x) if x.sub.kind.subs().is_empty() => return,
+            HirKind::Repetition(ref x) if x.sub.kind.subs().is_empty() => {
+                return
+            }
+            HirKind::Concat(ref x) if x.is_empty() => return,
+            HirKind::Alternation(ref x) if x.is_empty() => return,
+            _ => {}
+        }
+
+        let mut stack = vec![mem::replace(self, Hir::empty())];
+        while let Some(mut expr) = stack.pop() {
+            match expr.kind {
+                HirKind::Empty
+                | HirKind::Literal(_)
+                | HirKind::Class(_)
+                | HirKind::Look(_) => {}
+                HirKind::Capture(ref mut x) => {
+                    stack.push(mem::replace(&mut x.sub, Hir::empty()));
+                }
+                HirKind::Repetition(ref mut x) => {
+                    stack.push(mem::replace(&mut x.sub, Hir::empty()));
+                }
+                HirKind::Concat(ref mut x) => {
+                    stack.extend(x.drain(..));
+                }
+                HirKind::Alternation(ref mut x) => {
+                    stack.extend(x.drain(..));
+                }
+            }
+        }
+    }
+}
+
+/// A type that collects various properties of an HIR value.
+///
+/// Properties are always scalar values and represent metadata that is
+/// computed inductively on an HIR value. Properties are defined for all
+/// HIR values.
+///
+/// All methods on a `Properties` value take constant time and are meant to
+/// be cheap to call.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Properties(Box<PropertiesI>);
+
+/// The property definition. It is split out so that we can box it, and
+/// thereby make `Properties` use less stack size. This is kind-of important
+/// because every HIR value has a `Properties` attached to it.
+///
+/// This does have the unfortunate consequence that creating any HIR value
+/// always leads to at least one alloc for properties, but this is generally
+/// true anyway (for pretty much all HirKinds except for look-arounds).
+#[derive(Clone, Debug, Eq, PartialEq)]
+struct PropertiesI {
+    minimum_len: Option<usize>,
+    maximum_len: Option<usize>,
+    look_set: LookSet,
+    look_set_prefix: LookSet,
+    look_set_suffix: LookSet,
+    look_set_prefix_any: LookSet,
+    look_set_suffix_any: LookSet,
+    utf8: bool,
+    explicit_captures_len: usize,
+    static_explicit_captures_len: Option<usize>,
+    literal: bool,
+    alternation_literal: bool,
+}
+
+impl Properties {
+    /// Returns the length (in bytes) of the smallest string matched by this
+    /// HIR.
+    ///
+    /// A return value of `0` is possible and occurs when the HIR can match an
+    /// empty string.
+    ///
+    /// `None` is returned when there is no minimum length. This occurs in
+    /// precisely the cases where the HIR matches nothing. i.e., The language
+    /// the regex matches is empty. An example of such a regex is `\P{any}`.
+    #[inline]
+    pub fn minimum_len(&self) -> Option<usize> {
+        self.0.minimum_len
+    }
+
+    /// Returns the length (in bytes) of the longest string matched by this
+    /// HIR.
+    ///
+    /// A return value of `0` is possible and occurs when nothing longer than
+    /// the empty string is in the language described by this HIR.
+    ///
+    /// `None` is returned when there is no longest matching string. This
+    /// occurs when the HIR matches nothing or when there is no upper bound on
+    /// the length of matching strings. Example of such regexes are `\P{any}`
+    /// (matches nothing) and `a+` (has no upper bound).
+    #[inline]
+    pub fn maximum_len(&self) -> Option<usize> {
+        self.0.maximum_len
+    }
+
+    /// Returns a set of all look-around assertions that appear at least once
+    /// in this HIR value.
+    #[inline]
+    pub fn look_set(&self) -> LookSet {
+        self.0.look_set
+    }
+
+    /// Returns a set of all look-around assertions that appear as a prefix for
+    /// this HIR value. That is, the set returned corresponds to the set of
+    /// assertions that must be passed before matching any bytes in a haystack.
+    ///
+    /// For example, `hir.look_set_prefix().contains(Look::Start)` returns true
+    /// if and only if the HIR is fully anchored at the start.
+    #[inline]
+    pub fn look_set_prefix(&self) -> LookSet {
+        self.0.look_set_prefix
+    }
+
+    /// Returns a set of all look-around assertions that appear as a _possible_
+    /// prefix for this HIR value. That is, the set returned corresponds to the
+    /// set of assertions that _may_ be passed before matching any bytes in a
+    /// haystack.
+    ///
+    /// For example, `hir.look_set_prefix_any().contains(Look::Start)` returns
+    /// true if and only if it's possible for the regex to match through an
+    /// anchored assertion before consuming any input.
+    #[inline]
+    pub fn look_set_prefix_any(&self) -> LookSet {
+        self.0.look_set_prefix_any
+    }
+
+    /// Returns a set of all look-around assertions that appear as a suffix for
+    /// this HIR value. That is, the set returned corresponds to the set of
+    /// assertions that must be passed in order to be considered a match after
+    /// all other consuming HIR expressions.
+    ///
+    /// For example, `hir.look_set_suffix().contains(Look::End)` returns true
+    /// if and only if the HIR is fully anchored at the end.
+    #[inline]
+    pub fn look_set_suffix(&self) -> LookSet {
+        self.0.look_set_suffix
+    }
+
+    /// Returns the look-around assertions that appear as a _possible_ suffix
+    /// for this HIR value, i.e. those that _may_ be passed at the end of a
+    /// match, after all other consuming HIR expressions.
+    ///
+    /// For example, `hir.look_set_suffix_any().contains(Look::End)` returns
+    /// true if and only if it's possible for the regex to match through an
+    /// anchored assertion at the end of a match without consuming any input.
+    #[inline]
+    pub fn look_set_suffix_any(&self) -> LookSet {
+        let Properties(inner) = self;
+        inner.look_set_suffix_any
+    }
+
+    /// Return true if and only if this HIR will always match valid UTF-8.
+    ///
+    /// When this returns false, then it is possible for this HIR expression to
+    /// match invalid UTF-8, including by matching between the code units of
+    /// a single UTF-8 encoded codepoint.
+    ///
+    /// Note that this returns true even when the corresponding HIR can match
+    /// the empty string. Since an empty string can technically appear between
+    /// UTF-8 code units, it is possible for a match to be reported that splits
+    /// a codepoint which could in turn be considered matching invalid UTF-8.
+    /// However, it is generally assumed that such empty matches are handled
+    /// specially by the search routine if it is absolutely required that
+    /// matches not split a codepoint.
+    ///
+    /// # Example
+    ///
+    /// This code example shows the UTF-8 property of a variety of patterns.
+    ///
+    /// ```
+    /// use regex_syntax::{ParserBuilder, parse};
+    ///
+    /// // Examples of 'is_utf8() == true'.
+    /// assert!(parse(r"a")?.properties().is_utf8());
+    /// assert!(parse(r"[^a]")?.properties().is_utf8());
+    /// assert!(parse(r".")?.properties().is_utf8());
+    /// assert!(parse(r"\W")?.properties().is_utf8());
+    /// assert!(parse(r"\b")?.properties().is_utf8());
+    /// assert!(parse(r"\B")?.properties().is_utf8());
+    /// assert!(parse(r"(?-u)\b")?.properties().is_utf8());
+    /// assert!(parse(r"(?-u)\B")?.properties().is_utf8());
+    /// // Unicode mode is enabled by default, and in
+    /// // that mode, all \x hex escapes are treated as
+    /// // codepoints. So this actually matches the UTF-8
+    /// // encoding of U+00FF.
+    /// assert!(parse(r"\xFF")?.properties().is_utf8());
+    ///
+    /// // Now we show examples of 'is_utf8() == false'.
+    /// // The only way to do this is to force the parser
+    /// // to permit invalid UTF-8, otherwise all of these
+    /// // would fail to parse!
+    /// let parse = |pattern| {
+    ///     ParserBuilder::new().utf8(false).build().parse(pattern)
+    /// };
+    /// assert!(!parse(r"(?-u)[^a]")?.properties().is_utf8());
+    /// assert!(!parse(r"(?-u).")?.properties().is_utf8());
+    /// assert!(!parse(r"(?-u)\W")?.properties().is_utf8());
+    /// // Conversely to the equivalent example above,
+    /// // when Unicode mode is disabled, \x hex escapes
+    /// // are treated as their raw byte values.
+    /// assert!(!parse(r"(?-u)\xFF")?.properties().is_utf8());
+    /// // Note that just because we disabled UTF-8 in the
+    /// // parser doesn't mean we still can't use Unicode.
+    /// // It is enabled by default, so \xFF is still
+    /// // equivalent to matching the UTF-8 encoding of
+    /// // U+00FF by default.
+    /// assert!(parse(r"\xFF")?.properties().is_utf8());
+    /// // Even though we use raw bytes that individually
+    /// // are not valid UTF-8, when combined together, the
+    /// // overall expression *does* match valid UTF-8!
+    /// assert!(parse(r"(?-u)\xE2\x98\x83")?.properties().is_utf8());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn is_utf8(&self) -> bool {
+        let Properties(inner) = self;
+        inner.utf8
+    }
+
+    /// Returns the total number of explicit capturing groups in this HIR.
+    ///
+    /// Note that this does not include the implicit capturing group
+    /// corresponding to the entire match that is typically included by regex
+    /// engines.
+    ///
+    /// # Example
+    ///
+    /// This method will return `0` for `a` and `1` for `(a)`:
+    ///
+    /// ```
+    /// use regex_syntax::parse;
+    ///
+    /// assert_eq!(0, parse("a")?.properties().explicit_captures_len());
+    /// assert_eq!(1, parse("(a)")?.properties().explicit_captures_len());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn explicit_captures_len(&self) -> usize {
+        let Properties(inner) = self;
+        inner.explicit_captures_len
+    }
+
+    /// Returns the number of explicit capture groups present in every match.
+    ///
+    /// If the number of capture groups can vary depending on the match, then
+    /// this returns `None`. That is, a value is only returned when the number
+    /// of matching groups is invariant or "static."
+    ///
+    /// Note that this does not include the implicit capturing group
+    /// corresponding to the entire match.
+    ///
+    /// # Example
+    ///
+    /// This shows a few cases where a static number of capture groups is
+    /// available and a few cases where it is not.
+    ///
+    /// ```
+    /// use regex_syntax::parse;
+    ///
+    /// let len = |pattern| {
+    ///     parse(pattern).map(|h| {
+    ///         h.properties().static_explicit_captures_len()
+    ///     })
+    /// };
+    ///
+    /// assert_eq!(Some(0), len("a")?);
+    /// assert_eq!(Some(1), len("(a)")?);
+    /// assert_eq!(Some(1), len("(a)|(b)")?);
+    /// assert_eq!(Some(2), len("(a)(b)|(c)(d)")?);
+    /// assert_eq!(None, len("(a)|b")?);
+    /// assert_eq!(None, len("a|(b)")?);
+    /// assert_eq!(None, len("(b)*")?);
+    /// assert_eq!(Some(1), len("(b)+")?);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn static_explicit_captures_len(&self) -> Option<usize> {
+        let Properties(inner) = self;
+        inner.static_explicit_captures_len
+    }
+
+    /// Return true if and only if this HIR is a simple literal, that is, it is
+    /// either itself a `Literal` or a concatenation of only `Literal`s.
+    ///
+    /// For example, `f` and `foo` are literals, but `f+`, `(foo)`, `foo()` and
+    /// the empty string are not (even though they contain sub-expressions that
+    /// are literals).
+    #[inline]
+    pub fn is_literal(&self) -> bool {
+        let Properties(inner) = self;
+        inner.literal
+    }
+
+    /// Return true if and only if this HIR is either a simple literal (a
+    /// `Literal` or a concatenation of only `Literal`s) or an alternation of
+    /// simple literals.
+    ///
+    /// For example, `f`, `foo`, `a|b|c`, and `foo|bar|baz` are alternation
+    /// literals, but `f+`, `(foo)`, `foo()`, and the empty pattern are not
+    /// (even though they contain sub-expressions that are literals).
+    #[inline]
+    pub fn is_alternation_literal(&self) -> bool {
+        let Properties(inner) = self;
+        inner.alternation_literal
+    }
+
+    /// Returns the total amount of heap memory usage, in bytes, used by this
+    /// `Properties` value, i.e. the size of its single boxed `PropertiesI`.
+    #[inline]
+    pub fn memory_usage(&self) -> usize {
+        core::mem::size_of::<PropertiesI>()
+    }
+
+    /// Returns a new set of properties that corresponds to the union of the
+    /// iterator of properties given.
+    ///
+    /// This is useful when one has multiple `Hir` expressions and wants
+    /// to combine them into a single alternation without constructing the
+    /// corresponding `Hir`. This routine provides a way of combining the
+    /// properties of each `Hir` expression into one set of properties
+    /// representing the union of those expressions.
+    ///
+    /// # Example: union with HIRs that never match
+    ///
+    /// This example shows that unioning properties together with one that
+    /// represents a regex that never matches will "poison" certain attributes,
+    /// like the minimum and maximum lengths.
+    ///
+    /// ```
+    /// use regex_syntax::{hir::Properties, parse};
+    ///
+    /// let hir1 = parse("ab?c?")?;
+    /// assert_eq!(Some(1), hir1.properties().minimum_len());
+    /// assert_eq!(Some(3), hir1.properties().maximum_len());
+    ///
+    /// let hir2 = parse(r"[a&&b]")?;
+    /// assert_eq!(None, hir2.properties().minimum_len());
+    /// assert_eq!(None, hir2.properties().maximum_len());
+    ///
+    /// let hir3 = parse(r"wxy?z?")?;
+    /// assert_eq!(Some(2), hir3.properties().minimum_len());
+    /// assert_eq!(Some(4), hir3.properties().maximum_len());
+    ///
+    /// let unioned = Properties::union([
+    ///     hir1.properties(),
+    ///     hir2.properties(),
+    ///     hir3.properties(),
+    /// ]);
+    /// assert_eq!(None, unioned.minimum_len());
+    /// assert_eq!(None, unioned.maximum_len());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// The maximum length can also be "poisoned" by a pattern that has no
+    /// upper bound on the length of a match. The minimum length remains
+    /// unaffected:
+    ///
+    /// ```
+    /// use regex_syntax::{hir::Properties, parse};
+    ///
+    /// let hir1 = parse("ab?c?")?;
+    /// assert_eq!(Some(1), hir1.properties().minimum_len());
+    /// assert_eq!(Some(3), hir1.properties().maximum_len());
+    ///
+    /// let hir2 = parse(r"a+")?;
+    /// assert_eq!(Some(1), hir2.properties().minimum_len());
+    /// assert_eq!(None, hir2.properties().maximum_len());
+    ///
+    /// let hir3 = parse(r"wxy?z?")?;
+    /// assert_eq!(Some(2), hir3.properties().minimum_len());
+    /// assert_eq!(Some(4), hir3.properties().maximum_len());
+    ///
+    /// let unioned = Properties::union([
+    ///     hir1.properties(),
+    ///     hir2.properties(),
+    ///     hir3.properties(),
+    /// ]);
+    /// assert_eq!(Some(1), unioned.minimum_len());
+    /// assert_eq!(None, unioned.maximum_len());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn union<I, P>(props: I) -> Properties
+    where
+        I: IntoIterator<Item = P>,
+        P: core::borrow::Borrow<Properties>,
+    {
+        let mut alts = props.into_iter().peekable();
+        // An empty alternation cannot actually be built, but it is handled
+        // here all the same: with no branches, the required look-around prefix
+        // and suffix are empty. With branches, they are the intersection over
+        // every branch, so the fold below starts from the full set.
+        let fix = match alts.peek() {
+            None => LookSet::empty(),
+            Some(_) => LookSet::full(),
+        };
+        // The static capture group count starts out as that of the first
+        // branch. If any later branch has a different static count, the
+        // overall count varies with the match and becomes `None`. Note that
+        // an empty iterator has no first branch, so the count is then `None`
+        // as well.
+        let static_explicit_captures_len = alts
+            .peek()
+            .and_then(|first| first.borrow().static_explicit_captures_len());
+        // The base case is an empty alternation, which matches nothing.
+        // Such alternations never reach this point through `Hir`, since the
+        // Hir::alternation smart constructor rewrites those as empty character
+        // classes.
+        let mut acc = PropertiesI {
+            minimum_len: None,
+            maximum_len: None,
+            look_set: LookSet::empty(),
+            look_set_prefix: fix,
+            look_set_suffix: fix,
+            look_set_prefix_any: LookSet::empty(),
+            look_set_suffix_any: LookSet::empty(),
+            utf8: true,
+            explicit_captures_len: 0,
+            static_explicit_captures_len,
+            literal: false,
+            alternation_literal: true,
+        };
+        let (mut min_poisoned, mut max_poisoned) = (false, false);
+        // Fold every branch into the accumulated properties.
+        for branch in alts {
+            let p = branch.borrow();
+            acc.look_set.set_union(p.look_set());
+            acc.look_set_prefix.set_intersect(p.look_set_prefix());
+            acc.look_set_suffix.set_intersect(p.look_set_suffix());
+            acc.look_set_prefix_any.set_union(p.look_set_prefix_any());
+            acc.look_set_suffix_any.set_union(p.look_set_suffix_any());
+            acc.utf8 &= p.is_utf8();
+            acc.explicit_captures_len = acc
+                .explicit_captures_len
+                .saturating_add(p.explicit_captures_len());
+            let same_static = acc.static_explicit_captures_len
+                == p.static_explicit_captures_len();
+            if !same_static {
+                acc.static_explicit_captures_len = None;
+            }
+            // Every branch must itself be a simple literal.
+            acc.alternation_literal &= p.is_literal();
+            if !min_poisoned {
+                match (p.minimum_len(), acc.minimum_len) {
+                    (None, _) => {
+                        acc.minimum_len = None;
+                        min_poisoned = true;
+                    }
+                    (Some(xmin), Some(pmin)) if xmin >= pmin => {}
+                    (Some(xmin), _) => acc.minimum_len = Some(xmin),
+                }
+            }
+            if !max_poisoned {
+                match (p.maximum_len(), acc.maximum_len) {
+                    (None, _) => {
+                        acc.maximum_len = None;
+                        max_poisoned = true;
+                    }
+                    (Some(xmax), Some(pmax)) if xmax <= pmax => {}
+                    (Some(xmax), _) => acc.maximum_len = Some(xmax),
+                }
+            }
+        }
+        Properties(Box::new(acc))
+    }
+}
+
+impl Properties {
+    /// Create a new set of HIR properties for an empty regex.
+    fn empty() -> Properties {
+        let inner = PropertiesI {
+            minimum_len: Some(0),
+            maximum_len: Some(0),
+            look_set: LookSet::empty(),
+            look_set_prefix: LookSet::empty(),
+            look_set_suffix: LookSet::empty(),
+            look_set_prefix_any: LookSet::empty(),
+            look_set_suffix_any: LookSet::empty(),
+            // It is debatable whether an empty regex always matches at valid
+            // UTF-8 boundaries. Strictly speaking, at a byte oriented view,
+            // it is clearly false. There are, for example, many empty strings
+            // between the bytes encoding a '☃'.
+            //
+            // However, when Unicode mode is enabled, the fundamental atom
+            // of matching is really a codepoint. And in that scenario, an
+            // empty regex is defined to only match at valid UTF-8 boundaries
+            // and to never split a codepoint. It just so happens that this
+            // enforcement is somewhat tricky to do for regexes that match
+            // the empty string inside regex engines themselves. It usually
+            // requires some layer above the regex engine to filter out such
+            // matches.
+            //
+            // In any case, 'true' is really the only coherent option. If it
+            // were false, for example, then 'a*' would also need to be false
+            // since it too can match the empty string.
+            utf8: true,
+            explicit_captures_len: 0,
+            static_explicit_captures_len: Some(0),
+            literal: false,
+            alternation_literal: false,
+        };
+        Properties(Box::new(inner))
+    }
+
+    /// Create a new set of HIR properties for a literal regex.
+    fn literal(lit: &Literal) -> Properties {
+        let inner = PropertiesI {
+            minimum_len: Some(lit.0.len()),
+            maximum_len: Some(lit.0.len()),
+            look_set: LookSet::empty(),
+            look_set_prefix: LookSet::empty(),
+            look_set_suffix: LookSet::empty(),
+            look_set_prefix_any: LookSet::empty(),
+            look_set_suffix_any: LookSet::empty(),
+            utf8: core::str::from_utf8(&lit.0).is_ok(),
+            explicit_captures_len: 0,
+            static_explicit_captures_len: Some(0),
+            literal: true,
+            alternation_literal: true,
+        };
+        Properties(Box::new(inner))
+    }
+
+    /// Create a new set of HIR properties for a character class.
+    fn class(class: &Class) -> Properties {
+        let inner = PropertiesI {
+            minimum_len: class.minimum_len(),
+            maximum_len: class.maximum_len(),
+            look_set: LookSet::empty(),
+            look_set_prefix: LookSet::empty(),
+            look_set_suffix: LookSet::empty(),
+            look_set_prefix_any: LookSet::empty(),
+            look_set_suffix_any: LookSet::empty(),
+            utf8: class.is_utf8(),
+            explicit_captures_len: 0,
+            static_explicit_captures_len: Some(0),
+            literal: false,
+            alternation_literal: false,
+        };
+        Properties(Box::new(inner))
+    }
+
+    /// Create a new set of HIR properties for a look-around assertion.
+    fn look(look: Look) -> Properties {
+        let inner = PropertiesI {
+            minimum_len: Some(0),
+            maximum_len: Some(0),
+            look_set: LookSet::singleton(look),
+            look_set_prefix: LookSet::singleton(look),
+            look_set_suffix: LookSet::singleton(look),
+            look_set_prefix_any: LookSet::singleton(look),
+            look_set_suffix_any: LookSet::singleton(look),
+            // This requires a little explanation. Basically, we don't consider
+            // matching an empty string to be equivalent to matching invalid
+            // UTF-8, even though technically matching every empty string will
+            // split the UTF-8 encoding of a single codepoint when treating a
+            // UTF-8 encoded string as a sequence of bytes. Our defense here is
+            // that in such a case, a codepoint should logically be treated as
+            // the fundamental atom for matching, and thus the only valid match
+            // points are between codepoints and not bytes.
+            //
+            // More practically, this is true here because it's also true
+            // for 'Hir::empty()', otherwise something like 'a*' would be
+            // considered to match invalid UTF-8. That in turn makes this
+            // property borderline useless.
+            utf8: true,
+            explicit_captures_len: 0,
+            static_explicit_captures_len: Some(0),
+            literal: false,
+            alternation_literal: false,
+        };
+        Properties(Box::new(inner))
+    }
+
+    /// Create a new set of HIR properties for a repetition.
+    fn repetition(rep: &Repetition) -> Properties {
+        let p = rep.sub.properties();
+        let minimum_len = p.minimum_len().map(|child_min| {
+            let rep_min = usize::try_from(rep.min).unwrap_or(usize::MAX);
+            child_min.saturating_mul(rep_min)
+        });
+        let maximum_len = rep.max.and_then(|rep_max| {
+            let rep_max = usize::try_from(rep_max).ok()?;
+            let child_max = p.maximum_len()?;
+            child_max.checked_mul(rep_max)
+        });
+
+        let mut inner = PropertiesI {
+            minimum_len,
+            maximum_len,
+            look_set: p.look_set(),
+            look_set_prefix: LookSet::empty(),
+            look_set_suffix: LookSet::empty(),
+            look_set_prefix_any: p.look_set_prefix_any(),
+            look_set_suffix_any: p.look_set_suffix_any(),
+            utf8: p.is_utf8(),
+            explicit_captures_len: p.explicit_captures_len(),
+            static_explicit_captures_len: p.static_explicit_captures_len(),
+            literal: false,
+            alternation_literal: false,
+        };
+        // If the repetition operator can match the empty string, then its
+        // lookset prefix and suffixes themselves remain empty since they are
+        // no longer required to match.
+        if rep.min > 0 {
+            inner.look_set_prefix = p.look_set_prefix();
+            inner.look_set_suffix = p.look_set_suffix();
+        }
+        // If the static captures len of the sub-expression is not known or
+        // is greater than zero, then it automatically propagates to the
+        // repetition, regardless of the repetition. Otherwise, it might
+        // change, but only when the repetition can match 0 times.
+        if rep.min == 0
+            && inner.static_explicit_captures_len.map_or(false, |len| len > 0)
+        {
+            // If we require a match 0 times, then our captures len is
+            // guaranteed to be zero. Otherwise, if we *can* match the empty
+            // string, then it's impossible to know how many captures will be
+            // in the resulting match.
+            if rep.max == Some(0) {
+                inner.static_explicit_captures_len = Some(0);
+            } else {
+                inner.static_explicit_captures_len = None;
+            }
+        }
+        Properties(Box::new(inner))
+    }
+
+    /// Create a new set of HIR properties for a capture.
+    fn capture(capture: &Capture) -> Properties {
+        let p = capture.sub.properties();
+        Properties(Box::new(PropertiesI {
+            explicit_captures_len: p.explicit_captures_len().saturating_add(1),
+            static_explicit_captures_len: p
+                .static_explicit_captures_len()
+                .map(|len| len.saturating_add(1)),
+            literal: false,
+            alternation_literal: false,
+            ..PropertiesI::clone(&p.0)
+        }))
+    }
+
+    /// Create a new set of HIR properties for a concatenation.
+    fn concat(concat: &[Hir]) -> Properties {
+        // The base case is an empty concatenation, which matches the empty
+        // string. Note though that empty concatenations aren't possible,
+        // because the Hir::concat smart constructor rewrites those as
+        // Hir::empty.
+        let mut props = PropertiesI {
+            minimum_len: Some(0),
+            maximum_len: Some(0),
+            look_set: LookSet::empty(),
+            look_set_prefix: LookSet::empty(),
+            look_set_suffix: LookSet::empty(),
+            look_set_prefix_any: LookSet::empty(),
+            look_set_suffix_any: LookSet::empty(),
+            utf8: true,
+            explicit_captures_len: 0,
+            static_explicit_captures_len: Some(0),
+            literal: true,
+            alternation_literal: true,
+        };
+        // Handle properties that need to visit every child hir.
+        for x in concat.iter() {
+            let p = x.properties();
+            props.look_set.set_union(p.look_set());
+            props.utf8 = props.utf8 && p.is_utf8();
+            props.explicit_captures_len = props
+                .explicit_captures_len
+                .saturating_add(p.explicit_captures_len());
+            // The running total stays static only while both it and the
+            // child's count are static; otherwise it becomes `None`.
+            props.static_explicit_captures_len = p
+                .static_explicit_captures_len()
+                .zip(props.static_explicit_captures_len)
+                .map(|(len1, len2)| len1.saturating_add(len2));
+            props.literal = props.literal && p.is_literal();
+            props.alternation_literal =
+                props.alternation_literal && p.is_alternation_literal();
+            if let Some(minimum_len) = props.minimum_len {
+                match p.minimum_len() {
+                    None => props.minimum_len = None,
+                    Some(len) => {
+                        // We use saturating arithmetic here because the
+                        // minimum is just a lower bound. We can't go any
+                        // higher than what our number types permit.
+                        props.minimum_len =
+                            Some(minimum_len.saturating_add(len));
+                    }
+                }
+            }
+            if let Some(maximum_len) = props.maximum_len {
+                match p.maximum_len() {
+                    None => props.maximum_len = None,
+                    Some(len) => {
+                        props.maximum_len = maximum_len.checked_add(len)
+                    }
+                }
+            }
+        }
+        // Handle the prefix properties, which only requires visiting
+        // child exprs until one matches more than the empty string.
+        for x in concat {
+            let p = x.properties();
+            props.look_set_prefix.set_union(p.look_set_prefix());
+            props.look_set_prefix_any.set_union(p.look_set_prefix_any());
+            // A child whose maximum length is `None` (no known upper bound)
+            // ends the scan just like one with a non-zero maximum.
+            if p.maximum_len().map_or(true, |len| len > 0) {
+                break;
+            }
+        }
+        // Same thing for the suffix properties, but in reverse.
+        for x in concat.iter().rev() {
+            let p = x.properties();
+            props.look_set_suffix.set_union(p.look_set_suffix());
+            props.look_set_suffix_any.set_union(p.look_set_suffix_any());
+            // A child whose maximum length is `None` (no known upper bound)
+            // ends the scan just like one with a non-zero maximum.
+            if p.maximum_len().map_or(true, |len| len > 0) {
+                break;
+            }
+        }
+        Properties(Box::new(props))
+    }
+
+    /// Create a new set of HIR properties for an alternation.
+    fn alternation(alts: &[Hir]) -> Properties {
+        Properties::union(alts.iter().map(|hir| hir.properties()))
+    }
+}
+
+/// A set of look-around assertions.
+///
+/// This is useful for efficiently tracking look-around assertions. For
+/// example, an [`Hir`] provides properties that return `LookSet`s.
+#[derive(Clone, Copy, Default, Eq, PartialEq)]
+pub struct LookSet {
+    /// The underlying representation of this set is exposed to make it
+    /// possible to store it somewhere efficiently. The representation is that
+    /// of a bitset, where each assertion is a member when the bits of its
+    /// `Look::as_repr()` value are set.
+    ///
+    /// Note that users of this internal representation must permit the full
+    /// range of `u32` values to be represented. For example, even if the
+    /// current implementation only makes use of some of the least significant
+    /// bits, it may use more bits in a future semver compatible release.
+    pub bits: u32,
+}
+
+impl LookSet {
+    /// Creates a set containing no look-around assertions.
+    #[inline]
+    pub fn empty() -> LookSet {
+        Self { bits: 0 }
+    }
+
+    /// Creates a set with every bit of the representation turned on.
+    ///
+    /// Such a set contains all possible look-around assertions.
+    #[inline]
+    pub fn full() -> LookSet {
+        Self { bits: u32::MAX }
+    }
+
+    /// Creates a set whose only member is the given look-around assertion.
+    ///
+    /// The result is the same as starting from an empty set and inserting
+    /// `look` into it.
+    #[inline]
+    pub fn singleton(look: Look) -> LookSet {
+        Self { bits: look.as_repr() }
+    }
+
+    /// Returns how many look-around assertions are members of this set.
+    #[inline]
+    pub fn len(self) -> usize {
+        // At most 32 bits can be set, so this conversion never fails.
+        let population: u32 = self.bits.count_ones();
+        usize::try_from(population).unwrap()
+    }
+
+    /// Returns true if and only if this set has no members.
+    #[inline]
+    pub fn is_empty(self) -> bool {
+        self.bits == 0
+    }
+
+    /// Returns true if and only if `look` is a member of this set.
+    #[inline]
+    pub fn contains(self, look: Look) -> bool {
+        let mask = look.as_repr();
+        self.bits & mask != 0
+    }
+
+    /// Returns true if and only if this set contains a haystack or line anchor.
+    #[inline]
+    pub fn contains_anchor(&self) -> bool {
+        let haystack = self.contains_anchor_haystack();
+        haystack || self.contains_anchor_line()
+    }
+
+    /// Returns true if and only if this set contains a "start of haystack" or
+    /// "end of haystack" anchor. "Start/end of line" anchors do not count.
+    #[inline]
+    pub fn contains_anchor_haystack(&self) -> bool {
+        [Look::Start, Look::End].iter().any(|&look| self.contains(look))
+    }
+
+    /// Returns true if and only if this set contains any "start/end of line"
+    /// anchors, whether they recognize only `\n` or are CRLF (`\r\n`) aware.
+    /// "Start/end of haystack" anchors do not count.
+    #[inline]
+    pub fn contains_anchor_line(&self) -> bool {
+        // A line anchor is either an LF-only one (`StartLF`/`EndLF`) or a
+        // CRLF-aware one (`StartCRLF`/`EndCRLF`); the two dedicated
+        // predicates cover exactly those two pairs.
+        self.contains_anchor_lf() || self.contains_anchor_crlf()
+    }
+
+    /// Returns true if and only if this set contains any "start/end of line"
+    /// anchors that treat only `\n` as a line terminator. Haystack anchors
+    /// and CRLF aware line anchors do not count.
+    #[inline]
+    pub fn contains_anchor_lf(&self) -> bool {
+        [Look::StartLF, Look::EndLF].iter().any(|&look| self.contains(look))
+    }
+
+    /// Returns true if and only if this set contains any CRLF-aware line
+    /// anchors. Haystack anchors and `\n`-only line anchors do not count.
+    #[inline]
+    pub fn contains_anchor_crlf(&self) -> bool {
+        let anchors = [Look::StartCRLF, Look::EndCRLF];
+        anchors.iter().any(|&look| self.contains(look))
+    }
+
+    /// Returns true if and only if this set contains any word boundary
+    /// assertion, negated or not. This includes both Unicode and ASCII ones.
+    #[inline]
+    pub fn contains_word(self) -> bool {
+        let unicode = self.contains_word_unicode();
+        unicode || self.contains_word_ascii()
+    }
+
+    /// Returns true if and only if this set contains any Unicode word boundary
+    /// assertion: plain, negated, start/end or half start/end.
+    #[inline]
+    pub fn contains_word_unicode(self) -> bool {
+        self.contains(Look::WordUnicode)
+            || self.contains(Look::WordUnicodeNegate)
+            || self.contains(Look::WordStartUnicode)
+            || self.contains(Look::WordEndUnicode)
+            || self.contains(Look::WordStartHalfUnicode)
+            || self.contains(Look::WordEndHalfUnicode)
+    }
+
+    /// Returns true if and only if this set contains any ASCII word boundary
+    /// assertion: plain, negated, start/end or half start/end.
+    #[inline]
+    pub fn contains_word_ascii(self) -> bool {
+        self.contains(Look::WordAscii)
+            || self.contains(Look::WordAsciiNegate)
+            || self.contains(Look::WordStartAscii)
+            || self.contains(Look::WordEndAscii)
+            || self.contains(Look::WordStartHalfAscii)
+            || self.contains(Look::WordEndHalfAscii)
+    }
+
+    /// Returns an iterator over the assertions held in a copy of this set.
+    #[inline]
+    pub fn iter(self) -> LookSetIter {
+        LookSetIter { set: self }
+    }
+
+    /// Returns a copy of this set that also contains `look`. If `look` is
+    /// already a member, the result is equivalent to the original set.
+    #[inline]
+    pub fn insert(self, look: Look) -> LookSet {
+        let bits = self.bits | look.as_repr();
+        LookSet { bits }
+    }
+
+    /// Adds the given assertion to this set in place. This has the same
+    /// effect as assigning the result of `insert` back to the set.
+    #[inline]
+    pub fn set_insert(&mut self, look: Look) {
+        self.bits |= look.as_repr();
+    }
+
+    /// Returns a copy of this set that does not contain `look`. If `look` is
+    /// not a member, the result is equivalent to the original set.
+    #[inline]
+    pub fn remove(self, look: Look) -> LookSet {
+        let bits = self.bits & !look.as_repr();
+        LookSet { bits }
+    }
+
+    /// In-place counterpart of [`LookSet::remove`]: after this call, the set
+    /// holds whatever `remove(look)` would have returned.
+    #[inline]
+    pub fn set_remove(&mut self, look: Look) {
+        let updated = self.remove(look);
+        self.bits = updated.bits;
+    }
+
+    /// Returns the set difference `self - other`: every bit of this set that
+    /// is not also set in `other`.
+    #[inline]
+    pub fn subtract(self, other: LookSet) -> LookSet {
+        let keep = !other.bits;
+        LookSet { bits: self.bits & keep }
+    }
+
+    /// In-place counterpart of [`LookSet::subtract`]: after this call, the
+    /// set holds whatever `subtract(other)` would have returned.
+    #[inline]
+    pub fn set_subtract(&mut self, other: LookSet) {
+        let updated = self.subtract(other);
+        self.bits = updated.bits;
+    }
+
+    /// Returns the union of this set and `other`: every bit set in either.
+    #[inline]
+    pub fn union(self, other: LookSet) -> LookSet {
+        let bits = self.bits | other.bits;
+        LookSet { bits }
+    }
+
+    /// In-place counterpart of [`LookSet::union`]: after this call, the set
+    /// holds whatever `union(other)` would have returned.
+    #[inline]
+    pub fn set_union(&mut self, other: LookSet) {
+        let updated = self.union(other);
+        self.bits = updated.bits;
+    }
+
+    /// Returns the intersection of this set and `other`: only the bits set
+    /// in both.
+    #[inline]
+    pub fn intersect(self, other: LookSet) -> LookSet {
+        let bits = self.bits & other.bits;
+        LookSet { bits }
+    }
+
+    /// In-place counterpart of [`LookSet::intersect`]: after this call, the
+    /// set holds whatever `intersect(other)` would have returned.
+    #[inline]
+    pub fn set_intersect(&mut self, other: LookSet) {
+        let updated = self.intersect(other);
+        self.bits = updated.bits;
+    }
+
+    /// Decodes a `LookSet` from the first four bytes of `slice`, which are
+    /// interpreted as a native endian 32-bit integer. Any bytes beyond the
+    /// first four are ignored.
+    ///
+    /// # Panics
+    ///
+    /// This panics if `slice.len() < 4`.
+    #[inline]
+    pub fn read_repr(slice: &[u8]) -> LookSet {
+        // Slicing to `..4` is what panics on short input; the conversion of
+        // a 4-byte slice into `[u8; 4]` itself cannot fail.
+        let raw: [u8; 4] = slice[..4].try_into().unwrap();
+        LookSet { bits: u32::from_ne_bytes(raw) }
+    }
+
+    /// Encodes this `LookSet` as a native endian 32-bit integer into the
+    /// first four bytes of `slice`. Bytes beyond the first four are left
+    /// untouched.
+    ///
+    /// # Panics
+    ///
+    /// This panics if `slice.len() < 4`.
+    #[inline]
+    pub fn write_repr(self, slice: &mut [u8]) {
+        // Bytes are stored one index at a time, in ascending order, so a
+        // too-short destination panics on the first out-of-bounds index.
+        for (i, &byte) in self.bits.to_ne_bytes().iter().enumerate() {
+            slice[i] = byte;
+        }
+    }
+}
+
+impl core::fmt::Debug for LookSet {
+    /// Writes `∅` for an empty set. Otherwise writes the `as_char` form of
+    /// every member back to back, in iteration order, stopping at the first
+    /// formatting error.
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        if self.is_empty() {
+            f.write_str("∅")
+        } else {
+            self.iter()
+                .try_for_each(|look| write!(f, "{}", look.as_char()))
+        }
+    }
+}
+
+/// An iterator over all look-around assertions in a [`LookSet`].
+///
+/// This iterator is created by [`LookSet::iter`].
+#[derive(Clone, Debug)]
+pub struct LookSetIter {
+    // The set being iterated over. `LookSet::iter` stores a copy of the
+    // source set here.
+    set: LookSet,
+}
+
+impl Iterator for LookSetIter {
+    type Item = Look;
+
+    /// Yields the assertion that corresponds to the lowest set bit of the
+    /// remaining set and removes it from that set. Returns `None` once the
+    /// set is empty, or when `Look::from_repr` does not recognize the bit.
+    #[inline]
+    fn next(&mut self) -> Option<Look> {
+        let remaining = self.set;
+        if remaining.is_empty() {
+            return None;
+        }
+        // `trailing_zeros` on a `u32` is at most 32, which always fits into
+        // a `u16`, so this conversion cannot fail.
+        let lowest = remaining.bits.trailing_zeros();
+        let shift = u16::try_from(lowest).unwrap();
+        let found = Look::from_repr(1 << shift)?;
+        self.set = remaining.remove(found);
+        Some(found)
+    }
+}
+
+/// Unions a sequence of class expressions into one Unicode class.
+///
+/// Every element of `hirs` must be a character class. Unicode classes are
+/// merged as-is, while byte classes are first converted via
+/// `to_unicode_class`. Returns `None` as soon as an element is not a class
+/// or a byte class cannot be converted.
+fn class_chars(hirs: &[Hir]) -> Option<Class> {
+    let mut merged = ClassUnicode::new(vec![]);
+    for hir in hirs {
+        match hir.kind() {
+            HirKind::Class(Class::Unicode(unicode)) => merged.union(unicode),
+            HirKind::Class(Class::Bytes(bytes)) => {
+                merged.union(&bytes.to_unicode_class()?)
+            }
+            _ => return None,
+        }
+    }
+    Some(Class::Unicode(merged))
+}
+
+/// Unions a sequence of class expressions into one byte class.
+///
+/// Every element of `hirs` must be a character class. Byte classes are
+/// merged as-is, while Unicode classes are first converted via
+/// `to_byte_class`. Returns `None` as soon as an element is not a class or
+/// a Unicode class cannot be converted.
+fn class_bytes(hirs: &[Hir]) -> Option<Class> {
+    let mut merged = ClassBytes::new(vec![]);
+    for hir in hirs {
+        match hir.kind() {
+            HirKind::Class(Class::Unicode(unicode)) => {
+                merged.union(&unicode.to_byte_class()?)
+            }
+            HirKind::Class(Class::Bytes(bytes)) => merged.union(bytes),
+            _ => return None,
+        }
+    }
+    Some(Class::Bytes(merged))
+}
+
+/// Collects one `char` per element of `hirs`, in order and without removing
+/// duplicates.
+///
+/// Returns `None` unless every element is a literal whose bytes decode to
+/// exactly one `char` with nothing left over.
+fn singleton_chars(hirs: &[Hir]) -> Option<Vec<char>> {
+    let mut chars = vec![];
+    for hir in hirs {
+        let bytes = match hir.kind() {
+            HirKind::Literal(Literal(bytes)) => bytes,
+            _ => return None,
+        };
+        // Anything other than a successful decode disqualifies the literal.
+        let decoded = match crate::debug::utf8_decode(bytes) {
+            Some(Ok(ch)) => ch,
+            _ => return None,
+        };
+        // The decoded char must account for every byte of the literal.
+        if decoded.len_utf8() != bytes.len() {
+            return None;
+        }
+        chars.push(decoded);
+    }
+    Some(chars)
+}
+
+/// Collects one byte per element of `hirs`, in order and without removing
+/// duplicates.
+///
+/// Returns `None` unless every element is a literal that is exactly one
+/// byte long.
+fn singleton_bytes(hirs: &[Hir]) -> Option<Vec<u8>> {
+    let mut bytes_out = vec![];
+    for hir in hirs {
+        match hir.kind() {
+            HirKind::Literal(Literal(bytes)) if bytes.len() == 1 => {
+                bytes_out.push(bytes[0]);
+            }
+            // Either not a literal at all, or a literal of the wrong length.
+            _ => return None,
+        }
+    }
+    Some(bytes_out)
+}
+
+/// Looks for a common prefix in the list of alternation branches given. If one
+/// is found, then an equivalent but (hopefully) simplified Hir is returned.
+/// Otherwise, the original given list of branches is returned unmodified.
+///
+/// This is not quite as good as it could be. Right now, it requires that
+/// all branches are 'Concat' expressions. It also doesn't do well with
+/// literals. For example, given 'foofoo|foobar', it will not refactor it to
+/// 'foo(?:foo|bar)' because literals are flattened into their own special
+/// concatenation. (One wonders if perhaps 'Literal' should be a single atom
+/// instead of a string of bytes because of this. Otherwise, handling the
+/// current representation in this routine will be pretty gnarly. Sigh.)
+///
+/// On success, the result is `Ok` with a concatenation of the shared leading
+/// sub-expressions followed by one alternation of each branch's remainder.
+/// On failure, `Err` hands the input vector back to the caller.
+fn lift_common_prefix(hirs: Vec<Hir>) -> Result<Hir, Vec<Hir>> {
+    // With fewer than two branches there is nothing to factor out.
+    if hirs.len() <= 1 {
+        return Err(hirs);
+    }
+    // Start with the entire first branch as the candidate prefix. It is
+    // shortened below as the other branches are compared against it.
+    let mut prefix = match hirs[0].kind() {
+        HirKind::Concat(ref xs) => &**xs,
+        _ => return Err(hirs),
+    };
+    if prefix.is_empty() {
+        return Err(hirs);
+    }
+    for h in hirs.iter().skip(1) {
+        let concat = match h.kind() {
+            HirKind::Concat(ref xs) => xs,
+            _ => return Err(hirs),
+        };
+        // Count how many leading sub-expressions this branch shares with
+        // the current candidate prefix.
+        let common_len = prefix
+            .iter()
+            .zip(concat.iter())
+            .take_while(|(x, y)| x == y)
+            .count();
+        prefix = &prefix[..common_len];
+        // Bail out as soon as the branches have nothing in common.
+        if prefix.is_empty() {
+            return Err(hirs);
+        }
+    }
+    // Only the length is needed from here on, since `hirs` is consumed by
+    // the loop below.
+    let len = prefix.len();
+    assert_ne!(0, len);
+    let mut prefix_concat = vec![];
+    let mut suffix_alts = vec![];
+    for h in hirs {
+        let mut concat = match h.into_kind() {
+            HirKind::Concat(xs) => xs,
+            // We required all sub-expressions to be
+            // concats above, so we're only here if we
+            // have a concat.
+            _ => unreachable!(),
+        };
+        // `split_off(len)` returns everything after the shared prefix and
+        // leaves the first `len` sub-expressions behind in `concat`.
+        suffix_alts.push(Hir::concat(concat.split_off(len)));
+        // Keep the leading part of the first branch as the prefix. Since
+        // `len` is non-zero, `prefix_concat` is only empty on the first
+        // iteration.
+        if prefix_concat.is_empty() {
+            prefix_concat = concat;
+        }
+    }
+    // Result: the shared prefix followed by an alternation of the suffixes.
+    let mut concat = prefix_concat;
+    concat.push(Hir::alternation(suffix_alts));
+    Ok(Hir::concat(concat))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Builds a `ClassUnicode` out of `(start, end)` pairs.
+    fn uclass(ranges: &[(char, char)]) -> ClassUnicode {
+        let mut built: Vec<ClassUnicodeRange> =
+            Vec::with_capacity(ranges.len());
+        for &(start, end) in ranges {
+            built.push(ClassUnicodeRange::new(start, end));
+        }
+        ClassUnicode::new(built)
+    }
+
+    // Builds a `ClassBytes` out of `(start, end)` pairs.
+    fn bclass(ranges: &[(u8, u8)]) -> ClassBytes {
+        let mut built: Vec<ClassBytesRange> =
+            Vec::with_capacity(ranges.len());
+        for &(start, end) in ranges {
+            built.push(ClassBytesRange::new(start, end));
+        }
+        ClassBytes::new(built)
+    }
+
+    // Flattens a `ClassUnicode` into `(start, end)` pairs, in iteration
+    // order.
+    fn uranges(cls: &ClassUnicode) -> Vec<(char, char)> {
+        let mut pairs = Vec::new();
+        for range in cls.iter() {
+            pairs.push((range.start(), range.end()));
+        }
+        pairs
+    }
+
+    // Returns a copy of `cls` after `case_fold_simple` has been applied to
+    // the copy; `cls` itself is left as-is.
+    #[cfg(feature = "unicode-case")]
+    fn ucasefold(cls: &ClassUnicode) -> ClassUnicode {
+        let mut folded = ClassUnicode::clone(cls);
+        folded.case_fold_simple();
+        folded
+    }
+
+    // Clones `cls1`, unions `cls2` into the clone and returns the clone.
+    fn uunion(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
+        let mut merged = ClassUnicode::clone(cls1);
+        merged.union(cls2);
+        merged
+    }
+
+    // Clones `cls1`, intersects the clone with `cls2` and returns the clone.
+    fn uintersect(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
+        let mut common = ClassUnicode::clone(cls1);
+        common.intersect(cls2);
+        common
+    }
+
+    // Clones `cls1`, applies `difference(cls2)` to the clone and returns it.
+    fn udifference(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
+        let mut rest = ClassUnicode::clone(cls1);
+        rest.difference(cls2);
+        rest
+    }
+
+    // Clones `cls1`, applies `symmetric_difference(cls2)` to the clone and
+    // returns it.
+    fn usymdifference(
+        cls1: &ClassUnicode,
+        cls2: &ClassUnicode,
+    ) -> ClassUnicode {
+        let mut result = ClassUnicode::clone(cls1);
+        result.symmetric_difference(cls2);
+        result
+    }
+
+    // Returns a negated copy of `cls`; `cls` itself is left as-is.
+    fn unegate(cls: &ClassUnicode) -> ClassUnicode {
+        let mut negated = ClassUnicode::clone(cls);
+        negated.negate();
+        negated
+    }
+
+    // Flattens a `ClassBytes` into `(start, end)` pairs, in iteration order.
+    fn branges(cls: &ClassBytes) -> Vec<(u8, u8)> {
+        let mut pairs = Vec::new();
+        for range in cls.iter() {
+            pairs.push((range.start(), range.end()));
+        }
+        pairs
+    }
+
+    // Returns a copy of `cls` after `case_fold_simple` has been applied to
+    // the copy; `cls` itself is left as-is.
+    fn bcasefold(cls: &ClassBytes) -> ClassBytes {
+        let mut folded = ClassBytes::clone(cls);
+        folded.case_fold_simple();
+        folded
+    }
+
+    // Clones `cls1`, unions `cls2` into the clone and returns the clone.
+    fn bunion(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
+        let mut merged = ClassBytes::clone(cls1);
+        merged.union(cls2);
+        merged
+    }
+
+    // Clones `cls1`, intersects the clone with `cls2` and returns the clone.
+    fn bintersect(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
+        let mut common = ClassBytes::clone(cls1);
+        common.intersect(cls2);
+        common
+    }
+
+    // Clones `cls1`, applies `difference(cls2)` to the clone and returns it.
+    fn bdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
+        let mut rest = ClassBytes::clone(cls1);
+        rest.difference(cls2);
+        rest
+    }
+
+    // Clones `cls1`, applies `symmetric_difference(cls2)` to the clone and
+    // returns it.
+    fn bsymdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
+        let mut result = ClassBytes::clone(cls1);
+        result.symmetric_difference(cls2);
+        result
+    }
+
+    // Returns a negated copy of `cls`; `cls` itself is left as-is.
+    fn bnegate(cls: &ClassBytes) -> ClassBytes {
+        let mut negated = ClassBytes::clone(cls);
+        negated.negate();
+        negated
+    }
+
+    // A range built with its endpoints in descending order must report them
+    // in ascending order.
+    #[test]
+    fn class_range_canonical_unicode() {
+        let swapped = ClassUnicodeRange::new('\u{00FF}', '\0');
+        assert_eq!(('\0', '\u{00FF}'), (swapped.start(), swapped.end()));
+    }
+
+    // A range built with its endpoints in descending order must report them
+    // in ascending order.
+    #[test]
+    fn class_range_canonical_bytes() {
+        let swapped = ClassBytesRange::new(b'\xFF', b'\0');
+        assert_eq!((b'\0', b'\xFF'), (swapped.start(), swapped.end()));
+    }
+
+    // Each case is `(ranges handed to the constructor, ranges expected back
+    // out of the class)`.
+    #[test]
+    fn class_canonicalize_unicode() {
+        let cases = vec![
+            (vec![('a', 'c'), ('x', 'z')], vec![('a', 'c'), ('x', 'z')]),
+            (vec![('x', 'z'), ('a', 'c')], vec![('a', 'c'), ('x', 'z')]),
+            (vec![('x', 'z'), ('w', 'y')], vec![('w', 'z')]),
+            (
+                vec![
+                    ('c', 'f'),
+                    ('a', 'g'),
+                    ('d', 'j'),
+                    ('a', 'c'),
+                    ('m', 'p'),
+                    ('l', 's'),
+                ],
+                vec![('a', 'j'), ('l', 's')],
+            ),
+            (vec![('x', 'z'), ('u', 'w')], vec![('u', 'z')]),
+            (
+                vec![('\x00', '\u{10FFFF}'), ('\x00', '\u{10FFFF}')],
+                vec![('\x00', '\u{10FFFF}')],
+            ),
+            (vec![('a', 'a'), ('b', 'b')], vec![('a', 'b')]),
+        ];
+        for (input, expected) in cases {
+            assert_eq!(expected, uranges(&uclass(&input)));
+        }
+    }
+
+    // Each case is `(ranges handed to the constructor, ranges expected back
+    // out of the class)`.
+    #[test]
+    fn class_canonicalize_bytes() {
+        let cases = vec![
+            (
+                vec![(b'a', b'c'), (b'x', b'z')],
+                vec![(b'a', b'c'), (b'x', b'z')],
+            ),
+            (
+                vec![(b'x', b'z'), (b'a', b'c')],
+                vec![(b'a', b'c'), (b'x', b'z')],
+            ),
+            (vec![(b'x', b'z'), (b'w', b'y')], vec![(b'w', b'z')]),
+            (
+                vec![
+                    (b'c', b'f'),
+                    (b'a', b'g'),
+                    (b'd', b'j'),
+                    (b'a', b'c'),
+                    (b'm', b'p'),
+                    (b'l', b's'),
+                ],
+                vec![(b'a', b'j'), (b'l', b's')],
+            ),
+            (vec![(b'x', b'z'), (b'u', b'w')], vec![(b'u', b'z')]),
+            (
+                vec![(b'\x00', b'\xFF'), (b'\x00', b'\xFF')],
+                vec![(b'\x00', b'\xFF')],
+            ),
+            (vec![(b'a', b'a'), (b'b', b'b')], vec![(b'a', b'b')]),
+        ];
+        for (input, expected) in cases {
+            assert_eq!(expected, branges(&bclass(&input)));
+        }
+    }
+
+    // Each case is `(input ranges, ranges expected after simple case
+    // folding)`. Cases whose two halves are equal assert that folding
+    // leaves the class unchanged.
+    #[test]
+    #[cfg(feature = "unicode-case")]
+    fn class_case_fold_unicode() {
+        let cases = vec![
+            (
+                vec![
+                    ('C', 'F'),
+                    ('A', 'G'),
+                    ('D', 'J'),
+                    ('A', 'C'),
+                    ('M', 'P'),
+                    ('L', 'S'),
+                    ('c', 'f'),
+                ],
+                vec![
+                    ('A', 'J'),
+                    ('L', 'S'),
+                    ('a', 'j'),
+                    ('l', 's'),
+                    ('\u{17F}', '\u{17F}'),
+                ],
+            ),
+            (
+                vec![('A', 'Z')],
+                vec![
+                    ('A', 'Z'),
+                    ('a', 'z'),
+                    ('\u{17F}', '\u{17F}'),
+                    ('\u{212A}', '\u{212A}'),
+                ],
+            ),
+            (
+                vec![('a', 'z')],
+                vec![
+                    ('A', 'Z'),
+                    ('a', 'z'),
+                    ('\u{17F}', '\u{17F}'),
+                    ('\u{212A}', '\u{212A}'),
+                ],
+            ),
+            (
+                vec![('A', 'A'), ('_', '_')],
+                vec![('A', 'A'), ('_', '_'), ('a', 'a')],
+            ),
+            (
+                vec![('A', 'A'), ('=', '=')],
+                vec![('=', '='), ('A', 'A'), ('a', 'a')],
+            ),
+            (vec![('\x00', '\x10')], vec![('\x00', '\x10')]),
+            (
+                vec![('k', 'k')],
+                vec![('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}')],
+            ),
+            (vec![('@', '@')], vec![('@', '@')]),
+        ];
+        for (input, expected) in cases {
+            assert_eq!(uclass(&expected), ucasefold(&uclass(&input)));
+        }
+    }
+
+    // Without the `unicode-case` feature, the fallible case folding routine
+    // must report an error.
+    #[test]
+    #[cfg(not(feature = "unicode-case"))]
+    fn class_case_fold_unicode_disabled() {
+        let input = [
+            ('C', 'F'),
+            ('A', 'G'),
+            ('D', 'J'),
+            ('A', 'C'),
+            ('M', 'P'),
+            ('L', 'S'),
+            ('c', 'f'),
+        ];
+        let mut cls = uclass(&input);
+        let outcome = cls.try_case_fold_simple();
+        assert!(outcome.is_err());
+    }
+
+    // Without the `unicode-case` feature, the infallible case folding
+    // routine must panic.
+    #[test]
+    #[should_panic]
+    #[cfg(not(feature = "unicode-case"))]
+    fn class_case_fold_unicode_disabled_panics() {
+        let input = [
+            ('C', 'F'),
+            ('A', 'G'),
+            ('D', 'J'),
+            ('A', 'C'),
+            ('M', 'P'),
+            ('L', 'S'),
+            ('c', 'f'),
+        ];
+        let mut cls = uclass(&input);
+        cls.case_fold_simple();
+    }
+
+    // Each case is `(input ranges, ranges expected after simple case
+    // folding)`. Cases whose two halves are equal assert that folding
+    // leaves the class unchanged.
+    #[test]
+    fn class_case_fold_bytes() {
+        let cases = vec![
+            (
+                vec![
+                    (b'C', b'F'),
+                    (b'A', b'G'),
+                    (b'D', b'J'),
+                    (b'A', b'C'),
+                    (b'M', b'P'),
+                    (b'L', b'S'),
+                    (b'c', b'f'),
+                ],
+                vec![(b'A', b'J'), (b'L', b'S'), (b'a', b'j'), (b'l', b's')],
+            ),
+            (vec![(b'A', b'Z')], vec![(b'A', b'Z'), (b'a', b'z')]),
+            (vec![(b'a', b'z')], vec![(b'A', b'Z'), (b'a', b'z')]),
+            (
+                vec![(b'A', b'A'), (b'_', b'_')],
+                vec![(b'A', b'A'), (b'_', b'_'), (b'a', b'a')],
+            ),
+            (
+                vec![(b'A', b'A'), (b'=', b'=')],
+                vec![(b'=', b'='), (b'A', b'A'), (b'a', b'a')],
+            ),
+            (vec![(b'\x00', b'\x10')], vec![(b'\x00', b'\x10')]),
+            (vec![(b'k', b'k')], vec![(b'K', b'K'), (b'k', b'k')]),
+            (vec![(b'@', b'@')], vec![(b'@', b'@')]),
+        ];
+        for (input, expected) in cases {
+            assert_eq!(bclass(&expected), bcasefold(&bclass(&input)));
+        }
+    }
+
+    // Each case is `(input ranges, ranges expected after negation)`.
+    #[test]
+    fn class_negate_unicode() {
+        let cases = vec![
+            (
+                vec![('a', 'a')],
+                vec![('\x00', '\x60'), ('\x62', '\u{10FFFF}')],
+            ),
+            (
+                vec![('a', 'a'), ('b', 'b')],
+                vec![('\x00', '\x60'), ('\x63', '\u{10FFFF}')],
+            ),
+            (
+                vec![('a', 'c'), ('x', 'z')],
+                vec![
+                    ('\x00', '\x60'),
+                    ('\x64', '\x77'),
+                    ('\x7B', '\u{10FFFF}'),
+                ],
+            ),
+            (vec![('\x00', 'a')], vec![('\x62', '\u{10FFFF}')]),
+            (vec![('a', '\u{10FFFF}')], vec![('\x00', '\x60')]),
+            (vec![('\x00', '\u{10FFFF}')], vec![]),
+            (vec![], vec![('\x00', '\u{10FFFF}')]),
+            (
+                vec![('\x00', '\u{10FFFD}'), ('\u{10FFFF}', '\u{10FFFF}')],
+                vec![('\u{10FFFE}', '\u{10FFFE}')],
+            ),
+            (vec![('\x00', '\u{D7FF}')], vec![('\u{E000}', '\u{10FFFF}')]),
+            (vec![('\x00', '\u{D7FE}')], vec![('\u{D7FF}', '\u{10FFFF}')]),
+            (vec![('\u{E000}', '\u{10FFFF}')], vec![('\x00', '\u{D7FF}')]),
+            (vec![('\u{E001}', '\u{10FFFF}')], vec![('\x00', '\u{E000}')]),
+        ];
+        for (input, expected) in cases {
+            assert_eq!(uclass(&expected), unegate(&uclass(&input)));
+        }
+    }
+
+    // Each case is `(input ranges, ranges expected after negation)`.
+    #[test]
+    fn class_negate_bytes() {
+        let cases = vec![
+            (
+                vec![(b'a', b'a')],
+                vec![(b'\x00', b'\x60'), (b'\x62', b'\xFF')],
+            ),
+            (
+                vec![(b'a', b'a'), (b'b', b'b')],
+                vec![(b'\x00', b'\x60'), (b'\x63', b'\xFF')],
+            ),
+            (
+                vec![(b'a', b'c'), (b'x', b'z')],
+                vec![
+                    (b'\x00', b'\x60'),
+                    (b'\x64', b'\x77'),
+                    (b'\x7B', b'\xFF'),
+                ],
+            ),
+            (vec![(b'\x00', b'a')], vec![(b'\x62', b'\xFF')]),
+            (vec![(b'a', b'\xFF')], vec![(b'\x00', b'\x60')]),
+            (vec![(b'\x00', b'\xFF')], vec![]),
+            (vec![], vec![(b'\x00', b'\xFF')]),
+            (
+                vec![(b'\x00', b'\xFD'), (b'\xFF', b'\xFF')],
+                vec![(b'\xFE', b'\xFE')],
+            ),
+        ];
+        for (input, expected) in cases {
+            assert_eq!(bclass(&expected), bnegate(&bclass(&input)));
+        }
+    }
+
+    // Unioning with a class that covers 'a'..='z' absorbs the lowercase
+    // ranges and leaves the uppercase range in place.
+    #[test]
+    fn class_union_unicode() {
+        let lhs = uclass(&[('a', 'g'), ('m', 't'), ('A', 'C')]);
+        let rhs = uclass(&[('a', 'z')]);
+        let merged = uunion(&lhs, &rhs);
+        assert_eq!(uclass(&[('a', 'z'), ('A', 'C')]), merged);
+    }
+
+    // Unioning with a class that covers b'a'..=b'z' absorbs the lowercase
+    // ranges and leaves the uppercase range in place.
+    #[test]
+    fn class_union_bytes() {
+        let lhs = bclass(&[(b'a', b'g'), (b'm', b't'), (b'A', b'C')]);
+        let rhs = bclass(&[(b'a', b'z')]);
+        let merged = bunion(&lhs, &rhs);
+        assert_eq!(bclass(&[(b'a', b'z'), (b'A', b'C')]), merged);
+    }
+
+    // Each case is `(left operand, right operand, expected intersection)`.
+    #[test]
+    fn class_intersect_unicode() {
+        let cases = vec![
+            (vec![], vec![('a', 'a')], vec![]),
+            (vec![('a', 'a')], vec![('a', 'a')], vec![('a', 'a')]),
+            (vec![('a', 'a')], vec![('b', 'b')], vec![]),
+            (vec![('a', 'a')], vec![('a', 'c')], vec![('a', 'a')]),
+            (vec![('a', 'b')], vec![('a', 'c')], vec![('a', 'b')]),
+            (vec![('a', 'b')], vec![('b', 'c')], vec![('b', 'b')]),
+            (vec![('a', 'b')], vec![('c', 'd')], vec![]),
+            (vec![('b', 'c')], vec![('a', 'd')], vec![('b', 'c')]),
+            (
+                vec![('a', 'b'), ('d', 'e'), ('g', 'h')],
+                vec![('a', 'h')],
+                vec![('a', 'b'), ('d', 'e'), ('g', 'h')],
+            ),
+            (
+                vec![('a', 'b'), ('d', 'e'), ('g', 'h')],
+                vec![('a', 'b'), ('d', 'e'), ('g', 'h')],
+                vec![('a', 'b'), ('d', 'e'), ('g', 'h')],
+            ),
+            (
+                vec![('a', 'b'), ('g', 'h')],
+                vec![('d', 'e'), ('k', 'l')],
+                vec![],
+            ),
+            (
+                vec![('a', 'b'), ('d', 'e'), ('g', 'h')],
+                vec![('h', 'h')],
+                vec![('h', 'h')],
+            ),
+            (
+                vec![('a', 'b'), ('e', 'f'), ('i', 'j')],
+                vec![('c', 'd'), ('g', 'h'), ('k', 'l')],
+                vec![],
+            ),
+            (
+                vec![('a', 'b'), ('c', 'd'), ('e', 'f')],
+                vec![('b', 'c'), ('d', 'e'), ('f', 'g')],
+                vec![('b', 'f')],
+            ),
+        ];
+        for (lhs, rhs, expected) in cases {
+            let got = uintersect(&uclass(&lhs), &uclass(&rhs));
+            assert_eq!(uclass(&expected), got);
+        }
+    }
+
+    // Each case is `(left operand, right operand, expected intersection)`.
+    #[test]
+    fn class_intersect_bytes() {
+        let cases = vec![
+            (vec![], vec![(b'a', b'a')], vec![]),
+            (vec![(b'a', b'a')], vec![(b'a', b'a')], vec![(b'a', b'a')]),
+            (vec![(b'a', b'a')], vec![(b'b', b'b')], vec![]),
+            (vec![(b'a', b'a')], vec![(b'a', b'c')], vec![(b'a', b'a')]),
+            (vec![(b'a', b'b')], vec![(b'a', b'c')], vec![(b'a', b'b')]),
+            (vec![(b'a', b'b')], vec![(b'b', b'c')], vec![(b'b', b'b')]),
+            (vec![(b'a', b'b')], vec![(b'c', b'd')], vec![]),
+            (vec![(b'b', b'c')], vec![(b'a', b'd')], vec![(b'b', b'c')]),
+            (
+                vec![(b'a', b'b'), (b'd', b'e'), (b'g', b'h')],
+                vec![(b'a', b'h')],
+                vec![(b'a', b'b'), (b'd', b'e'), (b'g', b'h')],
+            ),
+            (
+                vec![(b'a', b'b'), (b'd', b'e'), (b'g', b'h')],
+                vec![(b'a', b'b'), (b'd', b'e'), (b'g', b'h')],
+                vec![(b'a', b'b'), (b'd', b'e'), (b'g', b'h')],
+            ),
+            (
+                vec![(b'a', b'b'), (b'g', b'h')],
+                vec![(b'd', b'e'), (b'k', b'l')],
+                vec![],
+            ),
+            (
+                vec![(b'a', b'b'), (b'd', b'e'), (b'g', b'h')],
+                vec![(b'h', b'h')],
+                vec![(b'h', b'h')],
+            ),
+            (
+                vec![(b'a', b'b'), (b'e', b'f'), (b'i', b'j')],
+                vec![(b'c', b'd'), (b'g', b'h'), (b'k', b'l')],
+                vec![],
+            ),
+            (
+                vec![(b'a', b'b'), (b'c', b'd'), (b'e', b'f')],
+                vec![(b'b', b'c'), (b'd', b'e'), (b'f', b'g')],
+                vec![(b'b', b'f')],
+            ),
+        ];
+        for (lhs, rhs, expected) in cases {
+            let got = bintersect(&bclass(&lhs), &bclass(&rhs));
+            assert_eq!(bclass(&expected), got);
+        }
+    }
+
+    // Each case is `(left operand, right operand, expected difference)`.
+    #[test]
+    fn class_difference_unicode() {
+        let cases = vec![
+            (vec![('a', 'a')], vec![('a', 'a')], vec![]),
+            (vec![('a', 'a')], vec![], vec![('a', 'a')]),
+            (vec![], vec![('a', 'a')], vec![]),
+            (vec![('a', 'z')], vec![('a', 'a')], vec![('b', 'z')]),
+            (vec![('a', 'z')], vec![('z', 'z')], vec![('a', 'y')]),
+            (
+                vec![('a', 'z')],
+                vec![('m', 'm')],
+                vec![('a', 'l'), ('n', 'z')],
+            ),
+            (
+                vec![('a', 'c'), ('g', 'i'), ('r', 't')],
+                vec![('a', 'z')],
+                vec![],
+            ),
+            (
+                vec![('a', 'c'), ('g', 'i'), ('r', 't')],
+                vec![('d', 'v')],
+                vec![('a', 'c')],
+            ),
+            (
+                vec![('a', 'c'), ('g', 'i'), ('r', 't')],
+                vec![('b', 'g'), ('s', 'u')],
+                vec![('a', 'a'), ('h', 'i'), ('r', 'r')],
+            ),
+            (
+                vec![('a', 'c'), ('g', 'i'), ('r', 't')],
+                vec![('b', 'd'), ('e', 'g'), ('s', 'u')],
+                vec![('a', 'a'), ('h', 'i'), ('r', 'r')],
+            ),
+            (
+                vec![('x', 'z')],
+                vec![('a', 'c'), ('e', 'g'), ('s', 'u')],
+                vec![('x', 'z')],
+            ),
+            (
+                vec![('a', 'z')],
+                vec![('a', 'c'), ('e', 'g'), ('s', 'u')],
+                vec![('d', 'd'), ('h', 'r'), ('v', 'z')],
+            ),
+        ];
+        for (lhs, rhs, expected) in cases {
+            let got = udifference(&uclass(&lhs), &uclass(&rhs));
+            assert_eq!(uclass(&expected), got);
+        }
+    }
+
+    // Each case is `(left operand, right operand, expected difference)`.
+    #[test]
+    fn class_difference_bytes() {
+        let cases = vec![
+            (vec![(b'a', b'a')], vec![(b'a', b'a')], vec![]),
+            (vec![(b'a', b'a')], vec![], vec![(b'a', b'a')]),
+            (vec![], vec![(b'a', b'a')], vec![]),
+            (vec![(b'a', b'z')], vec![(b'a', b'a')], vec![(b'b', b'z')]),
+            (vec![(b'a', b'z')], vec![(b'z', b'z')], vec![(b'a', b'y')]),
+            (
+                vec![(b'a', b'z')],
+                vec![(b'm', b'm')],
+                vec![(b'a', b'l'), (b'n', b'z')],
+            ),
+            (
+                vec![(b'a', b'c'), (b'g', b'i'), (b'r', b't')],
+                vec![(b'a', b'z')],
+                vec![],
+            ),
+            (
+                vec![(b'a', b'c'), (b'g', b'i'), (b'r', b't')],
+                vec![(b'd', b'v')],
+                vec![(b'a', b'c')],
+            ),
+            (
+                vec![(b'a', b'c'), (b'g', b'i'), (b'r', b't')],
+                vec![(b'b', b'g'), (b's', b'u')],
+                vec![(b'a', b'a'), (b'h', b'i'), (b'r', b'r')],
+            ),
+            (
+                vec![(b'a', b'c'), (b'g', b'i'), (b'r', b't')],
+                vec![(b'b', b'd'), (b'e', b'g'), (b's', b'u')],
+                vec![(b'a', b'a'), (b'h', b'i'), (b'r', b'r')],
+            ),
+            (
+                vec![(b'x', b'z')],
+                vec![(b'a', b'c'), (b'e', b'g'), (b's', b'u')],
+                vec![(b'x', b'z')],
+            ),
+            (
+                vec![(b'a', b'z')],
+                vec![(b'a', b'c'), (b'e', b'g'), (b's', b'u')],
+                vec![(b'd', b'd'), (b'h', b'r'), (b'v', b'z')],
+            ),
+        ];
+        for (lhs, rhs, expected) in cases {
+            let got = bdifference(&bclass(&lhs), &bclass(&rhs));
+            assert_eq!(bclass(&expected), got);
+        }
+    }
+
+    #[test]
+    fn class_symmetric_difference_unicode() {
+        let cls1 = uclass(&[('a', 'm')]);
+        let cls2 = uclass(&[('g', 't')]);
+        let expected = uclass(&[('a', 'f'), ('n', 't')]);
+        assert_eq!(expected, usymdifference(&cls1, &cls2));
+    }
+
+    #[test]
+    fn class_symmetric_difference_bytes() {
+        let cls1 = bclass(&[(b'a', b'm')]);
+        let cls2 = bclass(&[(b'g', b't')]);
+        let expected = bclass(&[(b'a', b'f'), (b'n', b't')]);
+        assert_eq!(expected, bsymdifference(&cls1, &cls2));
+    }
+
+    // We use a thread with an explicit stack size to test that our destructor
+    // for Hir can handle arbitrarily sized expressions in constant stack
+    // space. In case we run on a platform without threads (WASM?), we limit
+    // this test to Windows/Unix.
+    #[test]
+    #[cfg(any(unix, windows))]
+    fn no_stack_overflow_on_drop() {
+        use std::thread;
+
+        let run = || {
+            let mut expr = Hir::empty();
+            for _ in 0..100 {
+                expr = Hir::capture(Capture {
+                    index: 1,
+                    name: None,
+                    sub: Box::new(expr),
+                });
+                expr = Hir::repetition(Repetition {
+                    min: 0,
+                    max: Some(1),
+                    greedy: true,
+                    sub: Box::new(expr),
+                });
+
+                expr = Hir {
+                    kind: HirKind::Concat(vec![expr]),
+                    props: Properties::empty(),
+                };
+                expr = Hir {
+                    kind: HirKind::Alternation(vec![expr]),
+                    props: Properties::empty(),
+                };
+            }
+            assert!(!matches!(*expr.kind(), HirKind::Empty));
+        };
+
+        // We run our test on a thread with a small stack size so we can
+        // force the issue more easily.
+        //
+        // NOTE(2023-03-21): See the corresponding test in 'crate::ast::tests'
+        // for context on the specific stack size chosen here.
+        thread::Builder::new()
+            .stack_size(16 << 10)
+            .spawn(run)
+            .unwrap()
+            .join()
+            .unwrap();
+    }
+
+    #[test]
+    fn look_set_iter() {
+        let set = LookSet::empty();
+        assert_eq!(0, set.iter().count());
+
+        let set = LookSet::full();
+        assert_eq!(18, set.iter().count());
+
+        let set =
+            LookSet::empty().insert(Look::StartLF).insert(Look::WordUnicode);
+        assert_eq!(2, set.iter().count());
+
+        let set = LookSet::empty().insert(Look::StartLF);
+        assert_eq!(1, set.iter().count());
+
+        let set = LookSet::empty().insert(Look::WordAsciiNegate);
+        assert_eq!(1, set.iter().count());
+    }
+
+    #[test]
+    fn look_set_debug() {
+        let res = format!("{:?}", LookSet::empty());
+        assert_eq!("∅", res);
+        let res = format!("{:?}", LookSet::full());
+        assert_eq!("Az^$rRbB𝛃𝚩<>〈〉◁▷◀▶", res);
+    }
+}
diff --git a/vendor/regex-syntax/src/hir/print.rs b/vendor/regex-syntax/src/hir/print.rs
new file mode 100644
index 0000000..dfa6d40
--- /dev/null
+++ b/vendor/regex-syntax/src/hir/print.rs
@@ -0,0 +1,608 @@
+/*!
+This module provides a regular expression printer for `Hir`.
+*/
+
+use core::fmt;
+
+use crate::{
+    hir::{
+        self,
+        visitor::{self, Visitor},
+        Hir, HirKind,
+    },
+    is_meta_character,
+};
+
+/// A builder for constructing a printer.
+///
+/// Note that since a printer doesn't have any configuration knobs, this type
+/// remains unexported.
+#[derive(Clone, Debug)]
+struct PrinterBuilder {
+    _priv: (),
+}
+
+impl Default for PrinterBuilder {
+    fn default() -> PrinterBuilder {
+        PrinterBuilder::new()
+    }
+}
+
+impl PrinterBuilder {
+    fn new() -> PrinterBuilder {
+        PrinterBuilder { _priv: () }
+    }
+
+    fn build(&self) -> Printer {
+        Printer { _priv: () }
+    }
+}
+
+/// A printer for a regular expression's high-level intermediate
+/// representation.
+///
+/// A printer converts a high-level intermediate representation (HIR) to a
+/// regular expression pattern string. This particular printer uses constant
+/// stack space and heap space proportional to the size of the HIR.
+///
+/// Since this printer is only using the HIR, the pattern it prints will likely
+/// not resemble the original pattern at all. For example, a pattern like
+/// `\pL` will have its entire class written out.
+///
+/// The purpose of this printer is to provide a means to mutate an HIR and then
+/// build a regular expression from the result of that mutation. (A regex
+/// library could provide a constructor from this HIR explicitly, but that
+/// creates an unnecessary public coupling between the regex library and this
+/// specific HIR representation.)
+#[derive(Debug)]
+pub struct Printer {
+    _priv: (),
+}
+
+impl Printer {
+    /// Create a new printer.
+    pub fn new() -> Printer {
+        PrinterBuilder::new().build()
+    }
+
+    /// Print the given `Hir` to the given writer. The writer must implement
+    /// `fmt::Write`. Typical implementations of `fmt::Write` that can be used
+    /// here are a `fmt::Formatter` (which is available in `fmt::Display`
+    /// implementations) or a `&mut String`.
+    pub fn print<W: fmt::Write>(&mut self, hir: &Hir, wtr: W) -> fmt::Result {
+        visitor::visit(hir, Writer { wtr })
+    }
+}
+
+#[derive(Debug)]
+struct Writer<W> {
+    wtr: W,
+}
+
+impl<W: fmt::Write> Visitor for Writer<W> {
+    type Output = ();
+    type Err = fmt::Error;
+
+    fn finish(self) -> fmt::Result {
+        Ok(())
+    }
+
+    fn visit_pre(&mut self, hir: &Hir) -> fmt::Result {
+        match *hir.kind() {
+            HirKind::Empty => {
+                // Technically an empty sub-expression could be "printed" by
+                // just ignoring it, but in practice, you could have a
+                // repetition operator attached to an empty expression, and you
+                // really need something in the concrete syntax to make that
+                // work as you'd expect.
+                self.wtr.write_str(r"(?:)")?;
+            }
+            // Repetition operators are strictly suffix oriented.
+            HirKind::Repetition(_) => {}
+            HirKind::Literal(hir::Literal(ref bytes)) => {
+                // See the comment on the 'Concat' and 'Alternation' case below
+                // for why we put parens here. Literals are, conceptually,
+                // a special case of concatenation where each element is a
+                // character. The HIR flattens this into a Box<[u8]>, but we
+                // still need to treat it like a concatenation for correct
+                // printing. As a special case, we don't write parens if there
+                // is only one character. One character means there is no
+                // concat so we don't need parens. Adding parens would still be
+                // correct, but we drop them here because it tends to create
+                // rather noisy regexes even in simple cases.
+                let result = core::str::from_utf8(bytes);
+                let len = result.map_or(bytes.len(), |s| s.chars().count());
+                if len > 1 {
+                    self.wtr.write_str(r"(?:")?;
+                }
+                match result {
+                    Ok(string) => {
+                        for c in string.chars() {
+                            self.write_literal_char(c)?;
+                        }
+                    }
+                    Err(_) => {
+                        for &b in bytes.iter() {
+                            self.write_literal_byte(b)?;
+                        }
+                    }
+                }
+                if len > 1 {
+                    self.wtr.write_str(r")")?;
+                }
+            }
+            HirKind::Class(hir::Class::Unicode(ref cls)) => {
+                if cls.ranges().is_empty() {
+                    return self.wtr.write_str("[a&&b]");
+                }
+                self.wtr.write_str("[")?;
+                for range in cls.iter() {
+                    if range.start() == range.end() {
+                        self.write_literal_char(range.start())?;
+                    } else if u32::from(range.start()) + 1
+                        == u32::from(range.end())
+                    {
+                        self.write_literal_char(range.start())?;
+                        self.write_literal_char(range.end())?;
+                    } else {
+                        self.write_literal_char(range.start())?;
+                        self.wtr.write_str("-")?;
+                        self.write_literal_char(range.end())?;
+                    }
+                }
+                self.wtr.write_str("]")?;
+            }
+            HirKind::Class(hir::Class::Bytes(ref cls)) => {
+                if cls.ranges().is_empty() {
+                    return self.wtr.write_str("[a&&b]");
+                }
+                self.wtr.write_str("(?-u:[")?;
+                for range in cls.iter() {
+                    if range.start() == range.end() {
+                        self.write_literal_class_byte(range.start())?;
+                    } else if range.start() + 1 == range.end() {
+                        self.write_literal_class_byte(range.start())?;
+                        self.write_literal_class_byte(range.end())?;
+                    } else {
+                        self.write_literal_class_byte(range.start())?;
+                        self.wtr.write_str("-")?;
+                        self.write_literal_class_byte(range.end())?;
+                    }
+                }
+                self.wtr.write_str("])")?;
+            }
+            HirKind::Look(ref look) => match *look {
+                hir::Look::Start => {
+                    self.wtr.write_str(r"\A")?;
+                }
+                hir::Look::End => {
+                    self.wtr.write_str(r"\z")?;
+                }
+                hir::Look::StartLF => {
+                    self.wtr.write_str("(?m:^)")?;
+                }
+                hir::Look::EndLF => {
+                    self.wtr.write_str("(?m:$)")?;
+                }
+                hir::Look::StartCRLF => {
+                    self.wtr.write_str("(?mR:^)")?;
+                }
+                hir::Look::EndCRLF => {
+                    self.wtr.write_str("(?mR:$)")?;
+                }
+                hir::Look::WordAscii => {
+                    self.wtr.write_str(r"(?-u:\b)")?;
+                }
+                hir::Look::WordAsciiNegate => {
+                    self.wtr.write_str(r"(?-u:\B)")?;
+                }
+                hir::Look::WordUnicode => {
+                    self.wtr.write_str(r"\b")?;
+                }
+                hir::Look::WordUnicodeNegate => {
+                    self.wtr.write_str(r"\B")?;
+                }
+                hir::Look::WordStartAscii => {
+                    self.wtr.write_str(r"(?-u:\b{start})")?;
+                }
+                hir::Look::WordEndAscii => {
+                    self.wtr.write_str(r"(?-u:\b{end})")?;
+                }
+                hir::Look::WordStartUnicode => {
+                    self.wtr.write_str(r"\b{start}")?;
+                }
+                hir::Look::WordEndUnicode => {
+                    self.wtr.write_str(r"\b{end}")?;
+                }
+                hir::Look::WordStartHalfAscii => {
+                    self.wtr.write_str(r"(?-u:\b{start-half})")?;
+                }
+                hir::Look::WordEndHalfAscii => {
+                    self.wtr.write_str(r"(?-u:\b{end-half})")?;
+                }
+                hir::Look::WordStartHalfUnicode => {
+                    self.wtr.write_str(r"\b{start-half}")?;
+                }
+                hir::Look::WordEndHalfUnicode => {
+                    self.wtr.write_str(r"\b{end-half}")?;
+                }
+            },
+            HirKind::Capture(hir::Capture { ref name, .. }) => {
+                self.wtr.write_str("(")?;
+                if let Some(ref name) = *name {
+                    write!(self.wtr, "?P<{}>", name)?;
+                }
+            }
+            // Why do this? Wrapping concats and alts in non-capturing groups
+            // is not *always* necessary, but is sometimes necessary. For
+            // example, 'concat(a, alt(b, c))' should be written as 'a(?:b|c)'
+            // and not 'ab|c'. The former is clearly the intended meaning, but
+            // the latter is actually 'alt(concat(a, b), c)'.
+            //
+            // It would be possible to only group these things in cases where
+            // it's strictly necessary, but it requires knowing the parent
+            // expression. And since this technique is simpler and always
+            // correct, we take this route. More to the point, it is a non-goal
+            // of an HIR printer to show a nice easy-to-read regex. Indeed,
+            // its construction forbids it from doing so. Therefore, inserting
+            // extra groups where they aren't necessary is perfectly okay.
+            HirKind::Concat(_) | HirKind::Alternation(_) => {
+                self.wtr.write_str(r"(?:")?;
+            }
+        }
+        Ok(())
+    }
+
+    fn visit_post(&mut self, hir: &Hir) -> fmt::Result {
+        match *hir.kind() {
+            // Handled during visit_pre
+            HirKind::Empty
+            | HirKind::Literal(_)
+            | HirKind::Class(_)
+            | HirKind::Look(_) => {}
+            HirKind::Repetition(ref x) => {
+                match (x.min, x.max) {
+                    (0, Some(1)) => {
+                        self.wtr.write_str("?")?;
+                    }
+                    (0, None) => {
+                        self.wtr.write_str("*")?;
+                    }
+                    (1, None) => {
+                        self.wtr.write_str("+")?;
+                    }
+                    (1, Some(1)) => {
+                        // 'a{1}' and 'a{1}?' are exactly equivalent to 'a'.
+                        return Ok(());
+                    }
+                    (m, None) => {
+                        write!(self.wtr, "{{{},}}", m)?;
+                    }
+                    (m, Some(n)) if m == n => {
+                        write!(self.wtr, "{{{}}}", m)?;
+                        // a{m} and a{m}? are always exactly equivalent.
+                        return Ok(());
+                    }
+                    (m, Some(n)) => {
+                        write!(self.wtr, "{{{},{}}}", m, n)?;
+                    }
+                }
+                if !x.greedy {
+                    self.wtr.write_str("?")?;
+                }
+            }
+            HirKind::Capture(_)
+            | HirKind::Concat(_)
+            | HirKind::Alternation(_) => {
+                self.wtr.write_str(r")")?;
+            }
+        }
+        Ok(())
+    }
+
+    fn visit_alternation_in(&mut self) -> fmt::Result {
+        self.wtr.write_str("|")
+    }
+}
+
+impl<W: fmt::Write> Writer<W> {
+    fn write_literal_char(&mut self, c: char) -> fmt::Result {
+        if is_meta_character(c) {
+            self.wtr.write_str("\\")?;
+        }
+        self.wtr.write_char(c)
+    }
+
+    fn write_literal_byte(&mut self, b: u8) -> fmt::Result {
+        if b <= 0x7F && !b.is_ascii_control() && !b.is_ascii_whitespace() {
+            self.write_literal_char(char::try_from(b).unwrap())
+        } else {
+            write!(self.wtr, "(?-u:\\x{:02X})", b)
+        }
+    }
+
+    fn write_literal_class_byte(&mut self, b: u8) -> fmt::Result {
+        if b <= 0x7F && !b.is_ascii_control() && !b.is_ascii_whitespace() {
+            self.write_literal_char(char::try_from(b).unwrap())
+        } else {
+            write!(self.wtr, "\\x{:02X}", b)
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use alloc::{
+        boxed::Box,
+        string::{String, ToString},
+    };
+
+    use crate::ParserBuilder;
+
+    use super::*;
+
+    fn roundtrip(given: &str, expected: &str) {
+        roundtrip_with(|b| b, given, expected);
+    }
+
+    fn roundtrip_bytes(given: &str, expected: &str) {
+        roundtrip_with(|b| b.utf8(false), given, expected);
+    }
+
+    fn roundtrip_with<F>(mut f: F, given: &str, expected: &str)
+    where
+        F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder,
+    {
+        let mut builder = ParserBuilder::new();
+        f(&mut builder);
+        let hir = builder.build().parse(given).unwrap();
+
+        let mut printer = Printer::new();
+        let mut dst = String::new();
+        printer.print(&hir, &mut dst).unwrap();
+
+        // Check that the result is actually valid.
+        builder.build().parse(&dst).unwrap();
+
+        assert_eq!(expected, dst);
+    }
+
+    #[test]
+    fn print_literal() {
+        roundtrip("a", "a");
+        roundtrip(r"\xff", "\u{FF}");
+        roundtrip_bytes(r"\xff", "\u{FF}");
+        roundtrip_bytes(r"(?-u)\xff", r"(?-u:\xFF)");
+        roundtrip("☃", "☃");
+    }
+
+    #[test]
+    fn print_class() {
+        roundtrip(r"[a]", r"a");
+        roundtrip(r"[ab]", r"[ab]");
+        roundtrip(r"[a-z]", r"[a-z]");
+        roundtrip(r"[a-z--b-c--x-y]", r"[ad-wz]");
+        roundtrip(r"[^\x01-\u{10FFFF}]", "\u{0}");
+        roundtrip(r"[-]", r"\-");
+        roundtrip(r"[☃-⛄]", r"[☃-⛄]");
+
+        roundtrip(r"(?-u)[a]", r"a");
+        roundtrip(r"(?-u)[ab]", r"(?-u:[ab])");
+        roundtrip(r"(?-u)[a-z]", r"(?-u:[a-z])");
+        roundtrip_bytes(r"(?-u)[a-\xFF]", r"(?-u:[a-\xFF])");
+
+        // The following test that the printer escapes meta characters
+        // in character classes.
+        roundtrip(r"[\[]", r"\[");
+        roundtrip(r"[Z-_]", r"[Z-_]");
+        roundtrip(r"[Z-_--Z]", r"[\[-_]");
+
+        // The following test that the printer escapes meta characters
+        // in byte oriented character classes.
+        roundtrip_bytes(r"(?-u)[\[]", r"\[");
+        roundtrip_bytes(r"(?-u)[Z-_]", r"(?-u:[Z-_])");
+        roundtrip_bytes(r"(?-u)[Z-_--Z]", r"(?-u:[\[-_])");
+
+        // This tests that an empty character class is correctly roundtripped.
+        #[cfg(feature = "unicode-gencat")]
+        roundtrip(r"\P{any}", r"[a&&b]");
+        roundtrip_bytes(r"(?-u)[^\x00-\xFF]", r"[a&&b]");
+    }
+
+    #[test]
+    fn print_anchor() {
+        roundtrip(r"^", r"\A");
+        roundtrip(r"$", r"\z");
+        roundtrip(r"(?m)^", r"(?m:^)");
+        roundtrip(r"(?m)$", r"(?m:$)");
+    }
+
+    #[test]
+    fn print_word_boundary() {
+        roundtrip(r"\b", r"\b");
+        roundtrip(r"\B", r"\B");
+        roundtrip(r"(?-u)\b", r"(?-u:\b)");
+        roundtrip_bytes(r"(?-u)\B", r"(?-u:\B)");
+    }
+
+    #[test]
+    fn print_repetition() {
+        roundtrip("a?", "a?");
+        roundtrip("a??", "a??");
+        roundtrip("(?U)a?", "a??");
+
+        roundtrip("a*", "a*");
+        roundtrip("a*?", "a*?");
+        roundtrip("(?U)a*", "a*?");
+
+        roundtrip("a+", "a+");
+        roundtrip("a+?", "a+?");
+        roundtrip("(?U)a+", "a+?");
+
+        roundtrip("a{1}", "a");
+        roundtrip("a{2}", "a{2}");
+        roundtrip("a{1,}", "a+");
+        roundtrip("a{1,5}", "a{1,5}");
+        roundtrip("a{1}?", "a");
+        roundtrip("a{2}?", "a{2}");
+        roundtrip("a{1,}?", "a+?");
+        roundtrip("a{1,5}?", "a{1,5}?");
+        roundtrip("(?U)a{1}", "a");
+        roundtrip("(?U)a{2}", "a{2}");
+        roundtrip("(?U)a{1,}", "a+?");
+        roundtrip("(?U)a{1,5}", "a{1,5}?");
+
+        // Test that various zero-length repetitions always translate to an
+        // empty regex. This is more a property of HIR's smart constructors
+        // than the printer though.
+        roundtrip("a{0}", "(?:)");
+        roundtrip("(?:ab){0}", "(?:)");
+        #[cfg(feature = "unicode-gencat")]
+        {
+            roundtrip(r"\p{any}{0}", "(?:)");
+            roundtrip(r"\P{any}{0}", "(?:)");
+        }
+    }
+
+    #[test]
+    fn print_group() {
+        roundtrip("()", "((?:))");
+        roundtrip("(?P<foo>)", "(?P<foo>(?:))");
+        roundtrip("(?:)", "(?:)");
+
+        roundtrip("(a)", "(a)");
+        roundtrip("(?P<foo>a)", "(?P<foo>a)");
+        roundtrip("(?:a)", "a");
+
+        roundtrip("((((a))))", "((((a))))");
+    }
+
+    #[test]
+    fn print_alternation() {
+        roundtrip("|", "(?:(?:)|(?:))");
+        roundtrip("||", "(?:(?:)|(?:)|(?:))");
+
+        roundtrip("a|b", "[ab]");
+        roundtrip("ab|cd", "(?:(?:ab)|(?:cd))");
+        roundtrip("a|b|c", "[a-c]");
+        roundtrip("ab|cd|ef", "(?:(?:ab)|(?:cd)|(?:ef))");
+        roundtrip("foo|bar|quux", "(?:(?:foo)|(?:bar)|(?:quux))");
+    }
+
+    // This is a regression test that stresses a peculiarity of how the HIR
+    // is both constructed and printed. Namely, it is legal for a repetition
+    // to directly contain a concatenation. This particular construct isn't
+    // really possible to build from the concrete syntax directly, since you'd
+    // be forced to put the concatenation into (at least) a non-capturing
+    // group. Concurrently, the printer doesn't consider this case and just
+    // kind of naively prints the child expression and tacks on the repetition
+    // operator.
+    //
+    // As a result, if you attached '+' to a 'concat(a, b)', the printer gives
+    // you 'ab+', but clearly it really should be '(?:ab)+'.
+    //
+    // This bug isn't easy to surface because most ways of building an HIR
+    // come directly from the concrete syntax, and as mentioned above, it just
+    // isn't possible to build this kind of HIR from the concrete syntax.
+    // Nevertheless, this is definitely a bug.
+    //
+    // See: https://github.com/rust-lang/regex/issues/731
+    #[test]
+    fn regression_repetition_concat() {
+        let expr = Hir::concat(alloc::vec![
+            Hir::literal("x".as_bytes()),
+            Hir::repetition(hir::Repetition {
+                min: 1,
+                max: None,
+                greedy: true,
+                sub: Box::new(Hir::literal("ab".as_bytes())),
+            }),
+            Hir::literal("y".as_bytes()),
+        ]);
+        assert_eq!(r"(?:x(?:ab)+y)", expr.to_string());
+
+        let expr = Hir::concat(alloc::vec![
+            Hir::look(hir::Look::Start),
+            Hir::repetition(hir::Repetition {
+                min: 1,
+                max: None,
+                greedy: true,
+                sub: Box::new(Hir::concat(alloc::vec![
+                    Hir::look(hir::Look::Start),
+                    Hir::look(hir::Look::End),
+                ])),
+            }),
+            Hir::look(hir::Look::End),
+        ]);
+        assert_eq!(r"(?:\A\A\z\z)", expr.to_string());
+    }
+
+    // Just like regression_repetition_concat, but with the repetition using
+    // an alternation as a child expression instead.
+    //
+    // See: https://github.com/rust-lang/regex/issues/731
+    #[test]
+    fn regression_repetition_alternation() {
+        let expr = Hir::concat(alloc::vec![
+            Hir::literal("ab".as_bytes()),
+            Hir::repetition(hir::Repetition {
+                min: 1,
+                max: None,
+                greedy: true,
+                sub: Box::new(Hir::alternation(alloc::vec![
+                    Hir::literal("cd".as_bytes()),
+                    Hir::literal("ef".as_bytes()),
+                ])),
+            }),
+            Hir::literal("gh".as_bytes()),
+        ]);
+        assert_eq!(r"(?:(?:ab)(?:(?:cd)|(?:ef))+(?:gh))", expr.to_string());
+
+        let expr = Hir::concat(alloc::vec![
+            Hir::look(hir::Look::Start),
+            Hir::repetition(hir::Repetition {
+                min: 1,
+                max: None,
+                greedy: true,
+                sub: Box::new(Hir::alternation(alloc::vec![
+                    Hir::look(hir::Look::Start),
+                    Hir::look(hir::Look::End),
+                ])),
+            }),
+            Hir::look(hir::Look::End),
+        ]);
+        assert_eq!(r"(?:\A(?:\A|\z)\z)", expr.to_string());
+    }
+
+    // This regression test is very similar in flavor to
+    // regression_repetition_concat in that the root of the issue lies in a
+    // peculiarity of how the HIR is represented and how the printer writes it
+    // out. Like the other regression, this one is also rooted in the fact that
+    // you can't produce the peculiar HIR from the concrete syntax. Namely, you
+    // just can't have a 'concat(a, alt(b, c))' because the 'alt' will normally
+    // be in (at least) a non-capturing group. Why? Because the '|' has very
+    // low precedence (lower than concatenation), and so something like 'ab|c'
+    // is actually 'alt(ab, c)'.
+    //
+    // See: https://github.com/rust-lang/regex/issues/516
+    #[test]
+    fn regression_alternation_concat() {
+        let expr = Hir::concat(alloc::vec![
+            Hir::literal("ab".as_bytes()),
+            Hir::alternation(alloc::vec![
+                Hir::literal("mn".as_bytes()),
+                Hir::literal("xy".as_bytes()),
+            ]),
+        ]);
+        assert_eq!(r"(?:(?:ab)(?:(?:mn)|(?:xy)))", expr.to_string());
+
+        let expr = Hir::concat(alloc::vec![
+            Hir::look(hir::Look::Start),
+            Hir::alternation(alloc::vec![
+                Hir::look(hir::Look::Start),
+                Hir::look(hir::Look::End),
+            ]),
+        ]);
+        assert_eq!(r"(?:\A(?:\A|\z))", expr.to_string());
+    }
+}
diff --git a/vendor/regex-syntax/src/hir/translate.rs b/vendor/regex-syntax/src/hir/translate.rs
new file mode 100644
index 0000000..313a1e9
--- /dev/null
+++ b/vendor/regex-syntax/src/hir/translate.rs
@@ -0,0 +1,3724 @@
+/*!
+Defines a translator that converts an `Ast` to an `Hir`.
+*/
+
+use core::cell::{Cell, RefCell};
+
+use alloc::{boxed::Box, string::ToString, vec, vec::Vec};
+
+use crate::{
+    ast::{self, Ast, Span, Visitor},
+    either::Either,
+    hir::{self, Error, ErrorKind, Hir, HirKind},
+    unicode::{self, ClassQuery},
+};
+
+type Result<T> = core::result::Result<T, Error>;
+
+/// A builder for configuring and constructing an AST->HIR [`Translator`].
+#[derive(Clone, Debug)]
+pub struct TranslatorBuilder {
+    utf8: bool,
+    line_terminator: u8,
+    flags: Flags,
+}
+
+impl Default for TranslatorBuilder {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl TranslatorBuilder {
+    /// Create a new translator builder with a default configuration.
+    pub fn new() -> Self {
+        Self {
+            flags: Flags::default(),
+            line_terminator: b'\n',
+            utf8: true,
+        }
+    }
+
+    /// Build a translator from the builder's current configuration.
+    pub fn build(&self) -> Translator {
+        Translator {
+            utf8: self.utf8,
+            line_terminator: self.line_terminator,
+            flags: Cell::new(self.flags),
+            stack: RefCell::new(Vec::new()),
+        }
+    }
+
+    /// Toggle whether the translated expression must match only valid UTF-8.
+    ///
+    /// When enabled (the default), the translator is guaranteed to produce an
+    /// expression that, for non-empty matches, will only ever produce spans
+    /// that are entirely valid UTF-8 (otherwise, the translator will return an
+    /// error). When disabled, translation will permit the construction of a
+    /// regular expression that may match invalid UTF-8.
+    ///
+    /// Perhaps surprisingly, an empty regex or even a negated ASCII word
+    /// boundary (uttered as `(?-u:\B)` in the concrete syntax) is still
+    /// allowed when UTF-8 is enabled, even though both can produce matches
+    /// that split a UTF-8 encoded codepoint. This only applies to zero-width
+    /// or "empty" matches, and it is expected that the regex engine itself
+    /// handles these cases if necessary (perhaps by suppressing any
+    /// zero-width matches that split a codepoint).
+    pub fn utf8(&mut self, yes: bool) -> &mut Self {
+        self.utf8 = yes;
+        self
+    }
+
+    /// Sets the line terminator for use with `(?u-s:.)` and `(?-us:.)`.
+    ///
+    /// By default, `.` matches everything except for `\n`. With this set, `.`
+    /// instead matches everything except for the byte given.
+    ///
+    /// If `.` is used in a context where Unicode mode is enabled and this byte
+    /// isn't ASCII, then an error will be returned. When Unicode mode is
+    /// disabled, then any byte is permitted, but an error will be returned if
+    /// UTF-8 mode is enabled and it is a non-ASCII byte.
+    ///
+    /// In short, any ASCII value for a line terminator is always okay. But a
+    /// non-ASCII byte might result in an error depending on whether Unicode
+    /// mode or UTF-8 mode are enabled.
+    ///
+    /// Note that if `R` mode is enabled then it always takes precedence and
+    /// the line terminator will be treated as `\r` and `\n` simultaneously.
+    ///
+    /// Note also that this *doesn't* impact the look-around assertions
+    /// `(?m:^)` and `(?m:$)`. That's usually controlled by additional
+    /// configuration in the regex engine itself.
+    pub fn line_terminator(&mut self, byte: u8) -> &mut Self {
+        self.line_terminator = byte;
+        self
+    }
+
+    /// Turn the case insensitive flag (`i`) on or off by default.
+    pub fn case_insensitive(&mut self, yes: bool) -> &mut Self {
+        self.flags.case_insensitive = if yes { Some(true) } else { None };
+        self
+    }
+
+    /// Turn the multi-line matching flag (`m`) on or off by default.
+    pub fn multi_line(&mut self, yes: bool) -> &mut Self {
+        self.flags.multi_line = if yes { Some(true) } else { None };
+        self
+    }
+
+    /// Turn the "dot matches any character" flag (`s`) on or off by
+    /// default.
+    ///
+    /// Passing `false` resets the flag to unset (`None`) rather than to
+    /// `Some(false)`.
+    pub fn dot_matches_new_line(&mut self, yes: bool) -> &mut Self {
+        self.flags.dot_matches_new_line = if yes { Some(true) } else { None };
+        self
+    }
+
+    /// Turn the CRLF mode flag (`R`) on or off by default.
+    pub fn crlf(&mut self, yes: bool) -> &mut Self {
+        self.flags.crlf = if yes { Some(true) } else { None };
+        self
+    }
+
+    /// Turn the "swap greed" flag (`U`) on or off by default.
+    pub fn swap_greed(&mut self, yes: bool) -> &mut Self {
+        self.flags.swap_greed = if yes { Some(true) } else { None };
+        self
+    }
+
+    /// Turn the Unicode flag (`u`) on or off by default.
+    pub fn unicode(&mut self, yes: bool) -> &mut Self {
+        self.flags.unicode = if yes { None } else { Some(false) };
+        self
+    }
+}
+
+/// A translator maps abstract syntax to a high level intermediate
+/// representation.
+///
+/// A translator may benefit from reuse. That is, a translator can translate
+/// many abstract syntax trees.
+///
+/// A `Translator` can be configured in more detail via a
+/// [`TranslatorBuilder`].
+#[derive(Clone, Debug)]
+pub struct Translator {
+    /// Our call stack, but on the heap.
+    stack: RefCell<Vec<HirFrame>>,
+    /// The current flag settings.
+    flags: Cell<Flags>,
+    /// Whether translation must reject HIR that can match invalid UTF-8.
+    utf8: bool,
+    /// The line terminator to use for `.`.
+    line_terminator: u8,
+}
+
+impl Translator {
+    /// Create a new translator using the default configuration.
+    pub fn new() -> Self {
+        TranslatorBuilder::default().build()
+    }
+
+    /// Translate the given abstract syntax tree (AST) into a high level
+    /// intermediate representation (HIR).
+    ///
+    /// The original pattern string used to produce the `Ast` *must* also be
+    /// provided. It is not used during any correct translation, only for
+    /// error reporting.
+    ///
+    /// If translation fails, then an HIR-specific error is returned.
+    pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
+        let visitor = TranslatorI::new(self, pattern);
+        ast::visit(ast, visitor)
+    }
+}
+
+/// An HirFrame is a single stack frame, represented explicitly, which is
+/// created for each item in the Ast that we traverse.
+///
+/// Note that technically, this type doesn't represent our entire stack
+/// frame. In particular, the Ast visitor represents any state associated with
+/// traversing the Ast itself.
+#[derive(Clone, Debug)]
+enum HirFrame {
+    /// An arbitrary HIR expression. These get pushed whenever we hit a base
+    /// case in the Ast. They get popped after an inductive (i.e., recursive)
+    /// step is complete.
+    Expr(Hir),
+    /// A literal that is being constructed, character by character, from the
+    /// AST. We need this because the AST gives each individual character its
+    /// own node. So as we see characters, we peek at the top-most HirFrame.
+    /// If it's a literal, then we add to it. Otherwise, we push a new literal.
+    /// When it comes time to pop it, we convert it to an Hir via Hir::literal.
+    Literal(Vec<u8>),
+    /// A Unicode character class. This frame is mutated as we descend into
+    /// the Ast of a character class (which is itself its own mini recursive
+    /// structure).
+    ClassUnicode(hir::ClassUnicode),
+    /// A byte-oriented character class. This frame is mutated as we descend
+    /// into the Ast of a character class (which is itself its own mini
+    /// recursive structure).
+    ///
+    /// Byte character classes are created when Unicode mode (`u`) is disabled.
+    /// If `utf8` is enabled (the default), then a byte character class is
+    /// only permitted to match ASCII text.
+    ClassBytes(hir::ClassBytes),
+    /// This is pushed whenever a repetition is observed. After visiting every
+    /// sub-expression in the repetition, the translator's stack is expected to
+    /// have this sentinel at the top.
+    ///
+    /// This sentinel only exists to stop other things (like flattening
+    /// literals) from reaching across repetition operators.
+    Repetition,
+    /// This is pushed on to the stack upon first seeing any kind of group,
+    /// indicated by parentheses (including non-capturing groups). It is popped
+    /// upon leaving a group.
+    Group {
+        /// The old active flags when this group was opened.
+        ///
+        /// If this group sets flags, then the new active flags are set to the
+        /// result of merging the old flags with the flags introduced by this
+        /// group. If the group doesn't set any flags, then this is simply
+        /// equivalent to whatever flags were set when the group was opened.
+        ///
+        /// When this group is popped, the active flags should be restored to
+        /// the flags set here.
+        ///
+        /// The "active" flags correspond to whatever flags are set in the
+        /// Translator.
+        old_flags: Flags,
+    },
+    /// This is pushed whenever a concatenation is observed. After visiting
+    /// every sub-expression in the concatenation, the translator's stack is
+    /// popped until it sees a Concat frame.
+    Concat,
+    /// This is pushed whenever an alternation is observed. After visiting
+    /// every sub-expression in the alternation, the translator's stack is
+    /// popped until it sees an Alternation frame.
+    Alternation,
+    /// This is pushed immediately before each sub-expression in an
+    /// alternation. This separates the branches of an alternation on the
+    /// stack and prevents literal flattening from reaching across alternation
+    /// branches.
+    ///
+    /// It is popped after each expression in a branch until an 'Alternation'
+    /// frame is observed when doing a post visit on an alternation.
+    AlternationBranch,
+}
+
+impl HirFrame {
+    /// Return this frame as an Hir (literals are converted); panic otherwise.
+    fn unwrap_expr(self) -> Hir {
+        match self {
+            HirFrame::Literal(bytes) => Hir::literal(bytes),
+            HirFrame::Expr(inner) => inner,
+            _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self),
+        }
+    }
+
+    /// Return the Unicode class held by this frame, or panic if this frame is
+    /// not a Unicode class.
+    fn unwrap_class_unicode(self) -> hir::ClassUnicode {
+        if let HirFrame::ClassUnicode(cls) = self {
+            cls
+        } else {
+            panic!(
+                "tried to unwrap Unicode class from HirFrame, got: {:?}",
+                self
+            )
+        }
+    }
+
+    /// Return the byte class held by this frame, or panic if this frame is
+    /// not a byte class.
+    fn unwrap_class_bytes(self) -> hir::ClassBytes {
+        if let HirFrame::ClassBytes(cls) = self {
+            cls
+        } else {
+            panic!(
+                "tried to unwrap byte class from HirFrame, got: {:?}",
+                self
+            )
+        }
+    }
+
+    /// Assert that the current stack frame is a repetition sentinel.
+    ///
+    /// The sentinel carries no data, so nothing is returned.
+    ///
+    /// Panics if this frame is anything other than a repetition sentinel.
+    fn unwrap_repetition(self) {
+        if !matches!(self, HirFrame::Repetition) {
+            panic!(
+                "tried to unwrap repetition from HirFrame, got: {:?}",
+                self
+            );
+        }
+    }
+
+    /// Assert that the current stack frame is a group indicator and return
+    /// its corresponding flags (the flags that were active at the time the
+    /// group was entered). Panics if this frame is anything other than a
+    /// group indicator.
+    fn unwrap_group(self) -> Flags {
+        if let HirFrame::Group { old_flags } = self {
+            old_flags
+        } else {
+            panic!("tried to unwrap group from HirFrame, got: {:?}", self)
+        }
+    }
+
+    /// Assert that the current stack frame is an alternation pipe sentinel.
+    ///
+    /// The sentinel carries no data, so nothing is returned.
+    ///
+    /// Panics if this frame is anything other than `AlternationBranch`.
+    fn unwrap_alternation_pipe(self) {
+        if !matches!(self, HirFrame::AlternationBranch) {
+            panic!(
+                "tried to unwrap alt pipe from HirFrame, got: {:?}",
+                self
+            );
+        }
+    }
+}
+
+impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
+    type Output = Hir;
+    type Err = Error;
+
+    fn finish(self) -> Result<Hir> {
+        // A completed traversal must leave exactly one frame on the stack.
+        assert_eq!(self.trans().stack.borrow().len(), 1);
+        Ok(self.pop().unwrap().unwrap_expr())
+    }
+
+    /// Push the frame a compound AST node needs before visiting its children.
+    fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
+        match *ast {
+            Ast::ClassBracketed(_) => {
+                // An empty class that the set items accumulate into.
+                let frame = if self.flags().unicode() {
+                    HirFrame::ClassUnicode(hir::ClassUnicode::empty())
+                } else {
+                    HirFrame::ClassBytes(hir::ClassBytes::empty())
+                };
+                self.push(frame);
+            }
+            Ast::Repetition(_) => self.push(HirFrame::Repetition),
+            Ast::Group(ref group) => {
+                // Remember the flags to restore once the group is closed.
+                let old_flags = match group.flags() {
+                    Some(ast_flags) => self.set_flags(ast_flags),
+                    None => self.flags(),
+                };
+                self.push(HirFrame::Group { old_flags });
+            }
+            Ast::Concat(_) => self.push(HirFrame::Concat),
+            Ast::Alternation(ref alt) => {
+                self.push(HirFrame::Alternation);
+                if !alt.asts.is_empty() {
+                    self.push(HirFrame::AlternationBranch);
+                }
+            }
+            _ => {}
+        }
+        Ok(())
+    }
+
+    fn visit_post(&mut self, ast: &Ast) -> Result<()> {
+        match *ast {
+            Ast::Empty(_) => {
+                self.push(HirFrame::Expr(Hir::empty()));
+            }
+            Ast::Flags(ref x) => {
+                self.set_flags(&x.flags);
+                // Flags in the AST are generally considered directives and
+                // not actual sub-expressions. However, they can be used in
+                // the concrete syntax like `((?i))`, and we need some kind of
+                // indication of an expression there, and Empty is the correct
+                // choice.
+                //
+                // There can also be things like `(?i)+`, but we rule those out
+                // in the parser. In the future, we might allow them for
+                // consistency's sake.
+                self.push(HirFrame::Expr(Hir::empty()));
+            }
+            Ast::Literal(ref x) => match self.ast_literal_to_scalar(x)? {
+                Either::Right(byte) => self.push_byte(byte),
+                Either::Left(ch) => match self.case_fold_char(x.span, ch)? {
+                    None => self.push_char(ch),
+                    Some(expr) => self.push(HirFrame::Expr(expr)),
+                },
+            },
+            Ast::Dot(ref span) => {
+                self.push(HirFrame::Expr(self.hir_dot(**span)?));
+            }
+            Ast::Assertion(ref x) => {
+                self.push(HirFrame::Expr(self.hir_assertion(x)?));
+            }
+            Ast::ClassPerl(ref x) => {
+                if self.flags().unicode() {
+                    let cls = self.hir_perl_unicode_class(x)?;
+                    let hcls = hir::Class::Unicode(cls);
+                    self.push(HirFrame::Expr(Hir::class(hcls)));
+                } else {
+                    let cls = self.hir_perl_byte_class(x)?;
+                    let hcls = hir::Class::Bytes(cls);
+                    self.push(HirFrame::Expr(Hir::class(hcls)));
+                }
+            }
+            Ast::ClassUnicode(ref x) => {
+                let cls = hir::Class::Unicode(self.hir_unicode_class(x)?);
+                self.push(HirFrame::Expr(Hir::class(cls)));
+            }
+            Ast::ClassBracketed(ref ast) => {
+                if self.flags().unicode() {
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    self.unicode_fold_and_negate(
+                        &ast.span,
+                        ast.negated,
+                        &mut cls,
+                    )?;
+                    let expr = Hir::class(hir::Class::Unicode(cls));
+                    self.push(HirFrame::Expr(expr));
+                } else {
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    self.bytes_fold_and_negate(
+                        &ast.span,
+                        ast.negated,
+                        &mut cls,
+                    )?;
+                    let expr = Hir::class(hir::Class::Bytes(cls));
+                    self.push(HirFrame::Expr(expr));
+                }
+            }
+            Ast::Repetition(ref x) => {
+                let expr = self.pop().unwrap().unwrap_expr();
+                self.pop().unwrap().unwrap_repetition();
+                self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
+            }
+            Ast::Group(ref x) => {
+                let expr = self.pop().unwrap().unwrap_expr();
+                let old_flags = self.pop().unwrap().unwrap_group();
+                self.trans().flags.set(old_flags);
+                self.push(HirFrame::Expr(self.hir_capture(x, expr)));
+            }
+            Ast::Concat(_) => {
+                let mut exprs = vec![];
+                while let Some(expr) = self.pop_concat_expr() {
+                    if !matches!(*expr.kind(), HirKind::Empty) {
+                        exprs.push(expr);
+                    }
+                }
+                exprs.reverse();
+                self.push(HirFrame::Expr(Hir::concat(exprs)));
+            }
+            Ast::Alternation(_) => {
+                let mut exprs = vec![];
+                while let Some(expr) = self.pop_alt_expr() {
+                    self.pop().unwrap().unwrap_alternation_pipe();
+                    exprs.push(expr);
+                }
+                exprs.reverse();
+                self.push(HirFrame::Expr(Hir::alternation(exprs)));
+            }
+        }
+        Ok(())
+    }
+
+    /// Open a new branch of the alternation being translated, so that literal
+    /// flattening cannot reach across branches.
+    fn visit_alternation_in(&mut self) -> Result<()> {
+        self.push(HirFrame::AlternationBranch);
+        Ok(())
+    }
+
+    /// Push an empty class frame before descending into a nested bracketed
+    /// class; every other kind of set item needs no preparation.
+    fn visit_class_set_item_pre(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<()> {
+        // We needn't handle the Union case here since the visitor will do it
+        // for us.
+        if let ast::ClassSetItem::Bracketed(_) = *ast {
+            let frame = if self.flags().unicode() {
+                HirFrame::ClassUnicode(hir::ClassUnicode::empty())
+            } else {
+                HirFrame::ClassBytes(hir::ClassBytes::empty())
+            };
+            self.push(frame);
+        }
+        Ok(())
+    }
+
+    fn visit_class_set_item_post(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<()> {
+        match *ast {
+            ast::ClassSetItem::Empty(_) => {}
+            ast::ClassSetItem::Literal(ref x) => {
+                if self.flags().unicode() {
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    let byte = self.class_literal_byte(x)?;
+                    cls.push(hir::ClassBytesRange::new(byte, byte));
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            ast::ClassSetItem::Range(ref x) => {
+                if self.flags().unicode() {
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    let start = self.class_literal_byte(&x.start)?;
+                    let end = self.class_literal_byte(&x.end)?;
+                    cls.push(hir::ClassBytesRange::new(start, end));
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            ast::ClassSetItem::Ascii(ref x) => {
+                if self.flags().unicode() {
+                    let xcls = self.hir_ascii_unicode_class(x)?;
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    cls.union(&xcls);
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let xcls = self.hir_ascii_byte_class(x)?;
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    cls.union(&xcls);
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            ast::ClassSetItem::Unicode(ref x) => {
+                let xcls = self.hir_unicode_class(x)?;
+                let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                cls.union(&xcls);
+                self.push(HirFrame::ClassUnicode(cls));
+            }
+            ast::ClassSetItem::Perl(ref x) => {
+                if self.flags().unicode() {
+                    let xcls = self.hir_perl_unicode_class(x)?;
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    cls.union(&xcls);
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let xcls = self.hir_perl_byte_class(x)?;
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    cls.union(&xcls);
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            ast::ClassSetItem::Bracketed(ref ast) => {
+                if self.flags().unicode() {
+                    let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
+                    self.unicode_fold_and_negate(
+                        &ast.span,
+                        ast.negated,
+                        &mut cls1,
+                    )?;
+
+                    let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
+                    cls2.union(&cls1);
+                    self.push(HirFrame::ClassUnicode(cls2));
+                } else {
+                    let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
+                    self.bytes_fold_and_negate(
+                        &ast.span,
+                        ast.negated,
+                        &mut cls1,
+                    )?;
+
+                    let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
+                    cls2.union(&cls1);
+                    self.push(HirFrame::ClassBytes(cls2));
+                }
+            }
+            // Nothing to do: unions are handled automatically by the visitor.
+            ast::ClassSetItem::Union(_) => {}
+        }
+        Ok(())
+    }
+
+    /// Push a fresh, empty class frame before visiting a binary set operation.
+    fn visit_class_set_binary_op_pre(
+        &mut self,
+        _op: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        let frame = if self.flags().unicode() {
+            HirFrame::ClassUnicode(hir::ClassUnicode::empty())
+        } else {
+            HirFrame::ClassBytes(hir::ClassBytes::empty())
+        };
+        self.push(frame);
+        Ok(())
+    }
+
+    /// Push a fresh, empty class frame at the `in` step of a binary set op.
+    fn visit_class_set_binary_op_in(
+        &mut self,
+        _op: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        let frame = if self.flags().unicode() {
+            HirFrame::ClassUnicode(hir::ClassUnicode::empty())
+        } else {
+            HirFrame::ClassBytes(hir::ClassBytes::empty())
+        };
+        self.push(frame);
+        Ok(())
+    }
+
+    /// Pop the operand classes of a binary class set operation, combine
+    /// them, and union the result into the class frame beneath them.
+    fn visit_class_set_binary_op_post(
+        &mut self,
+        op: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        use crate::ast::ClassSetBinaryOpKind::*;
+
+        if self.flags().unicode() {
+            // The top of the stack is the rhs; the lhs sits right beneath it.
+            let mut rhs = self.pop().unwrap().unwrap_class_unicode();
+            let mut lhs = self.pop().unwrap().unwrap_class_unicode();
+            let mut cls = self.pop().unwrap().unwrap_class_unicode();
+            if self.flags().case_insensitive() {
+                rhs.try_case_fold_simple().map_err(|_| {
+                    let span = *op.rhs.span();
+                    self.error(span, ErrorKind::UnicodeCaseUnavailable)
+                })?;
+                lhs.try_case_fold_simple().map_err(|_| {
+                    let span = *op.lhs.span();
+                    self.error(span, ErrorKind::UnicodeCaseUnavailable)
+                })?;
+            }
+            match op.kind {
+                Intersection => lhs.intersect(&rhs),
+                Difference => lhs.difference(&rhs),
+                SymmetricDifference => lhs.symmetric_difference(&rhs),
+            }
+            cls.union(&lhs);
+            self.push(HirFrame::ClassUnicode(cls));
+        } else {
+            // Byte-oriented counterpart of the Unicode branch above.
+            let mut rhs = self.pop().unwrap().unwrap_class_bytes();
+            let mut lhs = self.pop().unwrap().unwrap_class_bytes();
+            let mut cls = self.pop().unwrap().unwrap_class_bytes();
+            if self.flags().case_insensitive() {
+                rhs.case_fold_simple();
+                lhs.case_fold_simple();
+            }
+            match op.kind {
+                Intersection => lhs.intersect(&rhs),
+                Difference => lhs.difference(&rhs),
+                SymmetricDifference => lhs.symmetric_difference(&rhs),
+            }
+            cls.union(&lhs);
+            self.push(HirFrame::ClassBytes(cls));
+        }
+        Ok(())
+    }
+}
+
+/// The internal implementation of a translator.
+///
+/// This type carries the original pattern string (copied into any error that
+/// is reported), which is not tied to the internal state of a translator.
+///
+/// A TranslatorI exists for the time it takes to translate a single Ast.
+#[derive(Clone, Debug)]
+struct TranslatorI<'t, 'p> {
+    trans: &'t Translator,
+    pattern: &'p str,
+}
+
+impl<'t, 'p> TranslatorI<'t, 'p> {
+    /// Pair a translator with the pattern string of the Ast to translate.
+    fn new(trans: &'t Translator, pattern: &'p str) -> Self {
+        Self { pattern, trans }
+    }
+
+    /// Borrow the translator that this internal translator wraps.
+    fn trans(&self) -> &Translator {
+        self.trans
+    }
+
+    /// Push the given frame on to the top of the translator's stack.
+    fn push(&self, frame: HirFrame) {
+        self.trans.stack.borrow_mut().push(frame);
+    }
+
+    /// Push the given literal char on to the call stack.
+    ///
+    /// The char is UTF-8 encoded first. If the top-most element of the stack
+    /// is a literal, then the encoded bytes are appended to the end of that
+    /// literal. Otherwise, a new literal containing just the given char is
+    /// pushed to the top of the stack.
+    fn push_char(&self, ch: char) {
+        let mut utf8 = [0u8; 4];
+        let encoded = ch.encode_utf8(&mut utf8).as_bytes();
+        let mut stack = self.trans().stack.borrow_mut();
+        match stack.last_mut() {
+            Some(HirFrame::Literal(lit)) => lit.extend_from_slice(encoded),
+            _ => stack.push(HirFrame::Literal(encoded.to_vec())),
+        }
+    }
+
+    /// Push the given literal byte on to the call stack.
+    ///
+    /// If the top-most element of the stack is a literal, then the byte is
+    /// appended to the end of that literal. Otherwise, a new literal
+    /// containing just the given byte is pushed to the top of the stack, so
+    /// that later bytes and chars can be appended to it in turn.
+    fn push_byte(&self, byte: u8) {
+        let mut stack = self.trans().stack.borrow_mut();
+        match stack.last_mut() {
+            Some(HirFrame::Literal(lit)) => lit.push(byte),
+            _ => stack.push(HirFrame::Literal(vec![byte])),
+        }
+    }
+
+    /// Remove and return the top frame of the stack, or None if it is empty.
+    fn pop(&self) -> Option<HirFrame> {
+        self.trans.stack.borrow_mut().pop()
+    }
+
+    /// Pop an HIR expression from the top of the stack for a concatenation.
+    ///
+    /// This returns None if the stack is empty or when a concat frame is seen.
+    /// A literal frame is converted to an HIR expression first. Any other
+    /// kind of frame is unexpected here and results in a panic.
+    fn pop_concat_expr(&self) -> Option<Hir> {
+        match self.pop()? {
+            HirFrame::Concat => None,
+            HirFrame::Literal(bytes) => Some(Hir::literal(bytes)),
+            HirFrame::Expr(inner) => Some(inner),
+            HirFrame::ClassUnicode(_) => {
+                unreachable!("expected expr or concat, got Unicode class")
+            }
+            HirFrame::ClassBytes(_) => {
+                unreachable!("expected expr or concat, got byte class")
+            }
+            HirFrame::Repetition => {
+                unreachable!("expected expr or concat, got repetition")
+            }
+            HirFrame::Group { .. } => {
+                unreachable!("expected expr or concat, got group")
+            }
+            HirFrame::Alternation => {
+                unreachable!("expected expr or concat, got alt marker")
+            }
+            HirFrame::AlternationBranch => {
+                unreachable!("expected expr or concat, got alt branch marker")
+            }
+        }
+    }
+
+    /// Pop an HIR expression from the top of the stack for an alternation.
+    ///
+    /// This returns None if the stack is empty or when an alternation frame is
+    /// seen. A literal frame is converted to an HIR expression first. Any
+    /// other kind of frame is unexpected here and results in a panic.
+    fn pop_alt_expr(&self) -> Option<Hir> {
+        match self.pop()? {
+            HirFrame::Alternation => None,
+            HirFrame::Literal(bytes) => Some(Hir::literal(bytes)),
+            HirFrame::Expr(inner) => Some(inner),
+            HirFrame::ClassUnicode(_) => {
+                unreachable!("expected expr or alt, got Unicode class")
+            }
+            HirFrame::ClassBytes(_) => {
+                unreachable!("expected expr or alt, got byte class")
+            }
+            HirFrame::Repetition => {
+                unreachable!("expected expr or alt, got repetition")
+            }
+            HirFrame::Group { .. } => {
+                unreachable!("expected expr or alt, got group")
+            }
+            HirFrame::Concat => {
+                unreachable!("expected expr or alt, got concat marker")
+            }
+            HirFrame::AlternationBranch => {
+                unreachable!("expected expr or alt, got alt branch marker")
+            }
+        }
+    }
+
+    /// Create a new error with the given span and error type.
+    fn error(&self, span: Span, kind: ErrorKind) -> Error {
+        Error { kind, pattern: self.pattern.to_string(), span }
+    }
+
+    /// Return a copy of the flags that are currently active.
+    fn flags(&self) -> Flags {
+        self.trans.flags.get()
+    }
+
+    /// Replace the active flags with the flags set in the given AST, merged
+    /// with the currently active flags via `Flags::merge`. Then, return the
+    /// flags that were active before the call.
+    fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
+        let previous = self.flags();
+        let mut merged = Flags::from_ast(ast_flags);
+        merged.merge(&previous);
+        self.trans.flags.replace(merged)
+    }
+
+    /// Turn an AST literal into either a `char` or a raw byte.
+    ///
+    /// With Unicode mode on, the literal's `char` is returned unconditionally.
+    ///
+    /// With Unicode mode off, a `char` is still preferred: it is returned when
+    /// the literal has no byte form or when its byte is ASCII. A non-ASCII
+    /// byte is returned as a byte only if invalid UTF-8 is permitted; when it
+    /// is not permitted, an `InvalidUtf8` error is produced instead.
+    fn ast_literal_to_scalar(
+        &self,
+        lit: &ast::Literal,
+    ) -> Result<Either<char, u8>> {
+        if self.flags().unicode() {
+            return Ok(Either::Left(lit.c));
+        }
+        match lit.byte() {
+            // No byte form for this literal: keep the char.
+            None => Ok(Either::Left(lit.c)),
+            // ASCII bytes are representable as chars without loss.
+            Some(byte) if byte.is_ascii() => {
+                Ok(Either::Left(char::from(byte)))
+            }
+            // A non-ASCII byte can never be part of a valid UTF-8 match.
+            Some(_) if self.trans().utf8 => {
+                Err(self.error(lit.span, ErrorKind::InvalidUtf8))
+            }
+            Some(byte) => Ok(Either::Right(byte)),
+        }
+    }
+
+    /// Produce the case-insensitive class for a single character, if any.
+    ///
+    /// Returns `Ok(None)` when case-insensitive matching is off or when
+    /// folding would not add anything for `c`, so the caller can keep the
+    /// character as it is. In Unicode mode a Unicode class is built and an
+    /// `UnicodeCaseUnavailable` error is reported if case folding cannot be
+    /// performed; otherwise a byte class is built for ASCII letters only.
+    fn case_fold_char(&self, span: Span, c: char) -> Result<Option<Hir>> {
+        let flags = self.flags();
+        if !flags.case_insensitive() {
+            return Ok(None);
+        }
+        if !flags.unicode() {
+            // Only ASCII letters have a counterpart in the other case; any
+            // other character (ASCII or not) is left untouched.
+            if !c.is_ascii_alphabetic() {
+                return Ok(None);
+            }
+            // Cannot fail: `c` was just checked to be an ASCII letter.
+            let byte = u8::try_from(c).unwrap();
+            let mut cls =
+                hir::ClassBytes::new(vec![hir::ClassBytesRange::new(
+                    byte, byte,
+                )]);
+            cls.case_fold_simple();
+            return Ok(Some(Hir::class(hir::Class::Bytes(cls))));
+        }
+        let unavailable =
+            || self.error(span, ErrorKind::UnicodeCaseUnavailable);
+        // Skip building a class when folding cannot add anything for `c`.
+        let folds = unicode::SimpleCaseFolder::new()
+            .map(|folder| folder.overlaps(c, c))
+            .map_err(|_| unavailable())?;
+        if !folds {
+            return Ok(None);
+        }
+        let mut cls =
+            hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new(c, c)]);
+        cls.try_case_fold_simple().map_err(|_| unavailable())?;
+        Ok(Some(Hir::class(hir::Class::Unicode(cls))))
+    }
+
+    /// Translate `.` under the flags currently in effect.
+    ///
+    /// Fails with `InvalidUtf8` when the translator requires valid UTF-8 and
+    /// either Unicode mode is off or the configured line terminator is not
+    /// ASCII. Fails with `InvalidLineTerminator` when a non-ASCII line
+    /// terminator would have to be excluded as a `char`.
+    fn hir_dot(&self, span: Span) -> Result<Hir> {
+        let flags = self.flags();
+        let utf8 = self.trans().utf8;
+        let lineterm = self.trans().line_terminator;
+        if utf8 && !(flags.unicode() && lineterm.is_ascii()) {
+            return Err(self.error(span, ErrorKind::InvalidUtf8));
+        }
+        // (dot matches new line, Unicode mode, CRLF mode)
+        let mode =
+            (flags.dot_matches_new_line(), flags.unicode(), flags.crlf());
+        let dot = match mode {
+            (true, true, _) => hir::Dot::AnyChar,
+            (true, false, _) => hir::Dot::AnyByte,
+            (false, true, true) => hir::Dot::AnyCharExceptCRLF,
+            (false, true, false) => {
+                if !lineterm.is_ascii() {
+                    return Err(
+                        self.error(span, ErrorKind::InvalidLineTerminator)
+                    );
+                }
+                hir::Dot::AnyCharExcept(char::from(lineterm))
+            }
+            (false, false, true) => hir::Dot::AnyByteExceptCRLF,
+            (false, false, false) => hir::Dot::AnyByteExcept(lineterm),
+        };
+        Ok(Hir::dot(dot))
+    }
+
+    /// Translate an AST assertion into the matching HIR look-around.
+    ///
+    /// Line anchors depend on the multi-line and CRLF flags; word-boundary
+    /// assertions pick their Unicode or ASCII form based on the Unicode flag.
+    fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
+        use crate::hir::Look;
+
+        let flags = self.flags();
+        let (multi_line, crlf) = (flags.multi_line(), flags.crlf());
+        let unicode = flags.unicode();
+        // Selects the Unicode-aware or the ASCII-only flavor of a look.
+        let flavor =
+            |uni: Look, ascii: Look| if unicode { uni } else { ascii };
+        let look = match asst.kind {
+            ast::AssertionKind::StartText => Look::Start,
+            ast::AssertionKind::EndText => Look::End,
+            ast::AssertionKind::StartLine => match (multi_line, crlf) {
+                (false, _) => Look::Start,
+                (true, false) => Look::StartLF,
+                (true, true) => Look::StartCRLF,
+            },
+            ast::AssertionKind::EndLine => match (multi_line, crlf) {
+                (false, _) => Look::End,
+                (true, false) => Look::EndLF,
+                (true, true) => Look::EndCRLF,
+            },
+            ast::AssertionKind::WordBoundary => {
+                flavor(Look::WordUnicode, Look::WordAscii)
+            }
+            ast::AssertionKind::NotWordBoundary => {
+                flavor(Look::WordUnicodeNegate, Look::WordAsciiNegate)
+            }
+            ast::AssertionKind::WordBoundaryStart
+            | ast::AssertionKind::WordBoundaryStartAngle => {
+                flavor(Look::WordStartUnicode, Look::WordStartAscii)
+            }
+            ast::AssertionKind::WordBoundaryEnd
+            | ast::AssertionKind::WordBoundaryEndAngle => {
+                flavor(Look::WordEndUnicode, Look::WordEndAscii)
+            }
+            ast::AssertionKind::WordBoundaryStartHalf => {
+                flavor(Look::WordStartHalfUnicode, Look::WordStartHalfAscii)
+            }
+            ast::AssertionKind::WordBoundaryEndHalf => {
+                flavor(Look::WordEndHalfUnicode, Look::WordEndHalfAscii)
+            }
+        };
+        Ok(Hir::look(look))
+    }
+
+    /// Wrap `expr` in an HIR capture for a capturing AST group.
+    ///
+    /// A non-capturing group has no HIR counterpart, so its inner expression
+    /// is handed back unchanged.
+    fn hir_capture(&self, group: &ast::Group, expr: Hir) -> Hir {
+        match group.kind {
+            // Grouping alone is already expressed by the shape of the HIR.
+            ast::GroupKind::NonCapturing(_) => expr,
+            ast::GroupKind::CaptureIndex(index) => {
+                Hir::capture(hir::Capture {
+                    index,
+                    name: None,
+                    sub: Box::new(expr),
+                })
+            }
+            ast::GroupKind::CaptureName { ref name, .. } => {
+                Hir::capture(hir::Capture {
+                    index: name.index,
+                    name: Some(name.name.clone().into_boxed_str()),
+                    sub: Box::new(expr),
+                })
+            }
+        }
+    }
+
+    /// Wrap `expr` in an HIR repetition described by the AST repetition.
+    ///
+    /// The operator decides the `(min, max)` bounds, where `max == None`
+    /// means unbounded. Greediness is taken from the AST and inverted when
+    /// the swap-greed flag is active.
+    fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
+        use crate::ast::{RepetitionKind, RepetitionRange};
+
+        let (min, max) = match rep.op.kind {
+            RepetitionKind::ZeroOrOne => (0, Some(1)),
+            RepetitionKind::ZeroOrMore => (0, None),
+            RepetitionKind::OneOrMore => (1, None),
+            RepetitionKind::Range(ref range) => match *range {
+                RepetitionRange::Exactly(n) => (n, Some(n)),
+                RepetitionRange::AtLeast(n) => (n, None),
+                RepetitionRange::Bounded(lo, hi) => (lo, Some(hi)),
+            },
+        };
+        // Differs from the AST's greediness exactly when swapping is on.
+        let greedy = rep.greedy != self.flags().swap_greed();
+        let sub = Box::new(expr);
+        Hir::repetition(hir::Repetition { min, max, greedy, sub })
+    }
+
+    /// Resolve a Unicode class from the AST into an HIR Unicode class.
+    ///
+    /// Fails with `UnicodeNotAllowed` when Unicode mode is off, and with the
+    /// converted lookup error when the class cannot be resolved. On success
+    /// the class is case folded (if case-insensitive) and negated (if the
+    /// AST class is negated).
+    fn hir_unicode_class(
+        &self,
+        ast_class: &ast::ClassUnicode,
+    ) -> Result<hir::ClassUnicode> {
+        if !self.flags().unicode() {
+            let err =
+                self.error(ast_class.span, ErrorKind::UnicodeNotAllowed);
+            return Err(err);
+        }
+        let query = match ast_class.kind {
+            ast::ClassUnicodeKind::OneLetter(letter) => {
+                ClassQuery::OneLetter(letter)
+            }
+            ast::ClassUnicodeKind::Named(ref name) => ClassQuery::Binary(name),
+            ast::ClassUnicodeKind::NamedValue {
+                ref name, ref value, ..
+            } => ClassQuery::ByValue {
+                property_name: name,
+                property_value: value,
+            },
+        };
+        let mut class = self.convert_unicode_class_error(
+            &ast_class.span,
+            unicode::class(query),
+        )?;
+        self.unicode_fold_and_negate(
+            &ast_class.span,
+            ast_class.negated,
+            &mut class,
+        )?;
+        Ok(class)
+    }
+
+    /// Build the Unicode class for an ASCII class from the AST, then apply
+    /// case folding and negation as requested.
+    fn hir_ascii_unicode_class(
+        &self,
+        ast: &ast::ClassAscii,
+    ) -> Result<hir::ClassUnicode> {
+        let ranges = ascii_class_as_chars(&ast.kind)
+            .map(|(start, end)| hir::ClassUnicodeRange::new(start, end));
+        let mut class = hir::ClassUnicode::new(ranges);
+        self.unicode_fold_and_negate(&ast.span, ast.negated, &mut class)?;
+        Ok(class)
+    }
+
+    /// Build the byte class for an ASCII class from the AST, then apply case
+    /// folding, negation and the UTF-8 validity check.
+    fn hir_ascii_byte_class(
+        &self,
+        ast: &ast::ClassAscii,
+    ) -> Result<hir::ClassBytes> {
+        let ranges = ascii_class(&ast.kind)
+            .map(|(start, end)| hir::ClassBytesRange::new(start, end));
+        let mut class = hir::ClassBytes::new(ranges);
+        self.bytes_fold_and_negate(&ast.span, ast.negated, &mut class)?;
+        Ok(class)
+    }
+
+    /// Resolve a Perl class (`\d`, `\s`, `\w`) to its Unicode definition.
+    ///
+    /// Panics if Unicode mode is not enabled. A failed lookup is reported as
+    /// an HIR translation error.
+    fn hir_perl_unicode_class(
+        &self,
+        ast_class: &ast::ClassPerl,
+    ) -> Result<hir::ClassUnicode> {
+        assert!(self.flags().unicode());
+        let looked_up = match ast_class.kind {
+            ast::ClassPerlKind::Digit => unicode::perl_digit(),
+            ast::ClassPerlKind::Space => unicode::perl_space(),
+            ast::ClassPerlKind::Word => unicode::perl_word(),
+        };
+        self.convert_unicode_class_error(&ast_class.span, looked_up).map(
+            |mut class| {
+                // No case folding step: the Perl Unicode classes are already
+                // closed under Unicode simple case folding.
+                if ast_class.negated {
+                    class.negate();
+                }
+                class
+            },
+        )
+    }
+
+    /// Resolve a Perl class (`\d`, `\s`, `\w`) to its ASCII byte definition.
+    ///
+    /// Panics if Unicode mode is enabled. Fails with `InvalidUtf8` when the
+    /// resulting class reaches beyond ASCII while the translator requires
+    /// valid UTF-8.
+    fn hir_perl_byte_class(
+        &self,
+        ast_class: &ast::ClassPerl,
+    ) -> Result<hir::ClassBytes> {
+        assert!(!self.flags().unicode());
+        let ascii_kind = match ast_class.kind {
+            ast::ClassPerlKind::Digit => ast::ClassAsciiKind::Digit,
+            ast::ClassPerlKind::Space => ast::ClassAsciiKind::Space,
+            ast::ClassPerlKind::Word => ast::ClassAsciiKind::Word,
+        };
+        let mut class = hir_ascii_class_bytes(&ascii_kind);
+        // No case folding step: the Perl ASCII classes are already closed
+        // under ASCII case folding.
+        if ast_class.negated {
+            class.negate();
+        }
+        // A negated Perl byte class is likely to match invalid UTF-8, which
+        // is acceptable only if the translator is configured to allow it.
+        let rejected = self.trans().utf8 && !class.is_ascii();
+        if rejected {
+            Err(self.error(ast_class.span, ErrorKind::InvalidUtf8))
+        } else {
+            Ok(class)
+        }
+    }
+
+    /// Converts the given Unicode specific error to an HIR translation error.
+    ///
+    /// The span given should approximate the position at which an error would
+    /// occur. A successful `result` is passed through untouched.
+    fn convert_unicode_class_error(
+        &self,
+        span: &Span,
+        result: core::result::Result<hir::ClassUnicode, unicode::Error>,
+    ) -> Result<hir::ClassUnicode> {
+        result.map_err(|err| {
+            let kind = match err {
+                unicode::Error::PropertyNotFound => {
+                    ErrorKind::UnicodePropertyNotFound
+                }
+                unicode::Error::PropertyValueNotFound => {
+                    ErrorKind::UnicodePropertyValueNotFound
+                }
+                unicode::Error::PerlClassNotFound => {
+                    ErrorKind::UnicodePerlClassNotFound
+                }
+            };
+            // `Span` is `Copy`, so a plain dereference replaces `clone()`.
+            self.error(*span, kind)
+        })
+    }
+
+    /// Apply case folding (when case-insensitive matching is on) and then
+    /// negation (when `negated` is set) to a Unicode class, in place.
+    ///
+    /// Fails with `UnicodeCaseUnavailable`, anchored at `span`, when case
+    /// folding cannot be performed.
+    fn unicode_fold_and_negate(
+        &self,
+        span: &Span,
+        negated: bool,
+        class: &mut hir::ClassUnicode,
+    ) -> Result<()> {
+        // Note that we must apply case folding before negation!
+        // Consider `(?i)[^x]`. If we applied negation first, then
+        // the result would be the character class that matched any
+        // Unicode scalar value.
+        if self.flags().case_insensitive() {
+            // `Span` is `Copy`, so a plain dereference replaces `clone()`.
+            class.try_case_fold_simple().map_err(|_| {
+                self.error(*span, ErrorKind::UnicodeCaseUnavailable)
+            })?;
+        }
+        if negated {
+            class.negate();
+        }
+        Ok(())
+    }
+
+    /// Apply case folding (when case-insensitive matching is on) and then
+    /// negation (when `negated` is set) to a byte class, in place.
+    ///
+    /// Fails with `InvalidUtf8`, anchored at `span`, when the resulting class
+    /// reaches beyond ASCII while the translator requires valid UTF-8.
+    fn bytes_fold_and_negate(
+        &self,
+        span: &Span,
+        negated: bool,
+        class: &mut hir::ClassBytes,
+    ) -> Result<()> {
+        // Note that we must apply case folding before negation!
+        // Consider `(?i-u)[^x]`. If we applied negation first, then
+        // folding the negated class would make it match every byte,
+        // including both `x` and `X`.
+        if self.flags().case_insensitive() {
+            class.case_fold_simple();
+        }
+        if negated {
+            class.negate();
+        }
+        if self.trans().utf8 && !class.is_ascii() {
+            // `Span` is `Copy`, so a plain dereference replaces `clone()`.
+            return Err(self.error(*span, ErrorKind::InvalidUtf8));
+        }
+        Ok(())
+    }
+
+    /// Reduce an AST literal to the single byte it contributes to a byte
+    /// oriented character class.
+    ///
+    /// A literal that resolves to a non-ASCII `char` is rejected with
+    /// `UnicodeNotAllowed`.
+    fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
+        match self.ast_literal_to_scalar(ast)? {
+            Either::Right(byte) => Ok(byte),
+            Either::Left(ch) if ch.is_ascii() => {
+                // Cannot fail: the guard restricts `ch` to ASCII.
+                Ok(u8::try_from(ch).unwrap())
+            }
+            // We can't feasibly support Unicode in byte oriented classes.
+            // Byte classes don't do Unicode case folding.
+            Either::Left(_) => {
+                Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
+            }
+        }
+    }
+}
+
+/// A translator's representation of a regular expression's flags at any given
+/// moment in time.
+///
+/// Each flag can be in one of three states: absent, present but disabled or
+/// present but enabled.
+///
+/// An absent flag (`None`) is treated as disabled by the accessor methods,
+/// with the exception of `unicode`, which is treated as enabled.
+#[derive(Clone, Copy, Debug, Default)]
+struct Flags {
+    // Set from `ast::Flag::CaseInsensitive`.
+    case_insensitive: Option<bool>,
+    // Set from `ast::Flag::MultiLine`.
+    multi_line: Option<bool>,
+    // Set from `ast::Flag::DotMatchesNewLine`.
+    dot_matches_new_line: Option<bool>,
+    // Set from `ast::Flag::SwapGreed`.
+    swap_greed: Option<bool>,
+    // Set from `ast::Flag::Unicode`.
+    unicode: Option<bool>,
+    // Set from `ast::Flag::CRLF`.
+    crlf: Option<bool>,
+    // Note that `ignore_whitespace` is omitted here because it is handled
+    // entirely in the parser.
+}
+
+impl Flags {
+    /// Read the flags out of an AST flag group.
+    ///
+    /// Flags listed before a negation item are recorded as enabled, flags
+    /// listed after it as disabled, and flags that are not listed stay
+    /// absent. `IgnoreWhitespace` is skipped.
+    fn from_ast(ast: &ast::Flags) -> Flags {
+        let mut flags = Flags::default();
+        // The state assigned to each flag encountered from here on.
+        let mut state = Some(true);
+        for item in &ast.items {
+            let flag = match item.kind {
+                ast::FlagsItemKind::Negation => {
+                    state = Some(false);
+                    continue;
+                }
+                ast::FlagsItemKind::Flag(ref flag) => flag,
+            };
+            match *flag {
+                ast::Flag::CaseInsensitive => flags.case_insensitive = state,
+                ast::Flag::MultiLine => flags.multi_line = state,
+                ast::Flag::DotMatchesNewLine => {
+                    flags.dot_matches_new_line = state
+                }
+                ast::Flag::SwapGreed => flags.swap_greed = state,
+                ast::Flag::Unicode => flags.unicode = state,
+                ast::Flag::CRLF => flags.crlf = state,
+                ast::Flag::IgnoreWhitespace => {}
+            }
+        }
+        flags
+    }
+
+    /// Fill every flag that is absent in `self` with the value it has in
+    /// `previous`. Flags already present in `self` are left alone.
+    fn merge(&mut self, previous: &Flags) {
+        self.case_insensitive =
+            self.case_insensitive.or(previous.case_insensitive);
+        self.multi_line = self.multi_line.or(previous.multi_line);
+        self.dot_matches_new_line =
+            self.dot_matches_new_line.or(previous.dot_matches_new_line);
+        self.swap_greed = self.swap_greed.or(previous.swap_greed);
+        self.unicode = self.unicode.or(previous.unicode);
+        self.crlf = self.crlf.or(previous.crlf);
+    }
+
+    /// Whether case-insensitive matching is on (off when absent).
+    fn case_insensitive(&self) -> bool {
+        matches!(self.case_insensitive, Some(true))
+    }
+
+    /// Whether multi-line mode is on (off when absent).
+    fn multi_line(&self) -> bool {
+        matches!(self.multi_line, Some(true))
+    }
+
+    /// Whether `.` matches the line terminator too (off when absent).
+    fn dot_matches_new_line(&self) -> bool {
+        matches!(self.dot_matches_new_line, Some(true))
+    }
+
+    /// Whether greediness is swapped (off when absent).
+    fn swap_greed(&self) -> bool {
+        matches!(self.swap_greed, Some(true))
+    }
+
+    /// Whether Unicode mode is on. Unlike the other flags, this one counts
+    /// as on when absent.
+    fn unicode(&self) -> bool {
+        !matches!(self.unicode, Some(false))
+    }
+
+    /// Whether CRLF mode is on (off when absent).
+    fn crlf(&self) -> bool {
+        matches!(self.crlf, Some(true))
+    }
+}
+
+/// Build the byte class that holds the ranges of the given ASCII class.
+///
+/// The ranges are handed to the class constructor as an iterator, the same
+/// way `hir_ascii_byte_class` does it, so no intermediate `Vec` is built.
+fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
+    hir::ClassBytes::new(
+        ascii_class(kind).map(|(s, e)| hir::ClassBytesRange::new(s, e)),
+    )
+}
+
+/// Returns the byte ranges that make up the given ASCII class.
+///
+/// Each item is a `(start, end)` pair of bytes. Within every class the pairs
+/// are listed in ascending order and do not overlap.
+fn ascii_class(kind: &ast::ClassAsciiKind) -> impl Iterator<Item = (u8, u8)> {
+    use crate::ast::ClassAsciiKind::*;
+
+    // Every table is a `'static` slice, so the returned iterator borrows
+    // nothing from `kind`.
+    let slice: &'static [(u8, u8)] = match *kind {
+        Alnum => &[(b'0', b'9'), (b'A', b'Z'), (b'a', b'z')],
+        Alpha => &[(b'A', b'Z'), (b'a', b'z')],
+        Ascii => &[(b'\x00', b'\x7F')],
+        Blank => &[(b'\t', b'\t'), (b' ', b' ')],
+        Cntrl => &[(b'\x00', b'\x1F'), (b'\x7F', b'\x7F')],
+        Digit => &[(b'0', b'9')],
+        Graph => &[(b'!', b'~')],
+        Lower => &[(b'a', b'z')],
+        Print => &[(b' ', b'~')],
+        Punct => &[(b'!', b'/'), (b':', b'@'), (b'[', b'`'), (b'{', b'~')],
+        Space => &[
+            (b'\t', b'\t'),
+            (b'\n', b'\n'),
+            (b'\x0B', b'\x0B'),
+            (b'\x0C', b'\x0C'),
+            (b'\r', b'\r'),
+            (b' ', b' '),
+        ],
+        Upper => &[(b'A', b'Z')],
+        Word => &[(b'0', b'9'), (b'A', b'Z'), (b'_', b'_'), (b'a', b'z')],
+        Xdigit => &[(b'0', b'9'), (b'A', b'F'), (b'a', b'f')],
+    };
+    slice.iter().copied()
+}
+
+/// Same ranges as `ascii_class`, with both bounds widened from `u8` to
+/// `char`.
+fn ascii_class_as_chars(
+    kind: &ast::ClassAsciiKind,
+) -> impl Iterator<Item = (char, char)> {
+    let byte_ranges = ascii_class(kind);
+    byte_ranges.map(|(start, end)| (start as char, end as char))
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::{
+        ast::{self, parse::ParserBuilder, Ast, Position, Span},
+        hir::{self, Hir, HirKind, Look, Properties},
+        unicode::{self, ClassQuery},
+    };
+
+    use super::*;
+
+    // We create these errors to compare with real hir::Errors in the tests.
+    // We define equality between TestError and hir::Error to disregard the
+    // pattern string in hir::Error, which is annoying to provide in tests.
+    #[derive(Clone, Debug)]
+    struct TestError {
+        // Span the error is expected to point at.
+        span: Span,
+        // Kind of translation error that is expected.
+        kind: hir::ErrorKind,
+    }
+
+    impl PartialEq<hir::Error> for TestError {
+        /// Equal when span and kind agree; the pattern text stored in the
+        /// real error is not consulted.
+        fn eq(&self, other: &hir::Error) -> bool {
+            let same_span = self.span == other.span;
+            let same_kind = self.kind == other.kind;
+            same_span && same_kind
+        }
+    }
+
+    impl PartialEq<TestError> for hir::Error {
+        /// Equal when span and kind agree; the pattern text stored in the
+        /// real error is not consulted.
+        fn eq(&self, other: &TestError) -> bool {
+            let same_span = self.span == other.span;
+            let same_kind = self.kind == other.kind;
+            same_span && same_kind
+        }
+    }
+
+    /// Parse `pattern` into an AST with octal syntax enabled, panicking if
+    /// the pattern does not parse.
+    fn parse(pattern: &str) -> Ast {
+        let mut parser = ParserBuilder::new().octal(true).build();
+        parser.parse(pattern).unwrap()
+    }
+
+    /// Translate `pattern` with the UTF-8 requirement switched on, panicking
+    /// if parsing or translation fails.
+    fn t(pattern: &str) -> Hir {
+        let mut translator = TranslatorBuilder::new().utf8(true).build();
+        let ast = parse(pattern);
+        translator.translate(pattern, &ast).unwrap()
+    }
+
+    /// Translate `pattern` with the UTF-8 requirement switched on and return
+    /// the translation error, panicking if translation succeeds.
+    fn t_err(pattern: &str) -> hir::Error {
+        let mut translator = TranslatorBuilder::new().utf8(true).build();
+        let ast = parse(pattern);
+        translator.translate(pattern, &ast).unwrap_err()
+    }
+
+    /// Translate `pattern` with the UTF-8 requirement switched off, panicking
+    /// if parsing or translation fails.
+    fn t_bytes(pattern: &str) -> Hir {
+        let mut translator = TranslatorBuilder::new().utf8(false).build();
+        let ast = parse(pattern);
+        translator.translate(pattern, &ast).unwrap()
+    }
+
+    /// Properties of the HIR that `t` produces for `pattern`.
+    fn props(pattern: &str) -> Properties {
+        let hir = t(pattern);
+        hir.properties().clone()
+    }
+
+    /// Properties of the HIR that `t_bytes` produces for `pattern`.
+    fn props_bytes(pattern: &str) -> Properties {
+        let hir = t_bytes(pattern);
+        hir.properties().clone()
+    }
+
+    /// Literal HIR made from the UTF-8 bytes of `s`.
+    fn hir_lit(s: &str) -> Hir {
+        hir_blit(s.as_bytes())
+    }
+
+    /// Literal HIR made from the given bytes.
+    fn hir_blit(s: &[u8]) -> Hir {
+        Hir::literal(s)
+    }
+
+    /// Unnamed capture group with the given index around `expr`.
+    fn hir_capture(index: u32, expr: Hir) -> Hir {
+        let sub = Box::new(expr);
+        Hir::capture(hir::Capture { index, name: None, sub })
+    }
+
+    /// Named capture group with the given index and name around `expr`.
+    fn hir_capture_name(index: u32, name: &str, expr: Hir) -> Hir {
+        let name = Some(Box::<str>::from(name));
+        let sub = Box::new(expr);
+        Hir::capture(hir::Capture { index, name, sub })
+    }
+
+    /// Repetition of `expr` zero or one time.
+    fn hir_quest(greedy: bool, expr: Hir) -> Hir {
+        hir_range(greedy, 0, Some(1), expr)
+    }
+
+    /// Repetition of `expr` zero or more times, without an upper bound.
+    fn hir_star(greedy: bool, expr: Hir) -> Hir {
+        hir_range(greedy, 0, None, expr)
+    }
+
+    /// Repetition of `expr` one or more times, without an upper bound.
+    fn hir_plus(greedy: bool, expr: Hir) -> Hir {
+        hir_range(greedy, 1, None, expr)
+    }
+
+    /// Repetition of `expr` between `min` and `max` times, where a `max` of
+    /// `None` leaves the upper bound open.
+    fn hir_range(greedy: bool, min: u32, max: Option<u32>, expr: Hir) -> Hir {
+        let sub = Box::new(expr);
+        Hir::repetition(hir::Repetition { min, max, greedy, sub })
+    }
+
+    /// Alternation of the given expressions, built via `Hir::alternation`.
+    fn hir_alt(alts: Vec<Hir>) -> Hir {
+        Hir::alternation(alts)
+    }
+
+    /// Concatenation of the given expressions, built via `Hir::concat`.
+    fn hir_cat(exprs: Vec<Hir>) -> Hir {
+        Hir::concat(exprs)
+    }
+
+    /// Unicode class HIR for the given class query, panicking if the query
+    /// cannot be resolved.
+    #[allow(dead_code)]
+    fn hir_uclass_query(query: ClassQuery<'_>) -> Hir {
+        let class = unicode::class(query).unwrap();
+        Hir::class(hir::Class::Unicode(class))
+    }
+
+    /// Unicode class HIR for the Perl word class, panicking if that class is
+    /// not available.
+    #[allow(dead_code)]
+    fn hir_uclass_perl_word() -> Hir {
+        let class = unicode::perl_word().unwrap();
+        Hir::class(hir::Class::Unicode(class))
+    }
+
+    /// Unicode class HIR holding the ranges of the given ASCII class.
+    fn hir_ascii_uclass(kind: &ast::ClassAsciiKind) -> Hir {
+        let ranges = ascii_class_as_chars(kind)
+            .map(|(start, end)| hir::ClassUnicodeRange::new(start, end));
+        let class = hir::ClassUnicode::new(ranges);
+        Hir::class(hir::Class::Unicode(class))
+    }
+
+    /// Byte class HIR holding the ranges of the given ASCII class.
+    fn hir_ascii_bclass(kind: &ast::ClassAsciiKind) -> Hir {
+        let ranges = ascii_class(kind)
+            .map(|(start, end)| hir::ClassBytesRange::new(start, end));
+        let class = hir::ClassBytes::new(ranges);
+        Hir::class(hir::Class::Bytes(class))
+    }
+
+    /// Unicode class HIR built from `(start, end)` character pairs.
+    fn hir_uclass(ranges: &[(char, char)]) -> Hir {
+        Hir::class(uclass(ranges))
+    }
+
+    /// Byte class HIR built from `(start, end)` byte pairs.
+    fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
+        Hir::class(bclass(ranges))
+    }
+
+    /// Case fold the class inside `expr` and wrap it up again.
+    ///
+    /// Panics if `expr` is anything other than a class.
+    fn hir_case_fold(expr: Hir) -> Hir {
+        if let HirKind::Class(mut class) = expr.into_kind() {
+            class.case_fold_simple();
+            return Hir::class(class);
+        }
+        panic!("cannot case fold non-class Hir expr")
+    }
+
+    /// Negate the class inside `expr` and wrap it up again.
+    ///
+    /// Panics if `expr` is anything other than a class.
+    fn hir_negate(expr: Hir) -> Hir {
+        if let HirKind::Class(mut class) = expr.into_kind() {
+            class.negate();
+            return Hir::class(class);
+        }
+        panic!("cannot negate non-class Hir expr")
+    }
+
+    /// Unicode class built from `(start, end)` character pairs.
+    ///
+    /// The ranges are handed to the class constructor as an iterator, the
+    /// same way `hir_ascii_uclass` does it, so no intermediate `Vec` is built.
+    fn uclass(ranges: &[(char, char)]) -> hir::Class {
+        hir::Class::Unicode(hir::ClassUnicode::new(
+            ranges.iter().map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)),
+        ))
+    }
+
+    /// Byte class built from `(start, end)` byte pairs.
+    ///
+    /// The ranges are handed to the class constructor as an iterator, the
+    /// same way `hir_ascii_bclass` does it, so no intermediate `Vec` is built.
+    fn bclass(ranges: &[(u8, u8)]) -> hir::Class {
+        hir::Class::Bytes(hir::ClassBytes::new(
+            ranges.iter().map(|&(s, e)| hir::ClassBytesRange::new(s, e)),
+        ))
+    }
+
+    /// Case fold the given class and wrap it in an HIR expression.
+    ///
+    /// Only compiled when the `unicode-case` feature is enabled.
+    #[cfg(feature = "unicode-case")]
+    fn class_case_fold(mut cls: hir::Class) -> Hir {
+        cls.case_fold_simple();
+        Hir::class(cls)
+    }
+
+    /// Negate the given class and wrap it in an HIR expression.
+    fn class_negate(mut cls: hir::Class) -> Hir {
+        cls.negate();
+        Hir::class(cls)
+    }
+
+    /// Union of two class expressions of the same flavor (both Unicode or
+    /// both bytes).
+    ///
+    /// Panics if either expression is not a class or if the flavors differ.
+    #[allow(dead_code)]
+    fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
+        use crate::hir::Class::{Bytes, Unicode};
+
+        let (lhs, rhs) = (expr1.into_kind(), expr2.into_kind());
+        let class = match (lhs, rhs) {
+            (
+                HirKind::Class(Unicode(mut acc)),
+                HirKind::Class(Unicode(other)),
+            ) => {
+                acc.union(&other);
+                Unicode(acc)
+            }
+            (HirKind::Class(Bytes(mut acc)), HirKind::Class(Bytes(other))) => {
+                acc.union(&other);
+                Bytes(acc)
+            }
+            _ => panic!("cannot union non-class Hir exprs"),
+        };
+        Hir::class(class)
+    }
+
+    /// Difference of two class expressions of the same flavor (both Unicode
+    /// or both bytes): the first class with the second removed.
+    ///
+    /// Panics if either expression is not a class or if the flavors differ.
+    #[allow(dead_code)]
+    fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
+        use crate::hir::Class::{Bytes, Unicode};
+
+        let (lhs, rhs) = (expr1.into_kind(), expr2.into_kind());
+        let class = match (lhs, rhs) {
+            (
+                HirKind::Class(Unicode(mut acc)),
+                HirKind::Class(Unicode(other)),
+            ) => {
+                acc.difference(&other);
+                Unicode(acc)
+            }
+            (HirKind::Class(Bytes(mut acc)), HirKind::Class(Bytes(other))) => {
+                acc.difference(&other);
+                Bytes(acc)
+            }
+            _ => panic!("cannot difference non-class Hir exprs"),
+        };
+        Hir::class(class)
+    }
+
+    /// Look-around HIR for the given assertion, built via `Hir::look`.
+    fn hir_look(look: hir::Look) -> Hir {
+        Hir::look(look)
+    }
+
+    // Patterns without any matchable content translate to `Hir::empty()`,
+    // wrapped in whatever captures and alternations the pattern spells out.
+    #[test]
+    fn empty() {
+        // The empty pattern, a lone flag group and a non-capturing group all
+        // collapse to the empty expression.
+        assert_eq!(t(""), Hir::empty());
+        assert_eq!(t("(?i)"), Hir::empty());
+        assert_eq!(t("()"), hir_capture(1, Hir::empty()));
+        assert_eq!(t("(?:)"), Hir::empty());
+        assert_eq!(t("(?P<wat>)"), hir_capture_name(1, "wat", Hir::empty()));
+        // Empty alternation branches are kept as explicit empty expressions.
+        assert_eq!(t("|"), hir_alt(vec![Hir::empty(), Hir::empty()]));
+        assert_eq!(
+            t("()|()"),
+            hir_alt(vec![
+                hir_capture(1, Hir::empty()),
+                hir_capture(2, Hir::empty()),
+            ])
+        );
+        assert_eq!(
+            t("(|b)"),
+            hir_capture(1, hir_alt(vec![Hir::empty(), hir_lit("b"),]))
+        );
+        assert_eq!(
+            t("(a|)"),
+            hir_capture(1, hir_alt(vec![hir_lit("a"), Hir::empty(),]))
+        );
+        assert_eq!(
+            t("(a||c)"),
+            hir_capture(
+                1,
+                hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c"),])
+            )
+        );
+        assert_eq!(
+            t("(||)"),
+            hir_capture(
+                1,
+                hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty(),])
+            )
+        );
+    }
+
+    // Plain literals, with and without Unicode mode, and with and without
+    // the UTF-8 requirement.
+    #[test]
+    fn literal() {
+        assert_eq!(t("a"), hir_lit("a"));
+        assert_eq!(t("(?-u)a"), hir_lit("a"));
+        assert_eq!(t("☃"), hir_lit("☃"));
+        assert_eq!(t("abcd"), hir_lit("abcd"));
+
+        // Without the UTF-8 requirement, a non-ASCII byte escape becomes a
+        // raw byte literal.
+        assert_eq!(t_bytes("(?-u)a"), hir_lit("a"));
+        assert_eq!(t_bytes("(?-u)\x61"), hir_lit("a"));
+        assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a"));
+        assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));
+
+        // With the UTF-8 requirement, the same byte escape is rejected.
+        assert_eq!(t("(?-u)☃"), hir_lit("☃"));
+        assert_eq!(
+            t_err(r"(?-u)\xFF"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(9, 1, 10)
+                ),
+            }
+        );
+    }
+
+    // Literals under the `i` flag: letters turn into small classes that hold
+    // every case variant, everything else stays a literal.
+    #[test]
+    fn literal_case_insensitive() {
+        // Unicode mode. These depend on the `unicode-case` feature.
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a'),]));
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(t("(?i:a)"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("a(?i)a(?-i)a"),
+            hir_cat(vec![
+                hir_lit("a"),
+                hir_uclass(&[('A', 'A'), ('a', 'a')]),
+                hir_lit("a"),
+            ])
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)ab@c"),
+            hir_cat(vec![
+                hir_uclass(&[('A', 'A'), ('a', 'a')]),
+                hir_uclass(&[('B', 'B'), ('b', 'b')]),
+                hir_lit("@"),
+                hir_uclass(&[('C', 'C'), ('c', 'c')]),
+            ])
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)β"),
+            hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
+        );
+
+        // Unicode mode off: folding produces byte classes.
+        assert_eq!(t("(?i-u)a"), hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]));
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?-u)a(?i)a(?-i)a"),
+            hir_cat(vec![
+                hir_lit("a"),
+                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
+                hir_lit("a"),
+            ])
+        );
+        assert_eq!(
+            t("(?i-u)ab@c"),
+            hir_cat(vec![
+                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
+                hir_bclass(&[(b'B', b'B'), (b'b', b'b')]),
+                hir_lit("@"),
+                hir_bclass(&[(b'C', b'C'), (b'c', b'c')]),
+            ])
+        );
+
+        // Same, without the UTF-8 requirement.
+        assert_eq!(
+            t_bytes("(?i-u)a"),
+            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
+        );
+        assert_eq!(
+            t_bytes("(?i-u)\x61"),
+            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
+        );
+        assert_eq!(
+            t_bytes(r"(?i-u)\x61"),
+            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
+        );
+        // A non-ASCII byte has no case variants and stays a literal.
+        assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));
+
+        // With Unicode mode off, a non-ASCII character is not folded.
+        assert_eq!(t("(?i-u)β"), hir_lit("β"),);
+    }
+
+    // Translation of `.` under the `s`, `R` and `u` flags.
+    #[test]
+    fn dot() {
+        // Unicode mode: everything except `\n` by default, except `\r` and
+        // `\n` with `R`, and everything with `s`.
+        assert_eq!(
+            t("."),
+            hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}')])
+        );
+        assert_eq!(
+            t("(?R)."),
+            hir_uclass(&[
+                ('\0', '\t'),
+                ('\x0B', '\x0C'),
+                ('\x0E', '\u{10FFFF}'),
+            ])
+        );
+        assert_eq!(t("(?s)."), hir_uclass(&[('\0', '\u{10FFFF}')]));
+        assert_eq!(t("(?Rs)."), hir_uclass(&[('\0', '\u{10FFFF}')]));
+        // Unicode mode off, UTF-8 requirement off: the same shapes over
+        // bytes.
+        assert_eq!(
+            t_bytes("(?-u)."),
+            hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF')])
+        );
+        assert_eq!(
+            t_bytes("(?R-u)."),
+            hir_bclass(&[
+                (b'\0', b'\t'),
+                (b'\x0B', b'\x0C'),
+                (b'\x0E', b'\xFF'),
+            ])
+        );
+        assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[(b'\0', b'\xFF'),]));
+        assert_eq!(t_bytes("(?Rs-u)."), hir_bclass(&[(b'\0', b'\xFF'),]));
+
+        // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
+        // In each case the error span covers just the `.`.
+        assert_eq!(
+            t_err("(?-u)."),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(6, 1, 7)
+                ),
+            }
+        );
+        assert_eq!(
+            t_err("(?R-u)."),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(6, 1, 7),
+                    Position::new(7, 1, 8)
+                ),
+            }
+        );
+        assert_eq!(
+            t_err("(?s-u)."),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(6, 1, 7),
+                    Position::new(7, 1, 8)
+                ),
+            }
+        );
+        assert_eq!(
+            t_err("(?Rs-u)."),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(7, 1, 8),
+                    Position::new(8, 1, 9)
+                ),
+            }
+        );
+    }
+
+    #[test] // Pins the `hir::Look` variant chosen for each anchor and word-boundary escape.
+    fn assertions() {
+        assert_eq!(t("^"), hir_look(hir::Look::Start));
+        assert_eq!(t("$"), hir_look(hir::Look::End));
+        assert_eq!(t(r"\A"), hir_look(hir::Look::Start)); // Without `m`, `\A`/`\z` match what `^`/`$` produce.
+        assert_eq!(t(r"\z"), hir_look(hir::Look::End));
+        assert_eq!(t("(?m)^"), hir_look(hir::Look::StartLF)); // `m` switches only `^`/`$` to the LF variants...
+        assert_eq!(t("(?m)$"), hir_look(hir::Look::EndLF));
+        assert_eq!(t(r"(?m)\A"), hir_look(hir::Look::Start)); // ...while `\A`/`\z` stay unchanged.
+        assert_eq!(t(r"(?m)\z"), hir_look(hir::Look::End));
+
+        assert_eq!(t(r"\b"), hir_look(hir::Look::WordUnicode)); // Word boundaries are the Unicode variants by default.
+        assert_eq!(t(r"\B"), hir_look(hir::Look::WordUnicodeNegate));
+        assert_eq!(t(r"(?-u)\b"), hir_look(hir::Look::WordAscii)); // `-u` selects the ASCII variants.
+        assert_eq!(t(r"(?-u)\B"), hir_look(hir::Look::WordAsciiNegate));
+    }
+
+    #[test] // Checks capture numbering, named captures, and that non-capturing groups leave no node.
+    fn group() {
+        assert_eq!(t("(a)"), hir_capture(1, hir_lit("a"))); // The first capture group gets index 1.
+        assert_eq!(
+            t("(a)(b)"),
+            hir_cat(vec![
+                hir_capture(1, hir_lit("a")),
+                hir_capture(2, hir_lit("b")),
+            ])
+        );
+        assert_eq!(
+            t("(a)|(b)"),
+            hir_alt(vec![
+                hir_capture(1, hir_lit("a")),
+                hir_capture(2, hir_lit("b")),
+            ])
+        ); // Indices keep increasing across alternation branches.
+        assert_eq!(t("(?P<foo>)"), hir_capture_name(1, "foo", Hir::empty()));
+        assert_eq!(t("(?P<foo>a)"), hir_capture_name(1, "foo", hir_lit("a")));
+        assert_eq!(
+            t("(?P<foo>a)(?P<bar>b)"),
+            hir_cat(vec![
+                hir_capture_name(1, "foo", hir_lit("a")),
+                hir_capture_name(2, "bar", hir_lit("b")),
+            ])
+        );
+        assert_eq!(t("(?:)"), Hir::empty()); // `(?:...)` translates to its contents with no capture wrapper.
+        assert_eq!(t("(?:a)"), hir_lit("a"));
+        assert_eq!(
+            t("(?:a)(b)"),
+            hir_cat(vec![hir_lit("a"), hir_capture(1, hir_lit("b")),])
+        ); // A non-capturing group does not consume a capture index.
+        assert_eq!(
+            t("(a)(?:b)(c)"),
+            hir_cat(vec![
+                hir_capture(1, hir_lit("a")),
+                hir_lit("b"),
+                hir_capture(2, hir_lit("c")),
+            ])
+        );
+        assert_eq!(
+            t("(a)(?P<foo>b)(c)"),
+            hir_cat(vec![
+                hir_capture(1, hir_lit("a")),
+                hir_capture_name(2, "foo", hir_lit("b")),
+                hir_capture(3, hir_lit("c")),
+            ])
+        ); // Named and unnamed groups share one index sequence.
+        assert_eq!(t("()"), hir_capture(1, Hir::empty()));
+        assert_eq!(t("((?i))"), hir_capture(1, Hir::empty())); // A flag-only group contributes an empty expression.
+        assert_eq!(t("((?x))"), hir_capture(1, Hir::empty()));
+        assert_eq!(
+            t("(((?x)))"),
+            hir_capture(1, hir_capture(2, Hir::empty()))
+        ); // Nested groups are numbered outermost first.
+    }
+
+    #[test] // Shows how `^`/`$` respond to the `m` and `R` flags; `\A`/`\z` are the same in every case.
+    fn line_anchors() {
+        assert_eq!(t("^"), hir_look(hir::Look::Start));
+        assert_eq!(t("$"), hir_look(hir::Look::End));
+        assert_eq!(t(r"\A"), hir_look(hir::Look::Start));
+        assert_eq!(t(r"\z"), hir_look(hir::Look::End));
+
+        assert_eq!(t(r"(?m)\A"), hir_look(hir::Look::Start));
+        assert_eq!(t(r"(?m)\z"), hir_look(hir::Look::End));
+        assert_eq!(t("(?m)^"), hir_look(hir::Look::StartLF)); // `m` alone selects the LF variants.
+        assert_eq!(t("(?m)$"), hir_look(hir::Look::EndLF));
+
+        assert_eq!(t(r"(?R)\A"), hir_look(hir::Look::Start));
+        assert_eq!(t(r"(?R)\z"), hir_look(hir::Look::End));
+        assert_eq!(t("(?R)^"), hir_look(hir::Look::Start)); // `R` without `m` leaves `^`/`$` as in the default case.
+        assert_eq!(t("(?R)$"), hir_look(hir::Look::End));
+
+        assert_eq!(t(r"(?Rm)\A"), hir_look(hir::Look::Start));
+        assert_eq!(t(r"(?Rm)\z"), hir_look(hir::Look::End));
+        assert_eq!(t("(?Rm)^"), hir_look(hir::Look::StartCRLF)); // `R` together with `m` selects the CRLF variants.
+        assert_eq!(t("(?Rm)$"), hir_look(hir::Look::EndCRLF));
+    }
+
+    #[test] // Checks the scope of inline flags: `(?flags:...)`, `(?flags)` inside groups, and flag negation.
+    fn flags() {
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i:a)a"),
+            hir_cat(
+                vec![hir_uclass(&[('A', 'A'), ('a', 'a')]), hir_lit("a"),]
+            )
+        ); // `i` applies only inside the `(?i:...)` group; the trailing `a` stays a literal.
+        assert_eq!(
+            t("(?i-u:a)β"),
+            hir_cat(vec![
+                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
+                hir_lit("β"),
+            ])
+        ); // With `-u` the folded `a` becomes a byte class; `β` outside the group is a plain literal.
+        assert_eq!(
+            t("(?:(?i-u)a)b"),
+            hir_cat(vec![
+                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
+                hir_lit("b"),
+            ])
+        ); // Flags set inside a group do not leak past the group's end.
+        assert_eq!(
+            t("((?i-u)a)b"),
+            hir_cat(vec![
+                hir_capture(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
+                hir_lit("b"),
+            ])
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)(?-i:a)a"),
+            hir_cat(
+                vec![hir_lit("a"), hir_uclass(&[('A', 'A'), ('a', 'a')]),]
+            )
+        ); // `-i` turns folding off for the group only; the outer `i` is back in effect afterwards.
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?im)a^"),
+            hir_cat(vec![
+                hir_uclass(&[('A', 'A'), ('a', 'a')]),
+                hir_look(hir::Look::StartLF),
+            ])
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?im)a^(?i-m)a^"),
+            hir_cat(vec![
+                hir_uclass(&[('A', 'A'), ('a', 'a')]),
+                hir_look(hir::Look::StartLF),
+                hir_uclass(&[('A', 'A'), ('a', 'a')]),
+                hir_look(hir::Look::Start),
+            ])
+        ); // A later `(?i-m)` clears `m` for the rest of the pattern while `i` stays on.
+        assert_eq!(
+            t("(?U)a*a*?(?-U)a*a*?"),
+            hir_cat(vec![
+                hir_star(false, hir_lit("a")),
+                hir_star(true, hir_lit("a")),
+                hir_star(true, hir_lit("a")),
+                hir_star(false, hir_lit("a")),
+            ])
+        ); // Under `U` the boolean passed to `hir_star` is inverted for both `a*` and `a*?`; `-U` restores it.
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?:a(?i)a)a"),
+            hir_cat(vec![
+                hir_cat(vec![
+                    hir_lit("a"),
+                    hir_uclass(&[('A', 'A'), ('a', 'a')]),
+                ]),
+                hir_lit("a"),
+            ])
+        ); // A mid-group `(?i)` affects only what follows it inside that group.
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)(?:a(?-i)a)a"),
+            hir_cat(vec![
+                hir_cat(vec![
+                    hir_uclass(&[('A', 'A'), ('a', 'a')]),
+                    hir_lit("a"),
+                ]),
+                hir_uclass(&[('A', 'A'), ('a', 'a')]),
+            ])
+        );
+    }
+
+    #[test] // Each backslash-escaped metacharacter must translate to its plain character, all in one literal.
+    fn escape() {
+        assert_eq!(
+            t(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"),
+            hir_lit(r"\.+*?()|[]{}^$#")
+        );
+    }
+
+    #[test] // Covers `?`, `*`, `+`, counted ranges, their `?`-suffixed forms, and operator binding.
+    fn repetition() {
+        assert_eq!(t("a?"), hir_quest(true, hir_lit("a"))); // The boolean is `true` for the plain operators...
+        assert_eq!(t("a*"), hir_star(true, hir_lit("a")));
+        assert_eq!(t("a+"), hir_plus(true, hir_lit("a")));
+        assert_eq!(t("a??"), hir_quest(false, hir_lit("a"))); // ...and `false` once a trailing `?` is added.
+        assert_eq!(t("a*?"), hir_star(false, hir_lit("a")));
+        assert_eq!(t("a+?"), hir_plus(false, hir_lit("a")));
+
+        assert_eq!(t("a{1}"), hir_range(true, 1, Some(1), hir_lit("a"),)); // `{n}` gives equal min and max.
+        assert_eq!(t("a{1,}"), hir_range(true, 1, None, hir_lit("a"),)); // An open upper bound is `None`.
+        assert_eq!(t("a{1,2}"), hir_range(true, 1, Some(2), hir_lit("a"),));
+        assert_eq!(t("a{1}?"), hir_range(false, 1, Some(1), hir_lit("a"),));
+        assert_eq!(t("a{1,}?"), hir_range(false, 1, None, hir_lit("a"),));
+        assert_eq!(t("a{1,2}?"), hir_range(false, 1, Some(2), hir_lit("a"),));
+
+        assert_eq!(
+            t("ab?"),
+            hir_cat(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
+        ); // The operator applies to the preceding `b` only, not to `ab`.
+        assert_eq!(t("(ab)?"), hir_quest(true, hir_capture(1, hir_lit("ab"))));
+        assert_eq!(
+            t("a|b?"),
+            hir_alt(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
+        ); // Repetition binds tighter than alternation.
+    }
+
+    #[test] // Checks how concatenation and alternation nest, using look-around nodes as the operands.
+    fn cat_alt() {
+        let a = || hir_look(hir::Look::Start); // Closures so each use builds a fresh expected node.
+        let b = || hir_look(hir::Look::End);
+        let c = || hir_look(hir::Look::WordUnicode);
+        let d = || hir_look(hir::Look::WordUnicodeNegate);
+
+        assert_eq!(t("(^$)"), hir_capture(1, hir_cat(vec![a(), b()])));
+        assert_eq!(t("^|$"), hir_alt(vec![a(), b()]));
+        assert_eq!(t(r"^|$|\b"), hir_alt(vec![a(), b(), c()])); // Three branches give one flat alternation.
+        assert_eq!(
+            t(r"^$|$\b|\b\B"),
+            hir_alt(vec![
+                hir_cat(vec![a(), b()]),
+                hir_cat(vec![b(), c()]),
+                hir_cat(vec![c(), d()]),
+            ])
+        ); // Concatenation binds tighter than `|`: each branch is its own concat.
+        assert_eq!(t("(^|$)"), hir_capture(1, hir_alt(vec![a(), b()])));
+        assert_eq!(
+            t(r"(^|$|\b)"),
+            hir_capture(1, hir_alt(vec![a(), b(), c()]))
+        );
+        assert_eq!(
+            t(r"(^$|$\b|\b\B)"),
+            hir_capture(
+                1,
+                hir_alt(vec![
+                    hir_cat(vec![a(), b()]),
+                    hir_cat(vec![b(), c()]),
+                    hir_cat(vec![c(), d()]),
+                ])
+            )
+        );
+        assert_eq!(
+            t(r"(^$|($\b|(\b\B)))"),
+            hir_capture(
+                1,
+                hir_alt(vec![
+                    hir_cat(vec![a(), b()]),
+                    hir_capture(
+                        2,
+                        hir_alt(vec![
+                            hir_cat(vec![b(), c()]),
+                            hir_capture(3, hir_cat(vec![c(), d()])),
+                        ])
+                    ),
+                ])
+            )
+        ); // Nested capture groups keep their inner alternations as separate nodes.
+    }
+
+    // Tests the HIR transformation of things like '[a-z]|[A-Z]' into
+    // '[A-Za-z]'. In other words, an alternation made up only of classes is
+    // translated to a single class that is the union of its branches. (Unless
+    // some branches match invalid UTF-8 and others match non-ASCII Unicode,
+    // in which case the alternation is kept, as asserted below.)
+    #[test]
+    fn cat_class_flattened() {
+        assert_eq!(t(r"[a-z]|[A-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
+        // Combining all of the letter properties should give us the one giant
+        // letter property.
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"(?x)
+                \p{Lowercase_Letter}
+                |\p{Uppercase_Letter}
+                |\p{Titlecase_Letter}
+                |\p{Modifier_Letter}
+                |\p{Other_Letter}
+            "),
+            hir_uclass_query(ClassQuery::Binary("letter"))
+        );
+        // Byte classes that can truly match invalid UTF-8 cannot be combined
+        // with Unicode classes.
+        assert_eq!(
+            t_bytes(r"[Δδ]|(?-u:[\x90-\xFF])|[Λλ]"),
+            hir_alt(vec![
+                hir_uclass(&[('Δ', 'Δ'), ('δ', 'δ')]),
+                hir_bclass(&[(b'\x90', b'\xFF')]),
+                hir_uclass(&[('Λ', 'Λ'), ('λ', 'λ')]),
+            ])
+        ); // The two Unicode classes are not merged with each other either: all three branches remain.
+        // Byte classes on their own can be combined, even if some are ASCII
+        // and others are invalid UTF-8.
+        assert_eq!(
+            t_bytes(r"[a-z]|(?-u:[\x90-\xFF])|[A-Z]"),
+            hir_bclass(&[(b'A', b'Z'), (b'a', b'z'), (b'\x90', b'\xFF')]),
+        ); // Here the ASCII-only `[a-z]` and `[A-Z]` end up in the byte class.
+    }
+
+    #[test] // Covers every POSIX-style `[[:name:]]` class, then negation, case folding and byte mode.
+    fn class_ascii() {
+        assert_eq!(
+            t("[[:alnum:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Alnum)
+        );
+        assert_eq!(
+            t("[[:alpha:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Alpha)
+        );
+        assert_eq!(
+            t("[[:ascii:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Ascii)
+        );
+        assert_eq!(
+            t("[[:blank:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Blank)
+        );
+        assert_eq!(
+            t("[[:cntrl:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Cntrl)
+        );
+        assert_eq!(
+            t("[[:digit:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Digit)
+        );
+        assert_eq!(
+            t("[[:graph:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Graph)
+        );
+        assert_eq!(
+            t("[[:lower:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Lower)
+        );
+        assert_eq!(
+            t("[[:print:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Print)
+        );
+        assert_eq!(
+            t("[[:punct:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Punct)
+        );
+        assert_eq!(
+            t("[[:space:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Space)
+        );
+        assert_eq!(
+            t("[[:upper:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Upper)
+        );
+        assert_eq!(
+            t("[[:word:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Word)
+        );
+        assert_eq!(
+            t("[[:xdigit:]]"),
+            hir_ascii_uclass(&ast::ClassAsciiKind::Xdigit)
+        );
+
+        assert_eq!(
+            t("[[:^lower:]]"),
+            hir_negate(hir_ascii_uclass(&ast::ClassAsciiKind::Lower))
+        ); // `^` inside the name negates the class.
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[[:lower:]]"),
+            hir_uclass(&[
+                ('A', 'Z'),
+                ('a', 'z'),
+                ('\u{17F}', '\u{17F}'),
+                ('\u{212A}', '\u{212A}'),
+            ])
+        ); // Unicode folding adds U+017F and U+212A, two non-ASCII code points, to the ASCII letters.
+
+        assert_eq!(
+            t("(?-u)[[:lower:]]"),
+            hir_ascii_bclass(&ast::ClassAsciiKind::Lower)
+        );
+        assert_eq!(
+            t("(?i-u)[[:lower:]]"),
+            hir_case_fold(hir_ascii_bclass(&ast::ClassAsciiKind::Lower))
+        ); // In byte mode the expected value is the byte class passed through `hir_case_fold`.
+
+        assert_eq!(
+            t_err("(?-u)[[:^lower:]]"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(6, 1, 7),
+                    Position::new(16, 1, 17)
+                ),
+            }
+        ); // 6..16 is the inner `[:^lower:]` item, not the enclosing bracket at index 5.
+        assert_eq!(
+            t_err("(?i-u)[[:^lower:]]"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(7, 1, 8),
+                    Position::new(17, 1, 18)
+                ),
+            }
+        );
+    }
+
+    #[test] // Two POSIX-style classes in one bracket, one of them negated, must union correctly.
+    fn class_ascii_multiple() {
+        // See: https://github.com/rust-lang/regex/issues/680
+        assert_eq!(
+            t("[[:alnum:][:^ascii:]]"),
+            hir_union(
+                hir_ascii_uclass(&ast::ClassAsciiKind::Alnum),
+                hir_uclass(&[('\u{80}', '\u{10FFFF}')]),
+            ),
+        ); // `[:^ascii:]` contributes U+0080..=U+10FFFF; the negation covers that item only.
+        assert_eq!(
+            t_bytes("(?-u)[[:alnum:][:^ascii:]]"),
+            hir_union(
+                hir_ascii_bclass(&ast::ClassAsciiKind::Alnum),
+                hir_bclass(&[(0x80, 0xFF)]),
+            ),
+        ); // Byte-mode counterpart: the negated part is 0x80..=0xFF.
+    }
+
+    #[test] // Unicode-mode `\d`, `\s`, `\w` and their negations, with and without `i`.
+    #[cfg(feature = "unicode-perl")]
+    fn class_perl_unicode() {
+        // Unicode
+        assert_eq!(t(r"\d"), hir_uclass_query(ClassQuery::Binary("digit")));
+        assert_eq!(t(r"\s"), hir_uclass_query(ClassQuery::Binary("space")));
+        assert_eq!(t(r"\w"), hir_uclass_perl_word());
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)\d"),
+            hir_uclass_query(ClassQuery::Binary("digit"))
+        ); // The `i` cases expect exactly the same class as the cases without it.
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)\s"),
+            hir_uclass_query(ClassQuery::Binary("space"))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(t(r"(?i)\w"), hir_uclass_perl_word());
+
+        // Unicode, negated
+        assert_eq!(
+            t(r"\D"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
+        );
+        assert_eq!(
+            t(r"\S"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
+        );
+        assert_eq!(t(r"\W"), hir_negate(hir_uclass_perl_word()));
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)\D"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)\S"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word()));
+    }
+
+    #[test] // `-u` versions of `\d`, `\s`, `\w`: plain, negated, and negated where invalid UTF-8 is rejected.
+    fn class_perl_ascii() {
+        // ASCII only
+        assert_eq!(
+            t(r"(?-u)\d"),
+            hir_ascii_bclass(&ast::ClassAsciiKind::Digit)
+        );
+        assert_eq!(
+            t(r"(?-u)\s"),
+            hir_ascii_bclass(&ast::ClassAsciiKind::Space)
+        );
+        assert_eq!(
+            t(r"(?-u)\w"),
+            hir_ascii_bclass(&ast::ClassAsciiKind::Word)
+        );
+        assert_eq!(
+            t(r"(?i-u)\d"),
+            hir_ascii_bclass(&ast::ClassAsciiKind::Digit)
+        ); // Adding `i` leaves the expected byte class unchanged for all three escapes.
+        assert_eq!(
+            t(r"(?i-u)\s"),
+            hir_ascii_bclass(&ast::ClassAsciiKind::Space)
+        );
+        assert_eq!(
+            t(r"(?i-u)\w"),
+            hir_ascii_bclass(&ast::ClassAsciiKind::Word)
+        );
+
+        // ASCII only, negated
+        assert_eq!(
+            t_bytes(r"(?-u)\D"),
+            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit))
+        ); // The negated forms go through `t_bytes`; the same patterns are errors under `t_err` below.
+        assert_eq!(
+            t_bytes(r"(?-u)\S"),
+            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Space))
+        );
+        assert_eq!(
+            t_bytes(r"(?-u)\W"),
+            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word))
+        );
+        assert_eq!(
+            t_bytes(r"(?i-u)\D"),
+            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit))
+        );
+        assert_eq!(
+            t_bytes(r"(?i-u)\S"),
+            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Space))
+        );
+        assert_eq!(
+            t_bytes(r"(?i-u)\W"),
+            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word))
+        );
+
+        // ASCII only, negated, with UTF-8 mode enabled.
+        // In this case, negating any Perl class results in an error because
+        // all such classes can match invalid UTF-8.
+        assert_eq!(
+            t_err(r"(?-u)\D"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(7, 1, 8),
+                ),
+            },
+        ); // 5..7 is the two-character escape that follows the five-character `(?-u)` prefix.
+        assert_eq!(
+            t_err(r"(?-u)\S"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(7, 1, 8),
+                ),
+            },
+        );
+        assert_eq!(
+            t_err(r"(?-u)\W"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(7, 1, 8),
+                ),
+            },
+        );
+        assert_eq!(
+            t_err(r"(?i-u)\D"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(6, 1, 7),
+                    Position::new(8, 1, 9),
+                ),
+            },
+        ); // Same escape, shifted by one because the `(?i-u)` prefix is one character longer.
+        assert_eq!(
+            t_err(r"(?i-u)\S"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(6, 1, 7),
+                    Position::new(8, 1, 9),
+                ),
+            },
+        );
+        assert_eq!(
+            t_err(r"(?i-u)\W"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(6, 1, 7),
+                    Position::new(8, 1, 9),
+                ),
+            },
+        );
+    }
+
+    #[test] // Compiled only without `unicode-perl`: `\w` must then fail instead of translating.
+    #[cfg(not(feature = "unicode-perl"))]
+    fn class_perl_word_disabled() {
+        assert_eq!(
+            t_err(r"\w"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePerlClassNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(2, 1, 3)
+                ),
+            }
+        ); // The span 0..2 is the whole two-character pattern.
+    }
+
+    #[test] // Compiled only when both `unicode-perl` and `unicode-bool` are off: `\s` must then fail.
+    #[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))] // NOTE(review): presumably either feature can supply the data for `\s` - confirm.
+    fn class_perl_space_disabled() {
+        assert_eq!(
+            t_err(r"\s"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePerlClassNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(2, 1, 3)
+                ),
+            }
+        ); // The span 0..2 is the whole two-character pattern.
+    }
+
+    #[test] // Compiled only when both `unicode-perl` and `unicode-gencat` are off: `\d` must then fail.
+    #[cfg(all(
+        not(feature = "unicode-perl"),
+        not(feature = "unicode-gencat")
+    ))] // NOTE(review): presumably either feature can supply the data for `\d` - confirm.
+    fn class_perl_digit_disabled() {
+        assert_eq!(
+            t_err(r"\d"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePerlClassNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(2, 1, 3)
+                ),
+            }
+        ); // The span 0..2 is the whole two-character pattern.
+    }
+
+    #[test] // General-category `\p`/`\P` classes: accepted spellings, negation, special names, and errors.
+    #[cfg(feature = "unicode-gencat")]
+    fn class_unicode_gencat() {
+        assert_eq!(t(r"\pZ"), hir_uclass_query(ClassQuery::Binary("Z")));
+        assert_eq!(t(r"\pz"), hir_uclass_query(ClassQuery::Binary("Z"))); // Lower-case `z` resolves to the same class.
+        assert_eq!(
+            t(r"\p{Separator}"),
+            hir_uclass_query(ClassQuery::Binary("Z"))
+        ); // The long name is accepted for the same class as `Z`.
+        assert_eq!(
+            t(r"\p{se      PaRa ToR}"),
+            hir_uclass_query(ClassQuery::Binary("Z"))
+        ); // Mixed case and embedded spaces in the name still resolve to `Z`.
+        assert_eq!(
+            t(r"\p{gc:Separator}"),
+            hir_uclass_query(ClassQuery::Binary("Z"))
+        ); // `gc:` and `gc=` prefixes are both accepted.
+        assert_eq!(
+            t(r"\p{gc=Separator}"),
+            hir_uclass_query(ClassQuery::Binary("Z"))
+        );
+        assert_eq!(
+            t(r"\p{Other}"),
+            hir_uclass_query(ClassQuery::Binary("Other"))
+        );
+        assert_eq!(t(r"\pC"), hir_uclass_query(ClassQuery::Binary("Other"))); // `C` yields the same class as `Other`.
+
+        assert_eq!(
+            t(r"\PZ"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
+        );
+        assert_eq!(
+            t(r"\P{separator}"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
+        );
+        assert_eq!(
+            t(r"\P{gc!=separator}"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
+        ); // Expected value is a single negation, the same as for `\P{separator}` above.
+
+        assert_eq!(t(r"\p{any}"), hir_uclass_query(ClassQuery::Binary("Any"))); // `any`, `assigned`, `ascii`: bare and with `gc:`.
+        assert_eq!(
+            t(r"\p{assigned}"),
+            hir_uclass_query(ClassQuery::Binary("Assigned"))
+        );
+        assert_eq!(
+            t(r"\p{ascii}"),
+            hir_uclass_query(ClassQuery::Binary("ASCII"))
+        );
+        assert_eq!(
+            t(r"\p{gc:any}"),
+            hir_uclass_query(ClassQuery::Binary("Any"))
+        );
+        assert_eq!(
+            t(r"\p{gc:assigned}"),
+            hir_uclass_query(ClassQuery::Binary("Assigned"))
+        );
+        assert_eq!(
+            t(r"\p{gc:ascii}"),
+            hir_uclass_query(ClassQuery::Binary("ASCII"))
+        );
+
+        assert_eq!(
+            t_err(r"(?-u)\pZ"),
+            TestError {
+                kind: hir::ErrorKind::UnicodeNotAllowed,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(8, 1, 9)
+                ),
+            }
+        ); // With `-u`, a `\p` class is rejected outright; the span is the `\pZ` escape.
+        assert_eq!(
+            t_err(r"(?-u)\p{Separator}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodeNotAllowed,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(18, 1, 19)
+                ),
+            }
+        );
+        assert_eq!(
+            t_err(r"\pE"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(3, 1, 4)
+                ),
+            }
+        ); // An unknown bare name is reported as a missing property...
+        assert_eq!(
+            t_err(r"\p{Foo}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(7, 1, 8)
+                ),
+            }
+        );
+        assert_eq!(
+            t_err(r"\p{gc:Foo}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(10, 1, 11)
+                ),
+            }
+        ); // ...whereas an unknown value after `gc:` is reported as a missing property value.
+    }
+
+    #[test] // Compiled only without `unicode-gencat`: general-category names must then be unknown.
+    #[cfg(not(feature = "unicode-gencat"))]
+    fn class_unicode_gencat_disabled() {
+        assert_eq!(
+            t_err(r"\p{Separator}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(13, 1, 14)
+                ),
+            }
+        );
+
+        assert_eq!(
+            t_err(r"\p{Any}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(7, 1, 8)
+                ),
+            }
+        ); // `Any` is also unavailable without the feature.
+    }
+
+    #[test] // Script classes: plain lookup, case folding, negation order, and unknown script values.
+    #[cfg(feature = "unicode-script")]
+    fn class_unicode_script() {
+        assert_eq!(
+            t(r"\p{Greek}"),
+            hir_uclass_query(ClassQuery::Binary("Greek"))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)\p{Greek}"),
+            hir_case_fold(hir_uclass_query(ClassQuery::Binary("Greek")))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)\P{Greek}"),
+            hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
+                "Greek"
+            ))))
+        ); // The expected value folds first and negates second.
+
+        assert_eq!(
+            t_err(r"\p{sc:Foo}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(10, 1, 11)
+                ),
+            }
+        ); // Both the `sc:` and `scx:` prefixes report an unknown value the same way.
+        assert_eq!(
+            t_err(r"\p{scx:Foo}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(11, 1, 12)
+                ),
+            }
+        );
+    }
+
+    #[test] // Compiled only without `unicode-script`: script lookups must then fail.
+    #[cfg(not(feature = "unicode-script"))]
+    fn class_unicode_script_disabled() {
+        assert_eq!(
+            t_err(r"\p{Greek}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(9, 1, 10)
+                ),
+            }
+        );
+
+        assert_eq!(
+            t_err(r"\p{scx:Greek}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(13, 1, 14)
+                ),
+            }
+        ); // With the feature off, even the `scx:` form reports the property, not the value, as missing.
+    }
+
+    #[test] // With `unicode-age` enabled, an unrecognised age value must be reported as a missing value.
+    #[cfg(feature = "unicode-age")]
+    fn class_unicode_age() {
+        assert_eq!(
+            t_err(r"\p{age:Foo}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(11, 1, 12)
+                ),
+            }
+        ); // The span 0..11 is the entire `\p{age:Foo}` escape.
+    }
+
+    #[test] // Negating `any` must translate to a class with no ranges rather than an error.
+    #[cfg(feature = "unicode-gencat")]
+    fn class_unicode_any_empty() {
+        assert_eq!(t(r"\P{any}"), hir_uclass(&[]),);
+    }
+
+    #[test] // Compiled only without `unicode-age`: an `age:` lookup must then fail.
+    #[cfg(not(feature = "unicode-age"))]
+    fn class_unicode_age_disabled() {
+        assert_eq!(
+            t_err(r"\p{age:3.0}"),
+            TestError {
+                kind: hir::ErrorKind::UnicodePropertyNotFound,
+                span: Span::new(
+                    Position::new(0, 1, 1),
+                    Position::new(11, 1, 12)
+                ),
+            }
+        ); // With the feature off the error is the missing property, not a missing value.
+    }
+
+    #[test]
+    fn class_bracketed() {
+        assert_eq!(t("[a]"), hir_lit("a"));
+        assert_eq!(t("[ab]"), hir_uclass(&[('a', 'b')]));
+        assert_eq!(t("[^[a]]"), class_negate(uclass(&[('a', 'a')])));
+        assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')]));
+        assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')]));
+        assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')]));
+        assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')]));
+        assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')]));
+        assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')]));
+        #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
+        assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit")));
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[\pZ]"),
+            hir_uclass_query(ClassQuery::Binary("separator"))
+        );
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[\p{separator}]"),
+            hir_uclass_query(ClassQuery::Binary("separator"))
+        );
+        #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
+        assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit")));
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[^\PZ]"),
+            hir_uclass_query(ClassQuery::Binary("separator"))
+        );
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[^\P{separator}]"),
+            hir_uclass_query(ClassQuery::Binary("separator"))
+        );
+        #[cfg(all(
+            feature = "unicode-case",
+            any(feature = "unicode-perl", feature = "unicode-gencat")
+        ))]
+        assert_eq!(
+            t(r"(?i)[^\D]"),
+            hir_uclass_query(ClassQuery::Binary("digit"))
+        );
+        #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
+        assert_eq!(
+            t(r"(?i)[^\P{greek}]"),
+            hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")))
+        );
+
+        assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
+        assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
+        assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));
+
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[k]"),
+            hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),])
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[β]"),
+            hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
+        );
+        assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),]));
+
+        assert_eq!(t("[^a]"), class_negate(uclass(&[('a', 'a')])));
+        assert_eq!(t(r"[^\x00]"), class_negate(uclass(&[('\0', '\0')])));
+        assert_eq!(
+            t_bytes("(?-u)[^a]"),
+            class_negate(bclass(&[(b'a', b'a')]))
+        );
+        #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
+        assert_eq!(
+            t(r"[^\d]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
+        );
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[^\pZ]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
+        );
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[^\p{separator}]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
+        );
+        #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
+        assert_eq!(
+            t(r"(?i)[^\p{greek}]"),
+            hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
+                "greek"
+            ))))
+        );
+        #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
+        assert_eq!(
+            t(r"(?i)[\P{greek}]"),
+            hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
+                "greek"
+            ))))
+        );
+
+        // Test some weird cases.
+        assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')]));
+
+        assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')]));
+        assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')]));
+        assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')]));
+        assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')]));
+        assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')]));
+
+        assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')]));
+        assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')]));
+        assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')]));
+        assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')]));
+        assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')]));
+
+        assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')]));
+        assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')]));
+        assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')]));
+        assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')]));
+        assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')]));
+
+        assert_eq!(
+            t_err("(?-u)[^a]"),
+            TestError {
+                kind: hir::ErrorKind::InvalidUtf8,
+                span: Span::new(
+                    Position::new(5, 1, 6),
+                    Position::new(9, 1, 10)
+                ),
+            }
+        );
+        #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
+        assert_eq!(t(r"[^\s\S]"), hir_uclass(&[]),);
+        #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
+        assert_eq!(t_bytes(r"(?-u)[^\s\S]"), hir_bclass(&[]),);
+    }
+
+    #[test] // A bracketed class is the union of the items it lists.
+    fn class_bracketed_union() {
+        assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[a\pZb]"),
+            hir_union(
+                hir_uclass(&[('a', 'b')]),
+                hir_uclass_query(ClassQuery::Binary("separator"))
+            )
+        );
+        #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
+        assert_eq!(
+            t(r"[\pZ\p{Greek}]"),
+            hir_union(
+                hir_uclass_query(ClassQuery::Binary("greek")),
+                hir_uclass_query(ClassQuery::Binary("separator"))
+            )
+        );
+        #[cfg(all(
+            feature = "unicode-age",
+            feature = "unicode-gencat",
+            feature = "unicode-script"
+        ))]
+        assert_eq!(
+            t(r"[\p{age:3.0}\pZ\p{Greek}]"),
+            hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("greek")),
+                    hir_uclass_query(ClassQuery::Binary("separator"))
+                )
+            )
+        );
+        #[cfg(all(
+            feature = "unicode-age",
+            feature = "unicode-gencat",
+            feature = "unicode-script"
+        ))]
+        assert_eq!(
+            t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
+            hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("cyrillic")),
+                    hir_union(
+                        hir_uclass_query(ClassQuery::Binary("greek")),
+                        hir_uclass_query(ClassQuery::Binary("separator"))
+                    )
+                )
+            )
+        );
+
+        #[cfg(all(
+            feature = "unicode-age",
+            feature = "unicode-case",
+            feature = "unicode-gencat",
+            feature = "unicode-script"
+        ))]
+        assert_eq!(
+            t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
+            hir_case_fold(hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("greek")),
+                    hir_uclass_query(ClassQuery::Binary("separator"))
+                )
+            ))
+        );
+        #[cfg(all(
+            feature = "unicode-age",
+            feature = "unicode-gencat",
+            feature = "unicode-script"
+        ))]
+        assert_eq!(
+            t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
+            hir_negate(hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("greek")),
+                    hir_uclass_query(ClassQuery::Binary("separator"))
+                )
+            ))
+        );
+        #[cfg(all(
+            feature = "unicode-age",
+            feature = "unicode-case",
+            feature = "unicode-gencat",
+            feature = "unicode-script"
+        ))]
+        assert_eq!(
+            t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
+            hir_negate(hir_case_fold(hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("greek")),
+                    hir_uclass_query(ClassQuery::Binary("separator"))
+                )
+            )))
+        );
+    }
+
+    #[test] // Nested classes are unioned into the enclosing class.
+    fn class_bracketed_nested() {
+        assert_eq!(t(r"[a[^c]]"), class_negate(uclass(&[('c', 'c')])));
+        assert_eq!(t(r"[a-b[^c]]"), class_negate(uclass(&[('c', 'c')])));
+        assert_eq!(t(r"[a-c[^c]]"), class_negate(uclass(&[])));
+
+        assert_eq!(t(r"[^a[^c]]"), hir_uclass(&[('c', 'c')]));
+        assert_eq!(t(r"[^a-b[^c]]"), hir_uclass(&[('c', 'c')]));
+
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)[a[^c]]"),
+            hir_negate(class_case_fold(uclass(&[('c', 'c')])))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)[a-b[^c]]"),
+            hir_negate(class_case_fold(uclass(&[('c', 'c')])))
+        );
+
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(t(r"(?i)[^a[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')]));
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t(r"(?i)[^a-b[^c]]"),
+            hir_uclass(&[('C', 'C'), ('c', 'c')])
+        );
+
+        assert_eq!(t(r"[^a-c[^c]]"), hir_uclass(&[]),);
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(t(r"(?i)[^a-c[^c]]"), hir_uclass(&[]),);
+    }
+
+    #[test] // `&&` intersects its operands (Unicode and `(?-u)` byte classes).
+    fn class_bracketed_intersect() {
+        assert_eq!(t("[abc&&b-c]"), hir_uclass(&[('b', 'c')]));
+        assert_eq!(t("[abc&&[b-c]]"), hir_uclass(&[('b', 'c')]));
+        assert_eq!(t("[[abc]&&[b-c]]"), hir_uclass(&[('b', 'c')]));
+        assert_eq!(t("[a-z&&b-y&&c-x]"), hir_uclass(&[('c', 'x')]));
+        assert_eq!(t("[c-da-b&&a-d]"), hir_uclass(&[('a', 'd')]));
+        assert_eq!(t("[a-d&&c-da-b]"), hir_uclass(&[('a', 'd')]));
+        assert_eq!(t(r"[a-z&&a-c]"), hir_uclass(&[('a', 'c')]));
+        assert_eq!(t(r"[[a-z&&a-c]]"), hir_uclass(&[('a', 'c')]));
+        assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
+
+        assert_eq!(t("(?-u)[abc&&b-c]"), hir_bclass(&[(b'b', b'c')]));
+        assert_eq!(t("(?-u)[abc&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
+        assert_eq!(t("(?-u)[[abc]&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
+        assert_eq!(t("(?-u)[a-z&&b-y&&c-x]"), hir_bclass(&[(b'c', b'x')]));
+        assert_eq!(t("(?-u)[c-da-b&&a-d]"), hir_bclass(&[(b'a', b'd')]));
+        assert_eq!(t("(?-u)[a-d&&c-da-b]"), hir_bclass(&[(b'a', b'd')]));
+
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[abc&&b-c]"),
+            hir_case_fold(hir_uclass(&[('b', 'c')]))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[abc&&[b-c]]"),
+            hir_case_fold(hir_uclass(&[('b', 'c')]))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[[abc]&&[b-c]]"),
+            hir_case_fold(hir_uclass(&[('b', 'c')]))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[a-z&&b-y&&c-x]"),
+            hir_case_fold(hir_uclass(&[('c', 'x')]))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[c-da-b&&a-d]"),
+            hir_case_fold(hir_uclass(&[('a', 'd')]))
+        );
+        #[cfg(feature = "unicode-case")]
+        assert_eq!(
+            t("(?i)[a-d&&c-da-b]"),
+            hir_case_fold(hir_uclass(&[('a', 'd')]))
+        );
+
+        assert_eq!(
+            t("(?i-u)[abc&&b-c]"),
+            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
+        );
+        assert_eq!(
+            t("(?i-u)[abc&&[b-c]]"),
+            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
+        );
+        assert_eq!(
+            t("(?i-u)[[abc]&&[b-c]]"),
+            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
+        );
+        assert_eq!(
+            t("(?i-u)[a-z&&b-y&&c-x]"),
+            hir_case_fold(hir_bclass(&[(b'c', b'x')]))
+        );
+        assert_eq!(
+            t("(?i-u)[c-da-b&&a-d]"),
+            hir_case_fold(hir_bclass(&[(b'a', b'd')]))
+        );
+        assert_eq!(
+            t("(?i-u)[a-d&&c-da-b]"),
+            hir_case_fold(hir_bclass(&[(b'a', b'd')]))
+        );
+
+        // In `[a^]`, `^` does not need to be escaped, so it makes sense that
+        // `^` is also allowed to be unescaped after `&&`.
+        assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')]));
+        // `]` needs to be escaped after `&&` since it's not at start of class.
+        assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')]));
+        assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')]));
+        assert_eq!(t(r"[\&&&&]"), hir_uclass(&[('&', '&')]));
+        assert_eq!(t(r"[\&&&\&]"), hir_uclass(&[('&', '&')]));
+        // Test precedence: union binds tighter than `&&`, so `z` is dropped.
+        assert_eq!(
+            t(r"[a-w&&[^c-g]z]"),
+            hir_uclass(&[('a', 'b'), ('h', 'w')])
+        );
+    }
+
+    #[test] // Intersections combined with class negation (`[^...]`).
+    fn class_bracketed_intersect_negate() {
+        #[cfg(feature = "unicode-perl")]
+        assert_eq!(
+            t(r"[^\w&&\d]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
+        );
+        assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
+        #[cfg(feature = "unicode-perl")]
+        assert_eq!(
+            t(r"[^[\w&&\d]]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
+        );
+        #[cfg(feature = "unicode-perl")]
+        assert_eq!(
+            t(r"[^[^\w&&\d]]"),
+            hir_uclass_query(ClassQuery::Binary("digit"))
+        );
+        #[cfg(feature = "unicode-perl")]
+        assert_eq!(t(r"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word()));
+
+        #[cfg(feature = "unicode-perl")]
+        assert_eq!(
+            t_bytes(r"(?-u)[^\w&&\d]"),
+            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit))
+        );
+        assert_eq!(
+            t_bytes(r"(?-u)[^[a-z&&a-c]]"),
+            hir_negate(hir_bclass(&[(b'a', b'c')]))
+        );
+        assert_eq!(
+            t_bytes(r"(?-u)[^[\w&&\d]]"),
+            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit))
+        );
+        assert_eq!(
+            t_bytes(r"(?-u)[^[^\w&&\d]]"),
+            hir_ascii_bclass(&ast::ClassAsciiKind::Digit)
+        );
+        assert_eq!(
+            t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
+            hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word))
+        );
+    }
+
+    #[test] // `--` removes the right-hand class from the left-hand one.
+    fn class_bracketed_difference() {
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"[\pL--[:ascii:]]"),
+            hir_difference(
+                hir_uclass_query(ClassQuery::Binary("letter")),
+                hir_uclass(&[('\0', '\x7F')])
+            )
+        );
+
+        assert_eq!(
+            t(r"(?-u)[[:alpha:]--[:lower:]]"),
+            hir_bclass(&[(b'A', b'Z')])
+        );
+    }
+
+    #[test] // `~~` keeps what is in exactly one of its two operands.
+    fn class_bracketed_symmetric_difference() {
+        #[cfg(feature = "unicode-script")]
+        assert_eq!(
+            t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
+            hir_uclass(&[
+                ('\u{0342}', '\u{0342}'),
+                ('\u{0345}', '\u{0345}'),
+                ('\u{1DC0}', '\u{1DC1}'),
+            ])
+        );
+        assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')]));
+
+        assert_eq!(
+            t(r"(?-u)[a-g~~c-j]"),
+            hir_bclass(&[(b'a', b'b'), (b'h', b'j')])
+        );
+    }
+
+    #[test] // With `(?x)`, whitespace and `#` comments in a pattern are skipped.
+    fn ignore_whitespace() {
+        assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));
+        assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S"));
+        assert_eq!(
+            t(r"(?x)\x # comment
+{ # comment
+    53 # comment
+} #comment"),
+            hir_lit("S")
+        );
+
+        assert_eq!(t(r"(?x)\x 53"), hir_lit("S"));
+        assert_eq!(
+            t(r"(?x)\x # comment
+        53 # comment"),
+            hir_lit("S")
+        );
+        assert_eq!(t(r"(?x)\x5 3"), hir_lit("S"));
+
+        #[cfg(feature = "unicode-gencat")]
+        assert_eq!(
+            t(r"(?x)\p # comment
+{ # comment
+    Separator # comment
+} # comment"),
+            hir_uclass_query(ClassQuery::Binary("separator"))
+        );
+
+        assert_eq!(
+            t(r"(?x)a # comment
+{ # comment
+    5 # comment
+    , # comment
+    10 # comment
+} # comment"),
+            hir_range(true, 5, Some(10), hir_lit("a"))
+        );
+
+        assert_eq!(t(r"(?x)a\  # hi there"), hir_lit("a "));
+    }
+
+    #[test] // Checks `is_utf8()` on patterns translated via `props_bytes`.
+    fn analysis_is_utf8() {
+        // Positive examples: Unicode mode, or byte mode limited to ASCII.
+        assert!(props_bytes(r"a").is_utf8());
+        assert!(props_bytes(r"ab").is_utf8());
+        assert!(props_bytes(r"(?-u)a").is_utf8());
+        assert!(props_bytes(r"(?-u)ab").is_utf8());
+        assert!(props_bytes(r"\xFF").is_utf8());
+        assert!(props_bytes(r"\xFF\xFF").is_utf8());
+        assert!(props_bytes(r"[^a]").is_utf8());
+        assert!(props_bytes(r"[^a][^a]").is_utf8());
+        assert!(props_bytes(r"\b").is_utf8());
+        assert!(props_bytes(r"\B").is_utf8());
+        assert!(props_bytes(r"(?-u)\b").is_utf8());
+        assert!(props_bytes(r"(?-u)\B").is_utf8());
+
+        // Negative examples: byte mode (`(?-u)`) reaching non-ASCII bytes.
+        assert!(!props_bytes(r"(?-u)\xFF").is_utf8());
+        assert!(!props_bytes(r"(?-u)\xFF\xFF").is_utf8());
+        assert!(!props_bytes(r"(?-u)[^a]").is_utf8());
+        assert!(!props_bytes(r"(?-u)[^a][^a]").is_utf8());
+    }
+
+    #[test] // `explicit_captures_len()` counts `(...)`; `(?:...)` adds none.
+    fn analysis_captures_len() {
+        assert_eq!(0, props(r"a").explicit_captures_len());
+        assert_eq!(0, props(r"(?:a)").explicit_captures_len());
+        assert_eq!(0, props(r"(?i-u:a)").explicit_captures_len());
+        assert_eq!(0, props(r"(?i-u)a").explicit_captures_len());
+        assert_eq!(1, props(r"(a)").explicit_captures_len());
+        assert_eq!(1, props(r"(?P<foo>a)").explicit_captures_len());
+        assert_eq!(1, props(r"()").explicit_captures_len());
+        assert_eq!(1, props(r"()a").explicit_captures_len());
+        assert_eq!(1, props(r"(a)+").explicit_captures_len());
+        assert_eq!(2, props(r"(a)(b)").explicit_captures_len());
+        assert_eq!(2, props(r"(a)|(b)").explicit_captures_len());
+        assert_eq!(2, props(r"((a))").explicit_captures_len());
+        assert_eq!(1, props(r"([a&&b])").explicit_captures_len());
+    }
+
+    #[test] // `Some(n)` if every match has `n` groups, else `None` (see cases).
+    fn analysis_static_captures_len() {
+        let len = |pattern| props(pattern).static_explicit_captures_len();
+        assert_eq!(Some(0), len(r""));
+        assert_eq!(Some(0), len(r"foo|bar"));
+        assert_eq!(None, len(r"(foo)|bar"));
+        assert_eq!(None, len(r"foo|(bar)"));
+        assert_eq!(Some(1), len(r"(foo|bar)"));
+        assert_eq!(Some(1), len(r"(a|b|c|d|e|f)"));
+        assert_eq!(Some(1), len(r"(a)|(b)|(c)|(d)|(e)|(f)"));
+        assert_eq!(Some(2), len(r"(a)(b)|(c)(d)|(e)(f)"));
+        assert_eq!(Some(6), len(r"(a)(b)(c)(d)(e)(f)"));
+        assert_eq!(Some(3), len(r"(a)(b)(extra)|(a)(b)()"));
+        assert_eq!(Some(3), len(r"(a)(b)((?:extra)?)"));
+        assert_eq!(None, len(r"(a)(b)(extra)?"));
+        assert_eq!(Some(1), len(r"(foo)|(bar)"));
+        assert_eq!(Some(2), len(r"(foo)(bar)"));
+        assert_eq!(Some(2), len(r"(foo)+(bar)"));
+        assert_eq!(None, len(r"(foo)*(bar)"));
+        assert_eq!(Some(0), len(r"(foo)?{0}"));
+        assert_eq!(None, len(r"(foo)?{1}"));
+        assert_eq!(Some(1), len(r"(foo){1}"));
+        assert_eq!(Some(1), len(r"(foo){1,}"));
+        assert_eq!(Some(1), len(r"(foo){1,}?"));
+        assert_eq!(None, len(r"(foo){1,}??"));
+        assert_eq!(None, len(r"(foo){0,}"));
+        assert_eq!(Some(1), len(r"(foo)(?:bar)"));
+        assert_eq!(Some(2), len(r"(foo(?:bar)+)(?:baz(boo))"));
+        assert_eq!(Some(2), len(r"(?P<bar>foo)(?:bar)(bal|loon)"));
+        assert_eq!(
+            Some(2),
+            len(r#"<(a)[^>]+href="([^"]+)"|<(img)[^>]+src="([^"]+)""#)
+        );
+    }
+
+    #[test] // Look-around patterns: non-empty `look_set()`, `minimum_len()` 0.
+    fn analysis_is_all_assertions() {
+        // Positive examples: only look-around assertions, so minimum is 0.
+        let p = props(r"\b");
+        assert!(!p.look_set().is_empty());
+        assert_eq!(p.minimum_len(), Some(0));
+
+        let p = props(r"\B");
+        assert!(!p.look_set().is_empty());
+        assert_eq!(p.minimum_len(), Some(0));
+
+        let p = props(r"^");
+        assert!(!p.look_set().is_empty());
+        assert_eq!(p.minimum_len(), Some(0));
+
+        let p = props(r"$");
+        assert!(!p.look_set().is_empty());
+        assert_eq!(p.minimum_len(), Some(0));
+
+        let p = props(r"\A");
+        assert!(!p.look_set().is_empty());
+        assert_eq!(p.minimum_len(), Some(0));
+
+        let p = props(r"\z");
+        assert!(!p.look_set().is_empty());
+        assert_eq!(p.minimum_len(), Some(0));
+
+        let p = props(r"$^\z\A\b\B");
+        assert!(!p.look_set().is_empty());
+        assert_eq!(p.minimum_len(), Some(0));
+
+        let p = props(r"$|^|\z|\A|\b|\B");
+        assert!(!p.look_set().is_empty());
+        assert_eq!(p.minimum_len(), Some(0));
+
+        let p = props(r"^$|$^");
+        assert!(!p.look_set().is_empty());
+        assert_eq!(p.minimum_len(), Some(0));
+
+        let p = props(r"((\b)+())*^");
+        assert!(!p.look_set().is_empty());
+        assert_eq!(p.minimum_len(), Some(0));
+
+        // Negative examples: a literal raises the minimum length to 1.
+        let p = props(r"^a");
+        assert!(!p.look_set().is_empty());
+        assert_eq!(p.minimum_len(), Some(1));
+    }
+
+    #[test] // A `\b` in a leading alternation shows in `look_set_prefix_any`.
+    fn analysis_look_set_prefix_any() {
+        let p = props(r"(?-u)(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))");
+        assert!(p.look_set_prefix_any().contains(Look::WordAscii));
+    }
+
+    #[test] // Start/end anchoring via `look_set_prefix`/`look_set_suffix`.
+    fn analysis_is_anchored() {
+        let is_start = |p| props(p).look_set_prefix().contains(Look::Start);
+        let is_end = |p| props(p).look_set_suffix().contains(Look::End);
+
+        // Positive examples: every match must begin (or end) at an anchor.
+        assert!(is_start(r"^"));
+        assert!(is_end(r"$"));
+
+        assert!(is_start(r"^^"));
+        assert!(is_end(r"$$"));
+
+        assert!(is_start(r"^$"));
+        assert!(is_end(r"^$"));
+
+        assert!(is_start(r"^foo"));
+        assert!(is_end(r"foo$"));
+
+        assert!(is_start(r"^foo|^bar"));
+        assert!(is_end(r"foo$|bar$"));
+
+        assert!(is_start(r"^(foo|bar)"));
+        assert!(is_end(r"(foo|bar)$"));
+
+        assert!(is_start(r"^+"));
+        assert!(is_end(r"$+"));
+        assert!(is_start(r"^++"));
+        assert!(is_end(r"$++"));
+        assert!(is_start(r"(^)+"));
+        assert!(is_end(r"($)+"));
+
+        assert!(is_start(r"$^"));
+        assert!(is_end(r"$^"));
+        assert!(is_start(r"$^|^$"));
+        assert!(is_end(r"$^|^$"));
+
+        assert!(is_start(r"\b^"));
+        assert!(is_end(r"$\b"));
+        assert!(is_start(r"^(?m:^)"));
+        assert!(is_end(r"(?m:$)$"));
+        assert!(is_start(r"(?m:^)^"));
+        assert!(is_end(r"$(?m:$)"));
+
+        // Negative examples: multi-line, optional or non-leading anchors.
+        assert!(!is_start(r"(?m)^"));
+        assert!(!is_end(r"(?m)$"));
+        assert!(!is_start(r"(?m:^$)|$^"));
+        assert!(!is_end(r"(?m:^$)|$^"));
+        assert!(!is_start(r"$^|(?m:^$)"));
+        assert!(!is_end(r"$^|(?m:^$)"));
+
+        assert!(!is_start(r"a^"));
+        assert!(!is_start(r"$a"));
+
+        assert!(!is_end(r"a^"));
+        assert!(!is_end(r"$a"));
+
+        assert!(!is_start(r"^foo|bar"));
+        assert!(!is_end(r"foo|bar$"));
+
+        assert!(!is_start(r"^*"));
+        assert!(!is_end(r"$*"));
+        assert!(!is_start(r"^*+"));
+        assert!(!is_end(r"$*+"));
+        assert!(!is_start(r"^+*"));
+        assert!(!is_end(r"$+*"));
+        assert!(!is_start(r"(^)*"));
+        assert!(!is_end(r"($)*"));
+    }
+
+    #[test] // Checks which anchors put `Start`/`End` into `look_set()`.
+    fn analysis_is_any_anchored() {
+        let is_start = |p| props(p).look_set().contains(Look::Start);
+        let is_end = |p| props(p).look_set().contains(Look::End);
+
+        // Positive examples: `^`/`\A` give `Start`, `$`/`\z` give `End`.
+        assert!(is_start(r"^"));
+        assert!(is_end(r"$"));
+        assert!(is_start(r"\A"));
+        assert!(is_end(r"\z"));
+
+        // Negative examples: multi-line anchors, or the opposite anchor.
+        assert!(!is_start(r"(?m)^"));
+        assert!(!is_end(r"(?m)$"));
+        assert!(!is_start(r"$"));
+        assert!(!is_end(r"^"));
+    }
+
+    #[test] // "Can match empty" is checked as `minimum_len() == Some(0)`.
+    fn analysis_can_empty() {
+        // Positive examples: `minimum_len()` is exactly `Some(0)`.
+        let assert_empty =
+            |p| assert_eq!(Some(0), props_bytes(p).minimum_len());
+        assert_empty(r"");
+        assert_empty(r"()");
+        assert_empty(r"()*");
+        assert_empty(r"()+");
+        assert_empty(r"()?");
+        assert_empty(r"a*");
+        assert_empty(r"a?");
+        assert_empty(r"a{0}");
+        assert_empty(r"a{0,}");
+        assert_empty(r"a{0,1}");
+        assert_empty(r"a{0,10}");
+        #[cfg(feature = "unicode-gencat")]
+        assert_empty(r"\pL*");
+        assert_empty(r"a*|b");
+        assert_empty(r"b|a*");
+        assert_empty(r"a|");
+        assert_empty(r"|a");
+        assert_empty(r"a||b");
+        assert_empty(r"a*a?(abcd)*");
+        assert_empty(r"^");
+        assert_empty(r"$");
+        assert_empty(r"(?m)^");
+        assert_empty(r"(?m)$");
+        assert_empty(r"\A");
+        assert_empty(r"\z");
+        assert_empty(r"\B");
+        assert_empty(r"(?-u)\B");
+        assert_empty(r"\b");
+        assert_empty(r"(?-u)\b");
+
+        // Negative examples: `minimum_len()` is anything but `Some(0)`.
+        let assert_non_empty =
+            |p| assert_ne!(Some(0), props_bytes(p).minimum_len());
+        assert_non_empty(r"a+");
+        assert_non_empty(r"a{1}");
+        assert_non_empty(r"a{1,}");
+        assert_non_empty(r"a{1,2}");
+        assert_non_empty(r"a{1,10}");
+        assert_non_empty(r"b|a");
+        assert_non_empty(r"a*a+(abcd)*");
+        #[cfg(feature = "unicode-gencat")]
+        assert_non_empty(r"\P{any}");
+        assert_non_empty(r"[a--a]");
+        assert_non_empty(r"[a&&b]");
+    }
+
+    #[test] // Checks which patterns report `is_literal()`.
+    fn analysis_is_literal() {
+        // Positive examples: plain text, also via `(?:...)` or `[a]`.
+        assert!(props(r"a").is_literal());
+        assert!(props(r"ab").is_literal());
+        assert!(props(r"abc").is_literal());
+        assert!(props(r"(?m)abc").is_literal());
+        assert!(props(r"(?:a)").is_literal());
+        assert!(props(r"foo(?:a)").is_literal());
+        assert!(props(r"(?:a)foo").is_literal());
+        assert!(props(r"[a]").is_literal());
+
+        // Negative examples: empty, looks, captures, repetitions, classes.
+        assert!(!props(r"").is_literal());
+        assert!(!props(r"^").is_literal());
+        assert!(!props(r"a|b").is_literal());
+        assert!(!props(r"(a)").is_literal());
+        assert!(!props(r"a+").is_literal());
+        assert!(!props(r"foo(a)").is_literal());
+        assert!(!props(r"(a)foo").is_literal());
+        assert!(!props(r"[ab]").is_literal());
+    }
+
+    #[test] // Checks which patterns report `is_alternation_literal()`.
+    fn analysis_is_alternation_literal() {
+        // Positive examples: one literal, or an alternation of literals.
+        assert!(props(r"a").is_alternation_literal());
+        assert!(props(r"ab").is_alternation_literal());
+        assert!(props(r"abc").is_alternation_literal());
+        assert!(props(r"(?m)abc").is_alternation_literal());
+        assert!(props(r"foo|bar").is_alternation_literal());
+        assert!(props(r"foo|bar|baz").is_alternation_literal());
+        assert!(props(r"[a]").is_alternation_literal());
+        assert!(props(r"(?:ab)|cd").is_alternation_literal());
+        assert!(props(r"ab|(?:cd)").is_alternation_literal());
+
+        // Negative examples. NOTE(review): `a|b` presumably becomes a class.
+        assert!(!props(r"").is_alternation_literal());
+        assert!(!props(r"^").is_alternation_literal());
+        assert!(!props(r"(a)").is_alternation_literal());
+        assert!(!props(r"a+").is_alternation_literal());
+        assert!(!props(r"foo(a)").is_alternation_literal());
+        assert!(!props(r"(a)foo").is_alternation_literal());
+        assert!(!props(r"[ab]").is_alternation_literal());
+        assert!(!props(r"[ab]|b").is_alternation_literal());
+        assert!(!props(r"a|[ab]").is_alternation_literal());
+        assert!(!props(r"(a)|b").is_alternation_literal());
+        assert!(!props(r"a|(b)").is_alternation_literal());
+        assert!(!props(r"a|b").is_alternation_literal());
+        assert!(!props(r"a|b|c").is_alternation_literal());
+        assert!(!props(r"[a]|b").is_alternation_literal());
+        assert!(!props(r"a|[b]").is_alternation_literal());
+        assert!(!props(r"(?:a)|b").is_alternation_literal());
+        assert!(!props(r"a|(?:b)").is_alternation_literal());
+        assert!(!props(r"(?:z|xx)@|xx").is_alternation_literal());
+    }
+
+    // This tests that the smart Hir::repetition constructor does some basic
+    // simplifications (`{0}` becomes empty, `{1}` and repeated looks unwrap).
+    #[test]
+    fn smart_repetition() {
+        assert_eq!(t(r"a{0}"), Hir::empty());
+        assert_eq!(t(r"a{1}"), hir_lit("a"));
+        assert_eq!(t(r"\B{32111}"), hir_look(hir::Look::WordUnicodeNegate));
+    }
+
+    // This tests that the smart Hir::concat constructor simplifies the given
+    // exprs in a way we expect (groups flattened, adjacent literals merged).
+    #[test]
+    fn smart_concat() {
+        assert_eq!(t(""), Hir::empty());
+        assert_eq!(t("(?:)"), Hir::empty());
+        assert_eq!(t("abc"), hir_lit("abc"));
+        assert_eq!(t("(?:foo)(?:bar)"), hir_lit("foobar"));
+        assert_eq!(t("quux(?:foo)(?:bar)baz"), hir_lit("quuxfoobarbaz"));
+        assert_eq!(
+            t("foo(?:bar^baz)quux"),
+            hir_cat(vec![
+                hir_lit("foobar"),
+                hir_look(hir::Look::Start),
+                hir_lit("bazquux"),
+            ])
+        );
+        assert_eq!(
+            t("foo(?:ba(?:r^b)az)quux"),
+            hir_cat(vec![
+                hir_lit("foobar"),
+                hir_look(hir::Look::Start),
+                hir_lit("bazquux"),
+            ])
+        );
+    }
+
+    // This tests that the smart Hir::alternation constructor simplifies the
+    // given exprs in a way we expect (flattening, classes, common prefixes).
+    #[test]
+    fn smart_alternation() {
+        assert_eq!(
+            t("(?:foo)|(?:bar)"),
+            hir_alt(vec![hir_lit("foo"), hir_lit("bar")])
+        );
+        assert_eq!(
+            t("quux|(?:abc|def|xyz)|baz"),
+            hir_alt(vec![
+                hir_lit("quux"),
+                hir_lit("abc"),
+                hir_lit("def"),
+                hir_lit("xyz"),
+                hir_lit("baz"),
+            ])
+        );
+        assert_eq!(
+            t("quux|(?:abc|(?:def|mno)|xyz)|baz"),
+            hir_alt(vec![
+                hir_lit("quux"),
+                hir_lit("abc"),
+                hir_lit("def"),
+                hir_lit("mno"),
+                hir_lit("xyz"),
+                hir_lit("baz"),
+            ])
+        );
+        assert_eq!(
+            t("a|b|c|d|e|f|x|y|z"),
+            hir_uclass(&[('a', 'f'), ('x', 'z')]),
+        );
+        // Tests that we lift common prefixes out of an alternation.
+        assert_eq!(
+            t("[A-Z]foo|[A-Z]quux"),
+            hir_cat(vec![
+                hir_uclass(&[('A', 'Z')]),
+                hir_alt(vec![hir_lit("foo"), hir_lit("quux")]),
+            ]),
+        );
+        assert_eq!(
+            t("[A-Z][A-Z]|[A-Z]quux"),
+            hir_cat(vec![
+                hir_uclass(&[('A', 'Z')]),
+                hir_alt(vec![hir_uclass(&[('A', 'Z')]), hir_lit("quux")]),
+            ]),
+        );
+        assert_eq!(
+            t("[A-Z][A-Z]|[A-Z][A-Z]quux"),
+            hir_cat(vec![
+                hir_uclass(&[('A', 'Z')]),
+                hir_uclass(&[('A', 'Z')]),
+                hir_alt(vec![Hir::empty(), hir_lit("quux")]),
+            ]),
+        );
+        assert_eq!(
+            t("[A-Z]foo|[A-Z]foobar"),
+            hir_cat(vec![
+                hir_uclass(&[('A', 'Z')]),
+                hir_alt(vec![hir_lit("foo"), hir_lit("foobar")]),
+            ]),
+        );
+    }
+
+    #[test] // An alternation of one empty concat gives `Hir::empty()`.
+    fn regression_alt_empty_concat() {
+        use crate::ast::{self, Ast};
+
+        let span = Span::splat(Position::new(0, 0, 0));
+        let ast = Ast::alternation(ast::Alternation {
+            span,
+            asts: vec![Ast::concat(ast::Concat { span, asts: vec![] })],
+        });
+
+        let mut t = Translator::new();
+        assert_eq!(Ok(Hir::empty()), t.translate("", &ast));
+    }
+
+    #[test] // A concat holding an empty alternation gives `Hir::fail()`.
+    fn regression_empty_alt() {
+        use crate::ast::{self, Ast};
+
+        let span = Span::splat(Position::new(0, 0, 0));
+        let ast = Ast::concat(ast::Concat {
+            span,
+            asts: vec![Ast::alternation(ast::Alternation {
+                span,
+                asts: vec![],
+            })],
+        });
+
+        let mut t = Translator::new();
+        assert_eq!(Ok(Hir::fail()), t.translate("", &ast));
+    }
+
+    #[test] // A one-branch alternation of `.` gives just the dot.
+    fn regression_singleton_alt() {
+        use crate::{
+            ast::{self, Ast},
+            hir::Dot,
+        };
+
+        let span = Span::splat(Position::new(0, 0, 0));
+        let ast = Ast::concat(ast::Concat {
+            span,
+            asts: vec![Ast::alternation(ast::Alternation {
+                span,
+                asts: vec![Ast::dot(span)],
+            })],
+        });
+
+        let mut t = Translator::new();
+        assert_eq!(Ok(Hir::dot(Dot::AnyCharExceptLF)), t.translate("", &ast));
+    }
+
+    // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63168
+    #[test] // Pins the HIR produced for a fuzzer-found pattern.
+    fn regression_fuzz_match() {
+        let pat = "[(\u{6} \0-\u{afdf5}]  \0 ";
+        let ast = ParserBuilder::new()
+            .octal(false)
+            .ignore_whitespace(true)
+            .build()
+            .parse(pat)
+            .unwrap();
+        let hir = TranslatorBuilder::new()
+            .utf8(true)
+            .case_insensitive(false)
+            .multi_line(false)
+            .dot_matches_new_line(false)
+            .swap_greed(true)
+            .unicode(true)
+            .build()
+            .translate(pat, &ast)
+            .unwrap();
+        assert_eq!(
+            hir,
+            Hir::concat(vec![
+                hir_uclass(&[('\0', '\u{afdf5}')]),
+                hir_lit("\0"),
+            ])
+        );
+    }
+
+    // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63155
+    #[cfg(feature = "unicode")]
+    #[test] // Fuzzer-found pattern; only checks that `t` does not panic.
+    fn regression_fuzz_difference1() {
+        let pat = r"\W\W|\W[^\v--\W\W\P{Script_Extensions:Pau_Cin_Hau}\u10A1A1-\U{3E3E3}--~~~~--~~~~~~~~------~~~~~~--~~~~~~]*";
+        let _ = t(pat); // shouldn't panic
+    }
+
+    // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63153
+    #[test] // Fuzzer-found pattern; only checks that `t` does not panic.
+    fn regression_fuzz_char_decrement1() {
+        let pat = "w[w[^w?\rw\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\r\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0w?\rw[^w?\rw[^w?\rw[^w\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0\0\0\0\0\0\0\0*\0\0\u{1}\0]\0\0-*\0][^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0\0\0\0\0\0\0\0x\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\0\0\0*??\0\u{7f}{2}\u{10}??\0\0\0\0\0\0\0\0\0\u{3}\0\0\0}\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\0\u{1}\0]\0\u{1}\u{1}H-i]-]\0\0\0\0\u{1}\0]\0\0\0\u{1}\0]\0\0-*\0\0\0\0\u{1}9-\u{7f}]\0'|-\u{7f}]\0'|(?i-ux)[-\u{7f}]\0'\u{3}\0\0\0}\0-*\0]<D\0\0\0\0\0\0\u{1}]\0\0\0\0]\0\0-*\0]\0\0 ";
+        let _ = t(pat); // shouldn't panic
+    }
+}
diff --git a/vendor/regex-syntax/src/hir/visitor.rs b/vendor/regex-syntax/src/hir/visitor.rs
new file mode 100644
index 0000000..f30f0a1
--- /dev/null
+++ b/vendor/regex-syntax/src/hir/visitor.rs
@@ -0,0 +1,215 @@
+use alloc::{vec, vec::Vec};
+
+use crate::hir::{self, Hir, HirKind};
+
+/// A trait for visiting the high-level IR (HIR) in depth-first order.
+///
+/// The principal aim of this trait is to enable callers to perform case
+/// analysis on a high-level intermediate representation of a regular
+/// expression without necessarily using recursion. In particular, this permits
+/// callers to do case analysis with constant stack usage, which can be
+/// important since the size of an HIR may be proportional to end-user input.
+///
+/// Typical usage of this trait involves providing an implementation and then
+/// running it using the [`visit`] function.
+pub trait Visitor {
+    /// The result of visiting an HIR.
+    type Output;
+    /// An error that visiting an HIR might return.
+    type Err;
+
+    /// All implementors of `Visitor` must provide a `finish` method, which
+    /// consumes the visitor and yields the visit's result or an error.
+    fn finish(self) -> Result<Self::Output, Self::Err>;
+
+    /// This method is called before beginning traversal of the HIR.
+    fn start(&mut self) {}
+
+    /// This method is called on an `Hir` before descending into its child
+    /// `Hir` nodes. The default implementation does nothing.
+    fn visit_pre(&mut self, _hir: &Hir) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called on an `Hir` after descending into all of its
+    /// child `Hir` nodes. The default implementation does nothing.
+    fn visit_post(&mut self, _hir: &Hir) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called between child nodes of an alternation.
+    fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called between child nodes of a concatenation.
+    fn visit_concat_in(&mut self) -> Result<(), Self::Err> {
+        Ok(())
+    }
+}
+
+/// Executes an implementation of `Visitor` in constant stack space.
+///
+/// This function will visit every node in the given `Hir` while calling
+/// appropriate methods provided by the [`Visitor`] trait.
+///
+/// The primary use case for this function is when one wants to perform case
+/// analysis over an `Hir` without using a stack size proportional to the depth
+/// of the `Hir`. Namely, this function will instead use constant stack space,
+/// but will use heap space proportional to the size of the `Hir`. This may be
+/// desirable in cases where the size of `Hir` is proportional to end-user
+/// input.
+///
+/// If the visitor returns an error at any point, then visiting is stopped and
+/// the error is returned.
+pub fn visit<V: Visitor>(hir: &Hir, visitor: V) -> Result<V::Output, V::Err> {
+    HeapVisitor::new().visit(hir, visitor)
+}
+
+/// HeapVisitor visits every node in an `Hir` using constant call stack size
+/// and a heap size proportional to the size of the `Hir`.
+struct HeapVisitor<'a> {
+    /// A stack of (`Hir` node, frame for its children) pairs. This is roughly
+    /// analogous to the call stack used in a typical recursive visitor.
+    stack: Vec<(&'a Hir, Frame<'a>)>,
+}
+
+/// A single stack frame of the traversal. Only `Hir` kinds that have child
+/// nodes (repetition, capture, concat, alternation) ever get a frame.
+enum Frame<'a> {
+    /// A stack frame allocated just before descending into a repetition
+    /// operator's child node.
+    Repetition(&'a hir::Repetition),
+    /// A stack frame allocated just before descending into a capture's child
+    /// node.
+    Capture(&'a hir::Capture),
+    /// The stack frame used while visiting every child node of a concatenation
+    /// of expressions.
+    Concat {
+        /// The child node we are currently visiting.
+        head: &'a Hir,
+        /// The child nodes still left to visit after `head` (may be empty).
+        tail: &'a [Hir],
+    },
+    /// The stack frame used while visiting every child node of an alternation
+    /// of expressions.
+    Alternation {
+        /// The child node we are currently visiting.
+        head: &'a Hir,
+        /// The child nodes still left to visit after `head` (may be empty).
+        tail: &'a [Hir],
+    },
+}
+
+impl<'a> HeapVisitor<'a> {
+    fn new() -> HeapVisitor<'a> {
+        HeapVisitor { stack: vec![] }
+    }
+
+    fn visit<V: Visitor>(
+        &mut self,
+        mut hir: &'a Hir,
+        mut visitor: V,
+    ) -> Result<V::Output, V::Err> {
+        self.stack.clear(); // start each traversal with an empty stack
+
+        visitor.start();
+        loop {
+            visitor.visit_pre(hir)?;
+            if let Some(x) = self.induct(hir) {
+                let child = x.child();
+                self.stack.push((hir, x));
+                hir = child;
+                continue; // descend: pre-visit the child next
+            }
+            // No frame means `hir` has no children (a base case), so we can
+            // post visit it now.
+            visitor.visit_post(hir)?;
+
+            // At this point, we now try to pop our call stack until it is
+            // either empty or we hit a frame with children left to visit.
+            loop {
+                let (post_hir, frame) = match self.stack.pop() {
+                    None => return visitor.finish(), // traversal complete
+                    Some((post_hir, frame)) => (post_hir, frame),
+                };
+                // If this is a concat/alternation, then there may be more
+                // children (inductive steps) left to process.
+                if let Some(x) = self.pop(frame) {
+                    match x {
+                        Frame::Alternation { .. } => {
+                            visitor.visit_alternation_in()?;
+                        }
+                        Frame::Concat { .. } => {
+                            visitor.visit_concat_in()?;
+                        }
+                        _ => {} // `pop` never returns other frame kinds
+                    }
+                    hir = x.child(); // visit the next sibling
+                    self.stack.push((post_hir, x));
+                    break; // back to the outer loop to pre-visit it
+                }
+                // Otherwise, we've finished visiting all the child nodes for
+                // this HIR, so we can post visit it now.
+                visitor.visit_post(post_hir)?;
+            }
+        }
+    }
+
+    /// Build a stack frame for the given HIR if one is needed (which occurs if
+    /// and only if there are child nodes in the HIR). Otherwise, return None.
+    fn induct(&mut self, hir: &'a Hir) -> Option<Frame<'a>> {
+        match *hir.kind() {
+            HirKind::Repetition(ref x) => Some(Frame::Repetition(x)),
+            HirKind::Capture(ref x) => Some(Frame::Capture(x)),
+            HirKind::Concat(ref x) if x.is_empty() => None,
+            HirKind::Concat(ref x) => {
+                Some(Frame::Concat { head: &x[0], tail: &x[1..] })
+            }
+            HirKind::Alternation(ref x) if x.is_empty() => None,
+            HirKind::Alternation(ref x) => {
+                Some(Frame::Alternation { head: &x[0], tail: &x[1..] })
+            }
+            _ => None,
+        }
+    }
+
+    /// Consumes the given frame. If it has another child left to visit, then
+    /// return a frame positioned on that child, otherwise return `None`.
+    fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
+        match induct {
+            Frame::Repetition(_) => None,
+            Frame::Capture(_) => None,
+            Frame::Concat { tail, .. } => {
+                if tail.is_empty() {
+                    None
+                } else {
+                    Some(Frame::Concat { head: &tail[0], tail: &tail[1..] })
+                }
+            }
+            Frame::Alternation { tail, .. } => {
+                if tail.is_empty() {
+                    None
+                } else {
+                    Some(Frame::Alternation {
+                        head: &tail[0],
+                        tail: &tail[1..],
+                    })
+                }
+            }
+        }
+    }
+}
+
+impl<'a> Frame<'a> {
+    /// Return the child HIR node this frame refers to: `sub` for a repetition
+    /// or capture, and `head` for a concat or alternation.
+    fn child(&self) -> &'a Hir {
+        match *self {
+            Frame::Repetition(rep) => &rep.sub,
+            Frame::Capture(capture) => &capture.sub,
+            Frame::Concat { head, .. } => head,
+            Frame::Alternation { head, .. } => head,
+        }
+    }
+}