author    | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:47:55 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:47:55 +0000
commit    | 2aadc03ef15cb5ca5cc2af8a7c08e070742f0ac4 (patch)
tree      | 033cc839730fda84ff08db877037977be94e5e3a /vendor/regex-automata/src/util
parent    | Initial commit. (diff)
Adding upstream version 0.70.1+ds1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/regex-automata/src/util')
29 files changed, 19091 insertions, 0 deletions
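Of the modules added below, alphabet.rs is the largest conceptual piece: it collapses the 256 possible byte values into equivalence classes so that a DFA's transition table only needs one column per class rather than one per byte. As a quick orientation, here is a minimal sketch of how that surfaces through the public API, assembled from the doc examples contained in the patch itself (the `[a-z]+` pattern and the assertions are copied from those examples; only the `main` wrapper and the loop over representatives are added here):

use regex_automata::nfa::thompson::NFA;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let nfa = NFA::new("[a-z]+")?;
    let classes = nfa.byte_classes();
    // 'a' and 'z' never discriminate a match for this regex, so they share a class...
    assert_eq!(classes.get(b'a'), classes.get(b'z'));
    // ...while 'a' and 'A' do not.
    assert_ne!(classes.get(b'a'), classes.get(b'A'));
    // One representative byte per class (plus the EOI sentinel) is enough to
    // visit every distinct transition of the automaton.
    for unit in classes.representatives(..) {
        println!("{:?}", unit);
    }
    Ok(())
}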
diff --git a/vendor/regex-automata/src/util/alphabet.rs b/vendor/regex-automata/src/util/alphabet.rs
new file mode 100644
index 0000000..22b5a76
--- /dev/null
+++ b/vendor/regex-automata/src/util/alphabet.rs
@@ -0,0 +1,1139 @@
+/*!
+This module provides APIs for dealing with the alphabets of finite state
+machines.
+
+There are two principal types in this module, [`ByteClasses`] and [`Unit`].
+The former defines the alphabet of a finite state machine while the latter
+represents an element of that alphabet.
+
+To a first approximation, the alphabet of all automata in this crate is just
+a `u8`. Namely, every distinct byte value. All 256 of them. In practice, this
+can be quite wasteful when building a transition table for a DFA, since it
+requires storing a state identifier for each element in the alphabet. Instead,
+we collapse the alphabet of an automaton down into equivalence classes, where
+every byte in the same equivalence class never discriminates between a match or
+a non-match from any other byte in the same class. For example, in the regex
+`[a-z]+`, then you could consider it having an alphabet consisting of two
+equivalence classes: `a-z` and everything else. In terms of the transitions on
+an automaton, it doesn't actually require representing every distinct byte.
+Just the equivalence classes.
+
+The downside of equivalence classes is that, of course, searching a haystack
+deals with individual byte values. Those byte values need to be mapped to
+their corresponding equivalence class. This is what `ByteClasses` does. In
+practice, doing this for every state transition has negligible impact on modern
+CPUs. Moreover, it helps make more efficient use of the CPU cache by (possibly
+considerably) shrinking the size of the transition table.
+
+One last hiccup concerns `Unit`. Namely, because of look-around and how the
+DFAs in this crate work, we need to add a sentinel value to our alphabet
+of equivalence classes that represents the "end" of a search. We call that
+sentinel [`Unit::eoi`] or "end of input." Thus, a `Unit` is either an
+equivalence class corresponding to a set of bytes, or it is a special "end of
+input" sentinel.
+
+In general, you should not expect to need either of these types unless you're
+doing lower level shenanigans with DFAs, or even building your own DFAs.
+(Although, you don't have to use these types to build your own DFAs of course.)
+For example, if you're walking a DFA's state graph, it's probably useful to
+make use of [`ByteClasses`] to visit each element in the DFA's alphabet instead
+of just visiting every distinct `u8` value. The latter isn't necessarily wrong,
+but it could be potentially very wasteful.
+*/
+use crate::util::{
+    escape::DebugByte,
+    wire::{self, DeserializeError, SerializeError},
+};
+
+/// Unit represents a single unit of haystack for DFA based regex engines.
+///
+/// It is not expected for consumers of this crate to need to use this type
+/// unless they are implementing their own DFA. And even then, it's not
+/// required: implementors may use other techniques to handle haystack units.
+///
+/// Typically, a single unit of haystack for a DFA would be a single byte.
+/// However, for the DFAs in this crate, matches are delayed by a single byte
+/// in order to handle look-ahead assertions (`\b`, `$` and `\z`). Thus, once
+/// we have consumed the haystack, we must run the DFA through one additional
+/// transition using a unit that indicates the haystack has ended.
+/// +/// There is no way to represent a sentinel with a `u8` since all possible +/// values *may* be valid haystack units to a DFA, therefore this type +/// explicitly adds room for a sentinel value. +/// +/// The sentinel EOI value is always its own equivalence class and is +/// ultimately represented by adding 1 to the maximum equivalence class value. +/// So for example, the regex `^[a-z]+$` might be split into the following +/// equivalence classes: +/// +/// ```text +/// 0 => [\x00-`] +/// 1 => [a-z] +/// 2 => [{-\xFF] +/// 3 => [EOI] +/// ``` +/// +/// Where EOI is the special sentinel value that is always in its own +/// singleton equivalence class. +#[derive(Clone, Copy, Eq, PartialEq, PartialOrd, Ord)] +pub struct Unit(UnitKind); + +#[derive(Clone, Copy, Eq, PartialEq, PartialOrd, Ord)] +enum UnitKind { + /// Represents a byte value, or more typically, an equivalence class + /// represented as a byte value. + U8(u8), + /// Represents the "end of input" sentinel. We regretably use a `u16` + /// here since the maximum sentinel value is `256`. Thankfully, we don't + /// actually store a `Unit` anywhere, so this extra space shouldn't be too + /// bad. + EOI(u16), +} + +impl Unit { + /// Create a new haystack unit from a byte value. + /// + /// All possible byte values are legal. However, when creating a haystack + /// unit for a specific DFA, one should be careful to only construct units + /// that are in that DFA's alphabet. Namely, one way to compact a DFA's + /// in-memory representation is to collapse its transitions to a set of + /// equivalence classes into a set of all possible byte values. If a DFA + /// uses equivalence classes instead of byte values, then the byte given + /// here should be the equivalence class. + pub fn u8(byte: u8) -> Unit { + Unit(UnitKind::U8(byte)) + } + + /// Create a new "end of input" haystack unit. + /// + /// The value given is the sentinel value used by this unit to represent + /// the "end of input." The value should be the total number of equivalence + /// classes in the corresponding alphabet. Its maximum value is `256`, + /// which occurs when every byte is its own equivalence class. + /// + /// # Panics + /// + /// This panics when `num_byte_equiv_classes` is greater than `256`. + pub fn eoi(num_byte_equiv_classes: usize) -> Unit { + assert!( + num_byte_equiv_classes <= 256, + "max number of byte-based equivalent classes is 256, but got {}", + num_byte_equiv_classes, + ); + Unit(UnitKind::EOI(u16::try_from(num_byte_equiv_classes).unwrap())) + } + + /// If this unit is not an "end of input" sentinel, then returns its + /// underlying byte value. Otherwise return `None`. + pub fn as_u8(self) -> Option<u8> { + match self.0 { + UnitKind::U8(b) => Some(b), + UnitKind::EOI(_) => None, + } + } + + /// If this unit is an "end of input" sentinel, then return the underlying + /// sentinel value that was given to [`Unit::eoi`]. Otherwise return + /// `None`. + pub fn as_eoi(self) -> Option<u16> { + match self.0 { + UnitKind::U8(_) => None, + UnitKind::EOI(sentinel) => Some(sentinel), + } + } + + /// Return this unit as a `usize`, regardless of whether it is a byte value + /// or an "end of input" sentinel. In the latter case, the underlying + /// sentinel value given to [`Unit::eoi`] is returned. + pub fn as_usize(self) -> usize { + match self.0 { + UnitKind::U8(b) => usize::from(b), + UnitKind::EOI(eoi) => usize::from(eoi), + } + } + + /// Returns true if and only of this unit is a byte value equivalent to the + /// byte given. 
This always returns false when this is an "end of input" + /// sentinel. + pub fn is_byte(self, byte: u8) -> bool { + self.as_u8().map_or(false, |b| b == byte) + } + + /// Returns true when this unit represents an "end of input" sentinel. + pub fn is_eoi(self) -> bool { + self.as_eoi().is_some() + } + + /// Returns true when this unit corresponds to an ASCII word byte. + /// + /// This always returns false when this unit represents an "end of input" + /// sentinel. + pub fn is_word_byte(self) -> bool { + self.as_u8().map_or(false, crate::util::utf8::is_word_byte) + } +} + +impl core::fmt::Debug for Unit { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + match self.0 { + UnitKind::U8(b) => write!(f, "{:?}", DebugByte(b)), + UnitKind::EOI(_) => write!(f, "EOI"), + } + } +} + +/// A representation of byte oriented equivalence classes. +/// +/// This is used in a DFA to reduce the size of the transition table. This can +/// have a particularly large impact not only on the total size of a dense DFA, +/// but also on compile times. +/// +/// The essential idea here is that the alphabet of a DFA is shrunk from the +/// usual 256 distinct byte values down to a set of equivalence classes. The +/// guarantee you get is that any byte belonging to the same equivalence class +/// can be treated as if it were any other byte in the same class, and the +/// result of a search wouldn't change. +/// +/// # Example +/// +/// This example shows how to get byte classes from an +/// [`NFA`](crate::nfa::thompson::NFA) and ask for the class of various bytes. +/// +/// ``` +/// use regex_automata::nfa::thompson::NFA; +/// +/// let nfa = NFA::new("[a-z]+")?; +/// let classes = nfa.byte_classes(); +/// // 'a' and 'z' are in the same class for this regex. +/// assert_eq!(classes.get(b'a'), classes.get(b'z')); +/// // But 'a' and 'A' are not. +/// assert_ne!(classes.get(b'a'), classes.get(b'A')); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +#[derive(Clone, Copy)] +pub struct ByteClasses([u8; 256]); + +impl ByteClasses { + /// Creates a new set of equivalence classes where all bytes are mapped to + /// the same class. + #[inline] + pub fn empty() -> ByteClasses { + ByteClasses([0; 256]) + } + + /// Creates a new set of equivalence classes where each byte belongs to + /// its own equivalence class. + #[inline] + pub fn singletons() -> ByteClasses { + let mut classes = ByteClasses::empty(); + for b in 0..=255 { + classes.set(b, b); + } + classes + } + + /// Deserializes a byte class map from the given slice. If the slice is of + /// insufficient length or otherwise contains an impossible mapping, then + /// an error is returned. Upon success, the number of bytes read along with + /// the map are returned. The number of bytes read is always a multiple of + /// 8. + pub(crate) fn from_bytes( + slice: &[u8], + ) -> Result<(ByteClasses, usize), DeserializeError> { + wire::check_slice_len(slice, 256, "byte class map")?; + let mut classes = ByteClasses::empty(); + for (b, &class) in slice[..256].iter().enumerate() { + classes.set(u8::try_from(b).unwrap(), class); + } + // We specifically don't use 'classes.iter()' here because that + // iterator depends on 'classes.alphabet_len()' being correct. But that + // is precisely the thing we're trying to verify below! 
+ for &b in classes.0.iter() { + if usize::from(b) >= classes.alphabet_len() { + return Err(DeserializeError::generic( + "found equivalence class greater than alphabet len", + )); + } + } + Ok((classes, 256)) + } + + /// Writes this byte class map to the given byte buffer. if the given + /// buffer is too small, then an error is returned. Upon success, the total + /// number of bytes written is returned. The number of bytes written is + /// guaranteed to be a multiple of 8. + pub(crate) fn write_to( + &self, + mut dst: &mut [u8], + ) -> Result<usize, SerializeError> { + let nwrite = self.write_to_len(); + if dst.len() < nwrite { + return Err(SerializeError::buffer_too_small("byte class map")); + } + for b in 0..=255 { + dst[0] = self.get(b); + dst = &mut dst[1..]; + } + Ok(nwrite) + } + + /// Returns the total number of bytes written by `write_to`. + pub(crate) fn write_to_len(&self) -> usize { + 256 + } + + /// Set the equivalence class for the given byte. + #[inline] + pub fn set(&mut self, byte: u8, class: u8) { + self.0[usize::from(byte)] = class; + } + + /// Get the equivalence class for the given byte. + #[inline] + pub fn get(&self, byte: u8) -> u8 { + self.0[usize::from(byte)] + } + + /// Get the equivalence class for the given haystack unit and return the + /// class as a `usize`. + #[inline] + pub fn get_by_unit(&self, unit: Unit) -> usize { + match unit.0 { + UnitKind::U8(b) => usize::from(self.get(b)), + UnitKind::EOI(b) => usize::from(b), + } + } + + /// Create a unit that represents the "end of input" sentinel based on the + /// number of equivalence classes. + #[inline] + pub fn eoi(&self) -> Unit { + // The alphabet length already includes the EOI sentinel, hence why + // we subtract 1. + Unit::eoi(self.alphabet_len().checked_sub(1).unwrap()) + } + + /// Return the total number of elements in the alphabet represented by + /// these equivalence classes. Equivalently, this returns the total number + /// of equivalence classes. + #[inline] + pub fn alphabet_len(&self) -> usize { + // Add one since the number of equivalence classes is one bigger than + // the last one. But add another to account for the final EOI class + // that isn't explicitly represented. + usize::from(self.0[255]) + 1 + 1 + } + + /// Returns the stride, as a base-2 exponent, required for these + /// equivalence classes. + /// + /// The stride is always the smallest power of 2 that is greater than or + /// equal to the alphabet length, and the `stride2` returned here is the + /// exponent applied to `2` to get the smallest power. This is done so that + /// converting between premultiplied state IDs and indices can be done with + /// shifts alone, which is much faster than integer division. + #[inline] + pub fn stride2(&self) -> usize { + let zeros = self.alphabet_len().next_power_of_two().trailing_zeros(); + usize::try_from(zeros).unwrap() + } + + /// Returns true if and only if every byte in this class maps to its own + /// equivalence class. Equivalently, there are 257 equivalence classes + /// and each class contains either exactly one byte or corresponds to the + /// singleton class containing the "end of input" sentinel. + #[inline] + pub fn is_singleton(&self) -> bool { + self.alphabet_len() == 257 + } + + /// Returns an iterator over all equivalence classes in this set. + #[inline] + pub fn iter(&self) -> ByteClassIter<'_> { + ByteClassIter { classes: self, i: 0 } + } + + /// Returns an iterator over a sequence of representative bytes from each + /// equivalence class within the range of bytes given. 
+ /// + /// When the given range is unbounded on both sides, the iterator yields + /// exactly N items, where N is equivalent to the number of equivalence + /// classes. Each item is an arbitrary byte drawn from each equivalence + /// class. + /// + /// This is useful when one is determinizing an NFA and the NFA's alphabet + /// hasn't been converted to equivalence classes. Picking an arbitrary byte + /// from each equivalence class then permits a full exploration of the NFA + /// instead of using every possible byte value and thus potentially saves + /// quite a lot of redundant work. + /// + /// # Example + /// + /// This shows an example of what a complete sequence of representatives + /// might look like from a real example. + /// + /// ``` + /// use regex_automata::{nfa::thompson::NFA, util::alphabet::Unit}; + /// + /// let nfa = NFA::new("[a-z]+")?; + /// let classes = nfa.byte_classes(); + /// let reps: Vec<Unit> = classes.representatives(..).collect(); + /// // Note that the specific byte values yielded are not guaranteed! + /// let expected = vec![ + /// Unit::u8(b'\x00'), + /// Unit::u8(b'a'), + /// Unit::u8(b'{'), + /// Unit::eoi(3), + /// ]; + /// assert_eq!(expected, reps); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + /// + /// Note though, that you can ask for an arbitrary range of bytes, and only + /// representatives for that range will be returned: + /// + /// ``` + /// use regex_automata::{nfa::thompson::NFA, util::alphabet::Unit}; + /// + /// let nfa = NFA::new("[a-z]+")?; + /// let classes = nfa.byte_classes(); + /// let reps: Vec<Unit> = classes.representatives(b'A'..=b'z').collect(); + /// // Note that the specific byte values yielded are not guaranteed! + /// let expected = vec![ + /// Unit::u8(b'A'), + /// Unit::u8(b'a'), + /// ]; + /// assert_eq!(expected, reps); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn representatives<R: core::ops::RangeBounds<u8>>( + &self, + range: R, + ) -> ByteClassRepresentatives<'_> { + use core::ops::Bound; + + let cur_byte = match range.start_bound() { + Bound::Included(&i) => usize::from(i), + Bound::Excluded(&i) => usize::from(i).checked_add(1).unwrap(), + Bound::Unbounded => 0, + }; + let end_byte = match range.end_bound() { + Bound::Included(&i) => { + Some(usize::from(i).checked_add(1).unwrap()) + } + Bound::Excluded(&i) => Some(usize::from(i)), + Bound::Unbounded => None, + }; + assert_ne!( + cur_byte, + usize::MAX, + "start range must be less than usize::MAX", + ); + ByteClassRepresentatives { + classes: self, + cur_byte, + end_byte, + last_class: None, + } + } + + /// Returns an iterator of the bytes in the given equivalence class. + /// + /// This is useful when one needs to know the actual bytes that belong to + /// an equivalence class. For example, conceptually speaking, accelerating + /// a DFA state occurs when a state only has a few outgoing transitions. + /// But in reality, what is required is that there are only a small + /// number of distinct bytes that can lead to an outgoing transition. The + /// difference is that any one transition can correspond to an equivalence + /// class which may contains many bytes. Therefore, DFA state acceleration + /// considers the actual elements in each equivalence class of each + /// outgoing transition. + /// + /// # Example + /// + /// This shows an example of how to get all of the elements in an + /// equivalence class. 
+ /// + /// ``` + /// use regex_automata::{nfa::thompson::NFA, util::alphabet::Unit}; + /// + /// let nfa = NFA::new("[a-z]+")?; + /// let classes = nfa.byte_classes(); + /// let elements: Vec<Unit> = classes.elements(Unit::u8(1)).collect(); + /// let expected: Vec<Unit> = (b'a'..=b'z').map(Unit::u8).collect(); + /// assert_eq!(expected, elements); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn elements(&self, class: Unit) -> ByteClassElements { + ByteClassElements { classes: self, class, byte: 0 } + } + + /// Returns an iterator of byte ranges in the given equivalence class. + /// + /// That is, a sequence of contiguous ranges are returned. Typically, every + /// class maps to a single contiguous range. + fn element_ranges(&self, class: Unit) -> ByteClassElementRanges { + ByteClassElementRanges { elements: self.elements(class), range: None } + } +} + +impl Default for ByteClasses { + fn default() -> ByteClasses { + ByteClasses::singletons() + } +} + +impl core::fmt::Debug for ByteClasses { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + if self.is_singleton() { + write!(f, "ByteClasses({{singletons}})") + } else { + write!(f, "ByteClasses(")?; + for (i, class) in self.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{:?} => [", class.as_usize())?; + for (start, end) in self.element_ranges(class) { + if start == end { + write!(f, "{:?}", start)?; + } else { + write!(f, "{:?}-{:?}", start, end)?; + } + } + write!(f, "]")?; + } + write!(f, ")") + } + } +} + +/// An iterator over each equivalence class. +/// +/// The last element in this iterator always corresponds to [`Unit::eoi`]. +/// +/// This is created by the [`ByteClasses::iter`] method. +/// +/// The lifetime `'a` refers to the lifetime of the byte classes that this +/// iterator was created from. +#[derive(Debug)] +pub struct ByteClassIter<'a> { + classes: &'a ByteClasses, + i: usize, +} + +impl<'a> Iterator for ByteClassIter<'a> { + type Item = Unit; + + fn next(&mut self) -> Option<Unit> { + if self.i + 1 == self.classes.alphabet_len() { + self.i += 1; + Some(self.classes.eoi()) + } else if self.i < self.classes.alphabet_len() { + let class = u8::try_from(self.i).unwrap(); + self.i += 1; + Some(Unit::u8(class)) + } else { + None + } + } +} + +/// An iterator over representative bytes from each equivalence class. +/// +/// This is created by the [`ByteClasses::representatives`] method. +/// +/// The lifetime `'a` refers to the lifetime of the byte classes that this +/// iterator was created from. +#[derive(Debug)] +pub struct ByteClassRepresentatives<'a> { + classes: &'a ByteClasses, + cur_byte: usize, + end_byte: Option<usize>, + last_class: Option<u8>, +} + +impl<'a> Iterator for ByteClassRepresentatives<'a> { + type Item = Unit; + + fn next(&mut self) -> Option<Unit> { + while self.cur_byte < self.end_byte.unwrap_or(256) { + let byte = u8::try_from(self.cur_byte).unwrap(); + let class = self.classes.get(byte); + self.cur_byte += 1; + + if self.last_class != Some(class) { + self.last_class = Some(class); + return Some(Unit::u8(byte)); + } + } + if self.cur_byte != usize::MAX && self.end_byte.is_none() { + // Using usize::MAX as a sentinel is OK because we ban usize::MAX + // from appearing as a start bound in iterator construction. But + // why do it this way? 
Well, we want to return the EOI class + // whenever the end of the given range is unbounded because EOI + // isn't really a "byte" per se, so the only way it should be + // excluded is if there is a bounded end to the range. Therefore, + // when the end is unbounded, we just need to know whether we've + // reported EOI or not. When we do, we set cur_byte to a value it + // can never otherwise be. + self.cur_byte = usize::MAX; + return Some(self.classes.eoi()); + } + None + } +} + +/// An iterator over all elements in an equivalence class. +/// +/// This is created by the [`ByteClasses::elements`] method. +/// +/// The lifetime `'a` refers to the lifetime of the byte classes that this +/// iterator was created from. +#[derive(Debug)] +pub struct ByteClassElements<'a> { + classes: &'a ByteClasses, + class: Unit, + byte: usize, +} + +impl<'a> Iterator for ByteClassElements<'a> { + type Item = Unit; + + fn next(&mut self) -> Option<Unit> { + while self.byte < 256 { + let byte = u8::try_from(self.byte).unwrap(); + self.byte += 1; + if self.class.is_byte(self.classes.get(byte)) { + return Some(Unit::u8(byte)); + } + } + if self.byte < 257 { + self.byte += 1; + if self.class.is_eoi() { + return Some(Unit::eoi(256)); + } + } + None + } +} + +/// An iterator over all elements in an equivalence class expressed as a +/// sequence of contiguous ranges. +#[derive(Debug)] +struct ByteClassElementRanges<'a> { + elements: ByteClassElements<'a>, + range: Option<(Unit, Unit)>, +} + +impl<'a> Iterator for ByteClassElementRanges<'a> { + type Item = (Unit, Unit); + + fn next(&mut self) -> Option<(Unit, Unit)> { + loop { + let element = match self.elements.next() { + None => return self.range.take(), + Some(element) => element, + }; + match self.range.take() { + None => { + self.range = Some((element, element)); + } + Some((start, end)) => { + if end.as_usize() + 1 != element.as_usize() + || element.is_eoi() + { + self.range = Some((element, element)); + return Some((start, end)); + } + self.range = Some((start, element)); + } + } + } + } +} + +/// A partitioning of bytes into equivalence classes. +/// +/// A byte class set keeps track of an *approximation* of equivalence classes +/// of bytes during NFA construction. That is, every byte in an equivalence +/// class cannot discriminate between a match and a non-match. +/// +/// For example, in the regex `[ab]+`, the bytes `a` and `b` would be in the +/// same equivalence class because it never matters whether an `a` or a `b` is +/// seen, and no combination of `a`s and `b`s in the text can discriminate a +/// match. +/// +/// Note though that this does not compute the minimal set of equivalence +/// classes. For example, in the regex `[ac]+`, both `a` and `c` are in the +/// same equivalence class for the same reason that `a` and `b` are in the +/// same equivalence class in the aforementioned regex. However, in this +/// implementation, `a` and `c` are put into distinct equivalence classes. The +/// reason for this is implementation complexity. In the future, we should +/// endeavor to compute the minimal equivalence classes since they can have a +/// rather large impact on the size of the DFA. (Doing this will likely require +/// rethinking how equivalence classes are computed, including changing the +/// representation here, which is only able to group contiguous bytes into the +/// same equivalence class.) 
+#[cfg(feature = "alloc")] +#[derive(Clone, Debug)] +pub(crate) struct ByteClassSet(ByteSet); + +#[cfg(feature = "alloc")] +impl Default for ByteClassSet { + fn default() -> ByteClassSet { + ByteClassSet::empty() + } +} + +#[cfg(feature = "alloc")] +impl ByteClassSet { + /// Create a new set of byte classes where all bytes are part of the same + /// equivalence class. + pub(crate) fn empty() -> Self { + ByteClassSet(ByteSet::empty()) + } + + /// Indicate the the range of byte given (inclusive) can discriminate a + /// match between it and all other bytes outside of the range. + pub(crate) fn set_range(&mut self, start: u8, end: u8) { + debug_assert!(start <= end); + if start > 0 { + self.0.add(start - 1); + } + self.0.add(end); + } + + /// Add the contiguous ranges in the set given to this byte class set. + pub(crate) fn add_set(&mut self, set: &ByteSet) { + for (start, end) in set.iter_ranges() { + self.set_range(start, end); + } + } + + /// Convert this boolean set to a map that maps all byte values to their + /// corresponding equivalence class. The last mapping indicates the largest + /// equivalence class identifier (which is never bigger than 255). + pub(crate) fn byte_classes(&self) -> ByteClasses { + let mut classes = ByteClasses::empty(); + let mut class = 0u8; + let mut b = 0u8; + loop { + classes.set(b, class); + if b == 255 { + break; + } + if self.0.contains(b) { + class = class.checked_add(1).unwrap(); + } + b = b.checked_add(1).unwrap(); + } + classes + } +} + +/// A simple set of bytes that is reasonably cheap to copy and allocation free. +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub(crate) struct ByteSet { + bits: BitSet, +} + +/// The representation of a byte set. Split out so that we can define a +/// convenient Debug impl for it while keeping "ByteSet" in the output. +#[derive(Clone, Copy, Default, Eq, PartialEq)] +struct BitSet([u128; 2]); + +impl ByteSet { + /// Create an empty set of bytes. + pub(crate) fn empty() -> ByteSet { + ByteSet { bits: BitSet([0; 2]) } + } + + /// Add a byte to this set. + /// + /// If the given byte already belongs to this set, then this is a no-op. + pub(crate) fn add(&mut self, byte: u8) { + let bucket = byte / 128; + let bit = byte % 128; + self.bits.0[usize::from(bucket)] |= 1 << bit; + } + + /// Remove a byte from this set. + /// + /// If the given byte is not in this set, then this is a no-op. + pub(crate) fn remove(&mut self, byte: u8) { + let bucket = byte / 128; + let bit = byte % 128; + self.bits.0[usize::from(bucket)] &= !(1 << bit); + } + + /// Return true if and only if the given byte is in this set. + pub(crate) fn contains(&self, byte: u8) -> bool { + let bucket = byte / 128; + let bit = byte % 128; + self.bits.0[usize::from(bucket)] & (1 << bit) > 0 + } + + /// Return true if and only if the given inclusive range of bytes is in + /// this set. + pub(crate) fn contains_range(&self, start: u8, end: u8) -> bool { + (start..=end).all(|b| self.contains(b)) + } + + /// Returns an iterator over all bytes in this set. + pub(crate) fn iter(&self) -> ByteSetIter { + ByteSetIter { set: self, b: 0 } + } + + /// Returns an iterator over all contiguous ranges of bytes in this set. + pub(crate) fn iter_ranges(&self) -> ByteSetRangeIter { + ByteSetRangeIter { set: self, b: 0 } + } + + /// Return true if and only if this set is empty. + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn is_empty(&self) -> bool { + self.bits.0 == [0, 0] + } + + /// Deserializes a byte set from the given slice. 
If the slice is of + /// incorrect length or is otherwise malformed, then an error is returned. + /// Upon success, the number of bytes read along with the set are returned. + /// The number of bytes read is always a multiple of 8. + pub(crate) fn from_bytes( + slice: &[u8], + ) -> Result<(ByteSet, usize), DeserializeError> { + use core::mem::size_of; + + wire::check_slice_len(slice, 2 * size_of::<u128>(), "byte set")?; + let mut nread = 0; + let (low, nr) = wire::try_read_u128(slice, "byte set low bucket")?; + nread += nr; + let (high, nr) = wire::try_read_u128(slice, "byte set high bucket")?; + nread += nr; + Ok((ByteSet { bits: BitSet([low, high]) }, nread)) + } + + /// Writes this byte set to the given byte buffer. If the given buffer is + /// too small, then an error is returned. Upon success, the total number of + /// bytes written is returned. The number of bytes written is guaranteed to + /// be a multiple of 8. + pub(crate) fn write_to<E: crate::util::wire::Endian>( + &self, + dst: &mut [u8], + ) -> Result<usize, SerializeError> { + use core::mem::size_of; + + let nwrite = self.write_to_len(); + if dst.len() < nwrite { + return Err(SerializeError::buffer_too_small("byte set")); + } + let mut nw = 0; + E::write_u128(self.bits.0[0], &mut dst[nw..]); + nw += size_of::<u128>(); + E::write_u128(self.bits.0[1], &mut dst[nw..]); + nw += size_of::<u128>(); + assert_eq!(nwrite, nw, "expected to write certain number of bytes",); + assert_eq!( + nw % 8, + 0, + "expected to write multiple of 8 bytes for byte set", + ); + Ok(nw) + } + + /// Returns the total number of bytes written by `write_to`. + pub(crate) fn write_to_len(&self) -> usize { + 2 * core::mem::size_of::<u128>() + } +} + +impl core::fmt::Debug for BitSet { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + let mut fmtd = f.debug_set(); + for b in 0u8..=255 { + if (ByteSet { bits: *self }).contains(b) { + fmtd.entry(&b); + } + } + fmtd.finish() + } +} + +#[derive(Debug)] +pub(crate) struct ByteSetIter<'a> { + set: &'a ByteSet, + b: usize, +} + +impl<'a> Iterator for ByteSetIter<'a> { + type Item = u8; + + fn next(&mut self) -> Option<u8> { + while self.b <= 255 { + let b = u8::try_from(self.b).unwrap(); + self.b += 1; + if self.set.contains(b) { + return Some(b); + } + } + None + } +} + +#[derive(Debug)] +pub(crate) struct ByteSetRangeIter<'a> { + set: &'a ByteSet, + b: usize, +} + +impl<'a> Iterator for ByteSetRangeIter<'a> { + type Item = (u8, u8); + + fn next(&mut self) -> Option<(u8, u8)> { + let asu8 = |n: usize| u8::try_from(n).unwrap(); + while self.b <= 255 { + let start = asu8(self.b); + self.b += 1; + if !self.set.contains(start) { + continue; + } + + let mut end = start; + while self.b <= 255 && self.set.contains(asu8(self.b)) { + end = asu8(self.b); + self.b += 1; + } + return Some((start, end)); + } + None + } +} + +#[cfg(all(test, feature = "alloc"))] +mod tests { + use alloc::{vec, vec::Vec}; + + use super::*; + + #[test] + fn byte_classes() { + let mut set = ByteClassSet::empty(); + set.set_range(b'a', b'z'); + + let classes = set.byte_classes(); + assert_eq!(classes.get(0), 0); + assert_eq!(classes.get(1), 0); + assert_eq!(classes.get(2), 0); + assert_eq!(classes.get(b'a' - 1), 0); + assert_eq!(classes.get(b'a'), 1); + assert_eq!(classes.get(b'm'), 1); + assert_eq!(classes.get(b'z'), 1); + assert_eq!(classes.get(b'z' + 1), 2); + assert_eq!(classes.get(254), 2); + assert_eq!(classes.get(255), 2); + + let mut set = ByteClassSet::empty(); + set.set_range(0, 2); + set.set_range(4, 6); + let 
classes = set.byte_classes(); + assert_eq!(classes.get(0), 0); + assert_eq!(classes.get(1), 0); + assert_eq!(classes.get(2), 0); + assert_eq!(classes.get(3), 1); + assert_eq!(classes.get(4), 2); + assert_eq!(classes.get(5), 2); + assert_eq!(classes.get(6), 2); + assert_eq!(classes.get(7), 3); + assert_eq!(classes.get(255), 3); + } + + #[test] + fn full_byte_classes() { + let mut set = ByteClassSet::empty(); + for b in 0u8..=255 { + set.set_range(b, b); + } + assert_eq!(set.byte_classes().alphabet_len(), 257); + } + + #[test] + fn elements_typical() { + let mut set = ByteClassSet::empty(); + set.set_range(b'b', b'd'); + set.set_range(b'g', b'm'); + set.set_range(b'z', b'z'); + let classes = set.byte_classes(); + // class 0: \x00-a + // class 1: b-d + // class 2: e-f + // class 3: g-m + // class 4: n-y + // class 5: z-z + // class 6: \x7B-\xFF + // class 7: EOI + assert_eq!(classes.alphabet_len(), 8); + + let elements = classes.elements(Unit::u8(0)).collect::<Vec<_>>(); + assert_eq!(elements.len(), 98); + assert_eq!(elements[0], Unit::u8(b'\x00')); + assert_eq!(elements[97], Unit::u8(b'a')); + + let elements = classes.elements(Unit::u8(1)).collect::<Vec<_>>(); + assert_eq!( + elements, + vec![Unit::u8(b'b'), Unit::u8(b'c'), Unit::u8(b'd')], + ); + + let elements = classes.elements(Unit::u8(2)).collect::<Vec<_>>(); + assert_eq!(elements, vec![Unit::u8(b'e'), Unit::u8(b'f')],); + + let elements = classes.elements(Unit::u8(3)).collect::<Vec<_>>(); + assert_eq!( + elements, + vec![ + Unit::u8(b'g'), + Unit::u8(b'h'), + Unit::u8(b'i'), + Unit::u8(b'j'), + Unit::u8(b'k'), + Unit::u8(b'l'), + Unit::u8(b'm'), + ], + ); + + let elements = classes.elements(Unit::u8(4)).collect::<Vec<_>>(); + assert_eq!(elements.len(), 12); + assert_eq!(elements[0], Unit::u8(b'n')); + assert_eq!(elements[11], Unit::u8(b'y')); + + let elements = classes.elements(Unit::u8(5)).collect::<Vec<_>>(); + assert_eq!(elements, vec![Unit::u8(b'z')]); + + let elements = classes.elements(Unit::u8(6)).collect::<Vec<_>>(); + assert_eq!(elements.len(), 133); + assert_eq!(elements[0], Unit::u8(b'\x7B')); + assert_eq!(elements[132], Unit::u8(b'\xFF')); + + let elements = classes.elements(Unit::eoi(7)).collect::<Vec<_>>(); + assert_eq!(elements, vec![Unit::eoi(256)]); + } + + #[test] + fn elements_singletons() { + let classes = ByteClasses::singletons(); + assert_eq!(classes.alphabet_len(), 257); + + let elements = classes.elements(Unit::u8(b'a')).collect::<Vec<_>>(); + assert_eq!(elements, vec![Unit::u8(b'a')]); + + let elements = classes.elements(Unit::eoi(5)).collect::<Vec<_>>(); + assert_eq!(elements, vec![Unit::eoi(256)]); + } + + #[test] + fn elements_empty() { + let classes = ByteClasses::empty(); + assert_eq!(classes.alphabet_len(), 2); + + let elements = classes.elements(Unit::u8(0)).collect::<Vec<_>>(); + assert_eq!(elements.len(), 256); + assert_eq!(elements[0], Unit::u8(b'\x00')); + assert_eq!(elements[255], Unit::u8(b'\xFF')); + + let elements = classes.elements(Unit::eoi(1)).collect::<Vec<_>>(); + assert_eq!(elements, vec![Unit::eoi(256)]); + } + + #[test] + fn representatives() { + let mut set = ByteClassSet::empty(); + set.set_range(b'b', b'd'); + set.set_range(b'g', b'm'); + set.set_range(b'z', b'z'); + let classes = set.byte_classes(); + + let got: Vec<Unit> = classes.representatives(..).collect(); + let expected = vec![ + Unit::u8(b'\x00'), + Unit::u8(b'b'), + Unit::u8(b'e'), + Unit::u8(b'g'), + Unit::u8(b'n'), + Unit::u8(b'z'), + Unit::u8(b'\x7B'), + Unit::eoi(7), + ]; + assert_eq!(expected, got); + + let got: 
Vec<Unit> = classes.representatives(..0).collect(); + assert!(got.is_empty()); + let got: Vec<Unit> = classes.representatives(1..1).collect(); + assert!(got.is_empty()); + let got: Vec<Unit> = classes.representatives(255..255).collect(); + assert!(got.is_empty()); + + // A weird case that is the only guaranteed to way to get an iterator + // of just the EOI class by excluding all possible byte values. + let got: Vec<Unit> = classes + .representatives(( + core::ops::Bound::Excluded(255), + core::ops::Bound::Unbounded, + )) + .collect(); + let expected = vec![Unit::eoi(7)]; + assert_eq!(expected, got); + + let got: Vec<Unit> = classes.representatives(..=255).collect(); + let expected = vec![ + Unit::u8(b'\x00'), + Unit::u8(b'b'), + Unit::u8(b'e'), + Unit::u8(b'g'), + Unit::u8(b'n'), + Unit::u8(b'z'), + Unit::u8(b'\x7B'), + ]; + assert_eq!(expected, got); + + let got: Vec<Unit> = classes.representatives(b'b'..=b'd').collect(); + let expected = vec![Unit::u8(b'b')]; + assert_eq!(expected, got); + + let got: Vec<Unit> = classes.representatives(b'a'..=b'd').collect(); + let expected = vec![Unit::u8(b'a'), Unit::u8(b'b')]; + assert_eq!(expected, got); + + let got: Vec<Unit> = classes.representatives(b'b'..=b'e').collect(); + let expected = vec![Unit::u8(b'b'), Unit::u8(b'e')]; + assert_eq!(expected, got); + + let got: Vec<Unit> = classes.representatives(b'A'..=b'Z').collect(); + let expected = vec![Unit::u8(b'A')]; + assert_eq!(expected, got); + + let got: Vec<Unit> = classes.representatives(b'A'..=b'z').collect(); + let expected = vec![ + Unit::u8(b'A'), + Unit::u8(b'b'), + Unit::u8(b'e'), + Unit::u8(b'g'), + Unit::u8(b'n'), + Unit::u8(b'z'), + ]; + assert_eq!(expected, got); + + let got: Vec<Unit> = classes.representatives(b'z'..).collect(); + let expected = vec![Unit::u8(b'z'), Unit::u8(b'\x7B'), Unit::eoi(7)]; + assert_eq!(expected, got); + + let got: Vec<Unit> = classes.representatives(b'z'..=0xFF).collect(); + let expected = vec![Unit::u8(b'z'), Unit::u8(b'\x7B')]; + assert_eq!(expected, got); + } +} diff --git a/vendor/regex-automata/src/util/captures.rs b/vendor/regex-automata/src/util/captures.rs new file mode 100644 index 0000000..05db6a9 --- /dev/null +++ b/vendor/regex-automata/src/util/captures.rs @@ -0,0 +1,2548 @@ +/*! +Provides types for dealing with capturing groups. + +Capturing groups refer to sub-patterns of regexes that some regex engines can +report matching offsets for. For example, matching `[a-z]([0-9]+)` against +`a789` would give `a789` as the overall match (for the implicit capturing group +at index `0`) and `789` as the match for the capturing group `([0-9]+)` (an +explicit capturing group at index `1`). + +Not all regex engines can report match offsets for capturing groups. Indeed, +to a first approximation, regex engines that can report capturing group offsets +tend to be quite a bit slower than regex engines that can't. This is because +tracking capturing groups at search time usually requires more "power" that +in turn adds overhead. + +Other regex implementations might call capturing groups "submatches." + +# Overview + +The main types in this module are: + +* [`Captures`] records the capturing group offsets found during a search. It +provides convenience routines for looking up capturing group offsets by either +index or name. +* [`GroupInfo`] records the mapping between capturing groups and "slots," +where the latter are how capturing groups are recorded during a regex search. 
+This also keeps a mapping from capturing group name to index, and capture +group index to name. A `GroupInfo` is used by `Captures` internally to +provide a convenient API. It is unlikely that you'll use a `GroupInfo` +directly, but for example, if you've compiled an Thompson NFA, then you can use +[`thompson::NFA::group_info`](crate::nfa::thompson::NFA::group_info) to get its +underlying `GroupInfo`. +*/ + +use alloc::{string::String, sync::Arc, vec, vec::Vec}; + +use crate::util::{ + interpolate, + primitives::{ + NonMaxUsize, PatternID, PatternIDError, PatternIDIter, SmallIndex, + }, + search::{Match, Span}, +}; + +/// The span offsets of capturing groups after a match has been found. +/// +/// This type represents the output of regex engines that can report the +/// offsets at which capturing groups matches or "submatches" occur. For +/// example, the [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM). When a match +/// occurs, it will at minimum contain the [`PatternID`] of the pattern that +/// matched. Depending upon how it was constructed, it may also contain the +/// start/end offsets of the entire match of the pattern and the start/end +/// offsets of each capturing group that participated in the match. +/// +/// Values of this type are always created for a specific [`GroupInfo`]. It is +/// unspecified behavior to use a `Captures` value in a search with any regex +/// engine that has a different `GroupInfo` than the one the `Captures` were +/// created with. +/// +/// # Constructors +/// +/// There are three constructors for this type that control what kind of +/// information is available upon a match: +/// +/// * [`Captures::all`]: Will store overall pattern match offsets in addition +/// to the offsets of capturing groups that participated in the match. +/// * [`Captures::matches`]: Will store only the overall pattern +/// match offsets. The offsets of capturing groups (even ones that participated +/// in the match) are not available. +/// * [`Captures::empty`]: Will only store the pattern ID that matched. No +/// match offsets are available at all. +/// +/// If you aren't sure which to choose, then pick the first one. The first one +/// is what convenience routines like, +/// [`PikeVM::create_captures`](crate::nfa::thompson::pikevm::PikeVM::create_captures), +/// will use automatically. +/// +/// The main difference between these choices is performance. Namely, if you +/// ask for _less_ information, then the execution of regex search may be able +/// to run more quickly. +/// +/// # Notes +/// +/// It is worth pointing out that this type is not coupled to any one specific +/// regex engine. Instead, its coupling is with [`GroupInfo`], which is the +/// thing that is responsible for mapping capturing groups to "slot" offsets. +/// Slot offsets are indices into a single sequence of memory at which matching +/// haystack offsets for the corresponding group are written by regex engines. 
+/// +/// # Example +/// +/// This example shows how to parse a simple date and extract the components of +/// the date via capturing groups: +/// +/// ``` +/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span}; +/// +/// let re = PikeVM::new(r"^([0-9]{4})-([0-9]{2})-([0-9]{2})$")?; +/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); +/// +/// re.captures(&mut cache, "2010-03-14", &mut caps); +/// assert!(caps.is_match()); +/// assert_eq!(Some(Span::from(0..4)), caps.get_group(1)); +/// assert_eq!(Some(Span::from(5..7)), caps.get_group(2)); +/// assert_eq!(Some(Span::from(8..10)), caps.get_group(3)); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +/// +/// # Example: named capturing groups +/// +/// This example is like the one above, but leverages the ability to name +/// capturing groups in order to make the code a bit clearer: +/// +/// ``` +/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span}; +/// +/// let re = PikeVM::new(r"^(?P<y>[0-9]{4})-(?P<m>[0-9]{2})-(?P<d>[0-9]{2})$")?; +/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); +/// +/// re.captures(&mut cache, "2010-03-14", &mut caps); +/// assert!(caps.is_match()); +/// assert_eq!(Some(Span::from(0..4)), caps.get_group_by_name("y")); +/// assert_eq!(Some(Span::from(5..7)), caps.get_group_by_name("m")); +/// assert_eq!(Some(Span::from(8..10)), caps.get_group_by_name("d")); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +#[derive(Clone)] +pub struct Captures { + /// The group info that these capture groups are coupled to. This is what + /// gives the "convenience" of the `Captures` API. Namely, it provides the + /// slot mapping and the name|-->index mapping for capture lookups by name. + group_info: GroupInfo, + /// The ID of the pattern that matched. Regex engines must set this to + /// None when no match occurs. + pid: Option<PatternID>, + /// The slot values, i.e., submatch offsets. + /// + /// In theory, the smallest sequence of slots would be something like + /// `max(groups(pattern) for pattern in regex) * 2`, but instead, we use + /// `sum(groups(pattern) for pattern in regex) * 2`. Why? + /// + /// Well, the former could be used in theory, because we don't generally + /// have any overlapping APIs that involve capturing groups. Therefore, + /// there's technically never any need to have slots set for multiple + /// patterns. However, this might change some day, in which case, we would + /// need to have slots available. + /// + /// The other reason is that during the execution of some regex engines, + /// there exists a point in time where multiple slots for different + /// patterns may be written to before knowing which pattern has matched. + /// Therefore, the regex engines themselves, in order to support multiple + /// patterns correctly, must have all slots available. If `Captures` + /// doesn't have all slots available, then regex engines can't write + /// directly into the caller provided `Captures` and must instead write + /// into some other storage and then copy the slots involved in the match + /// at the end of the search. + /// + /// So overall, at least as of the time of writing, it seems like the path + /// of least resistance is to just require allocating all possible slots + /// instead of the conceptual minimum. 
Another way to justify this is that + /// the most common case is a single pattern, in which case, there is no + /// inefficiency here since the 'max' and 'sum' calculations above are + /// equivalent in that case. + /// + /// N.B. The mapping from group index to slot is maintained by `GroupInfo` + /// and is considered an API guarantee. See `GroupInfo` for more details on + /// that mapping. + /// + /// N.B. `Option<NonMaxUsize>` has the same size as a `usize`. + slots: Vec<Option<NonMaxUsize>>, +} + +impl Captures { + /// Create new storage for the offsets of all matching capturing groups. + /// + /// This routine provides the most information for matches---namely, the + /// spans of matching capturing groups---but also requires the regex search + /// routines to do the most work. + /// + /// It is unspecified behavior to use the returned `Captures` value in a + /// search with a `GroupInfo` other than the one that is provided to this + /// constructor. + /// + /// # Example + /// + /// This example shows that all capturing groups---but only ones that + /// participated in a match---are available to query after a match has + /// been found: + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// util::captures::Captures, + /// Span, Match, + /// }; + /// + /// let re = PikeVM::new( + /// r"^(?:(?P<lower>[a-z]+)|(?P<upper>[A-Z]+))(?P<digits>[0-9]+)$", + /// )?; + /// let mut cache = re.create_cache(); + /// let mut caps = Captures::all(re.get_nfa().group_info().clone()); + /// + /// re.captures(&mut cache, "ABC123", &mut caps); + /// assert!(caps.is_match()); + /// assert_eq!(Some(Match::must(0, 0..6)), caps.get_match()); + /// // The 'lower' group didn't match, so it won't have any offsets. + /// assert_eq!(None, caps.get_group_by_name("lower")); + /// assert_eq!(Some(Span::from(0..3)), caps.get_group_by_name("upper")); + /// assert_eq!(Some(Span::from(3..6)), caps.get_group_by_name("digits")); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn all(group_info: GroupInfo) -> Captures { + let slots = group_info.slot_len(); + Captures { group_info, pid: None, slots: vec![None; slots] } + } + + /// Create new storage for only the full match spans of a pattern. This + /// does not include any capturing group offsets. + /// + /// It is unspecified behavior to use the returned `Captures` value in a + /// search with a `GroupInfo` other than the one that is provided to this + /// constructor. + /// + /// # Example + /// + /// This example shows that only overall match offsets are reported when + /// this constructor is used. Accessing any capturing groups other than + /// the 0th will always return `None`. + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// util::captures::Captures, + /// Match, + /// }; + /// + /// let re = PikeVM::new( + /// r"^(?:(?P<lower>[a-z]+)|(?P<upper>[A-Z]+))(?P<digits>[0-9]+)$", + /// )?; + /// let mut cache = re.create_cache(); + /// let mut caps = Captures::matches(re.get_nfa().group_info().clone()); + /// + /// re.captures(&mut cache, "ABC123", &mut caps); + /// assert!(caps.is_match()); + /// assert_eq!(Some(Match::must(0, 0..6)), caps.get_match()); + /// // We didn't ask for capturing group offsets, so they aren't available. 
+ /// assert_eq!(None, caps.get_group_by_name("lower")); + /// assert_eq!(None, caps.get_group_by_name("upper")); + /// assert_eq!(None, caps.get_group_by_name("digits")); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn matches(group_info: GroupInfo) -> Captures { + // This is OK because we know there are at least this many slots, + // and GroupInfo construction guarantees that the number of slots fits + // into a usize. + let slots = group_info.pattern_len().checked_mul(2).unwrap(); + Captures { group_info, pid: None, slots: vec![None; slots] } + } + + /// Create new storage for only tracking which pattern matched. No offsets + /// are stored at all. + /// + /// It is unspecified behavior to use the returned `Captures` value in a + /// search with a `GroupInfo` other than the one that is provided to this + /// constructor. + /// + /// # Example + /// + /// This example shows that only the pattern that matched can be accessed + /// from a `Captures` value created via this constructor. + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// util::captures::Captures, + /// PatternID, + /// }; + /// + /// let re = PikeVM::new_many(&[r"[a-z]+", r"[A-Z]+"])?; + /// let mut cache = re.create_cache(); + /// let mut caps = Captures::empty(re.get_nfa().group_info().clone()); + /// + /// re.captures(&mut cache, "aABCz", &mut caps); + /// assert!(caps.is_match()); + /// assert_eq!(Some(PatternID::must(0)), caps.pattern()); + /// // We didn't ask for any offsets, so they aren't available. + /// assert_eq!(None, caps.get_match()); + /// + /// re.captures(&mut cache, &"aABCz"[1..], &mut caps); + /// assert!(caps.is_match()); + /// assert_eq!(Some(PatternID::must(1)), caps.pattern()); + /// // We didn't ask for any offsets, so they aren't available. + /// assert_eq!(None, caps.get_match()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn empty(group_info: GroupInfo) -> Captures { + Captures { group_info, pid: None, slots: vec![] } + } + + /// Returns true if and only if this capturing group represents a match. + /// + /// This is a convenience routine for `caps.pattern().is_some()`. + /// + /// # Example + /// + /// When using the PikeVM (for example), the lightest weight way of + /// detecting whether a match exists is to create capturing groups that + /// only track the ID of the pattern that match (if any): + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// util::captures::Captures, + /// }; + /// + /// let re = PikeVM::new(r"[a-z]+")?; + /// let mut cache = re.create_cache(); + /// let mut caps = Captures::empty(re.get_nfa().group_info().clone()); + /// + /// re.captures(&mut cache, "aABCz", &mut caps); + /// assert!(caps.is_match()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn is_match(&self) -> bool { + self.pid.is_some() + } + + /// Returns the identifier of the pattern that matched when this + /// capturing group represents a match. If no match was found, then this + /// always returns `None`. + /// + /// This returns a pattern ID in precisely the cases in which `is_match` + /// returns `true`. Similarly, the pattern ID returned is always the + /// same pattern ID found in the `Match` returned by `get_match`. 
+ /// + /// # Example + /// + /// When using the PikeVM (for example), the lightest weight way of + /// detecting which pattern matched is to create capturing groups that only + /// track the ID of the pattern that match (if any): + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// util::captures::Captures, + /// PatternID, + /// }; + /// + /// let re = PikeVM::new_many(&[r"[a-z]+", r"[A-Z]+"])?; + /// let mut cache = re.create_cache(); + /// let mut caps = Captures::empty(re.get_nfa().group_info().clone()); + /// + /// re.captures(&mut cache, "ABC", &mut caps); + /// assert_eq!(Some(PatternID::must(1)), caps.pattern()); + /// // Recall that offsets are only available when using a non-empty + /// // Captures value. So even though a match occurred, this returns None! + /// assert_eq!(None, caps.get_match()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn pattern(&self) -> Option<PatternID> { + self.pid + } + + /// Returns the pattern ID and the span of the match, if one occurred. + /// + /// This always returns `None` when `Captures` was created with + /// [`Captures::empty`], even if a match was found. + /// + /// If this routine returns a non-`None` value, then `is_match` is + /// guaranteed to return `true` and `pattern` is also guaranteed to return + /// a non-`None` value. + /// + /// # Example + /// + /// This example shows how to get the full match from a search: + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match}; + /// + /// let re = PikeVM::new_many(&[r"[a-z]+", r"[A-Z]+"])?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "ABC", &mut caps); + /// assert_eq!(Some(Match::must(1, 0..3)), caps.get_match()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn get_match(&self) -> Option<Match> { + Some(Match::new(self.pattern()?, self.get_group(0)?)) + } + + /// Returns the span of a capturing group match corresponding to the group + /// index given, only if both the overall pattern matched and the capturing + /// group participated in that match. + /// + /// This returns `None` if `index` is invalid. `index` is valid if and only + /// if it's less than [`Captures::group_len`] for the matching pattern. + /// + /// This always returns `None` when `Captures` was created with + /// [`Captures::empty`], even if a match was found. This also always + /// returns `None` for any `index > 0` when `Captures` was created with + /// [`Captures::matches`]. + /// + /// If this routine returns a non-`None` value, then `is_match` is + /// guaranteed to return `true`, `pattern` is guaranteed to return a + /// non-`None` value and `get_match` is guaranteed to return a non-`None` + /// value. + /// + /// By convention, the 0th capture group will always return the same + /// span as the span returned by `get_match`. This is because the 0th + /// capture group always corresponds to the entirety of the pattern's + /// match. (It is similarly always unnamed because it is implicit.) This + /// isn't necessarily true of all regex engines. For example, one can + /// hand-compile a [`thompson::NFA`](crate::nfa::thompson::NFA) via a + /// [`thompson::Builder`](crate::nfa::thompson::Builder), which isn't + /// technically forced to make the 0th capturing group always correspond to + /// the entire match. 
+ /// + /// # Example + /// + /// This example shows how to get the capturing groups, by index, from a + /// match: + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span, Match}; + /// + /// let re = PikeVM::new(r"^(?P<first>\pL+)\s+(?P<last>\pL+)$")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "Bruce Springsteen", &mut caps); + /// assert_eq!(Some(Match::must(0, 0..17)), caps.get_match()); + /// assert_eq!(Some(Span::from(0..5)), caps.get_group(1)); + /// assert_eq!(Some(Span::from(6..17)), caps.get_group(2)); + /// // Looking for a non-existent capturing group will return None: + /// assert_eq!(None, caps.get_group(3)); + /// # // literals are too big for 32-bit usize: #1039 + /// # #[cfg(target_pointer_width = "64")] + /// assert_eq!(None, caps.get_group(9944060567225171988)); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn get_group(&self, index: usize) -> Option<Span> { + let pid = self.pattern()?; + // There's a little bit of work needed to map captures to slots in the + // fully general case. But in the overwhelming common case of a single + // pattern, we can just do some simple arithmetic. + let (slot_start, slot_end) = if self.group_info().pattern_len() == 1 { + (index.checked_mul(2)?, index.checked_mul(2)?.checked_add(1)?) + } else { + self.group_info().slots(pid, index)? + }; + let start = self.slots.get(slot_start).copied()??; + let end = self.slots.get(slot_end).copied()??; + Some(Span { start: start.get(), end: end.get() }) + } + + /// Returns the span of a capturing group match corresponding to the group + /// name given, only if both the overall pattern matched and the capturing + /// group participated in that match. + /// + /// This returns `None` if `name` does not correspond to a valid capturing + /// group for the pattern that matched. + /// + /// This always returns `None` when `Captures` was created with + /// [`Captures::empty`], even if a match was found. This also always + /// returns `None` for any `index > 0` when `Captures` was created with + /// [`Captures::matches`]. + /// + /// If this routine returns a non-`None` value, then `is_match` is + /// guaranteed to return `true`, `pattern` is guaranteed to return a + /// non-`None` value and `get_match` is guaranteed to return a non-`None` + /// value. 
+ /// + /// # Example + /// + /// This example shows how to get the capturing groups, by name, from a + /// match: + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span, Match}; + /// + /// let re = PikeVM::new(r"^(?P<first>\pL+)\s+(?P<last>\pL+)$")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "Bruce Springsteen", &mut caps); + /// assert_eq!(Some(Match::must(0, 0..17)), caps.get_match()); + /// assert_eq!(Some(Span::from(0..5)), caps.get_group_by_name("first")); + /// assert_eq!(Some(Span::from(6..17)), caps.get_group_by_name("last")); + /// // Looking for a non-existent capturing group will return None: + /// assert_eq!(None, caps.get_group_by_name("middle")); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn get_group_by_name(&self, name: &str) -> Option<Span> { + let index = self.group_info().to_index(self.pattern()?, name)?; + self.get_group(index) + } + + /// Returns an iterator of possible spans for every capturing group in the + /// matching pattern. + /// + /// If this `Captures` value does not correspond to a match, then the + /// iterator returned yields no elements. + /// + /// Note that the iterator returned yields elements of type `Option<Span>`. + /// A span is present if and only if it corresponds to a capturing group + /// that participated in a match. + /// + /// # Example + /// + /// This example shows how to collect all capturing groups: + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span}; + /// + /// let re = PikeVM::new( + /// // Matches first/last names, with an optional middle name. + /// r"^(?P<first>\pL+)\s+(?:(?P<middle>\pL+)\s+)?(?P<last>\pL+)$", + /// )?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "Harry James Potter", &mut caps); + /// assert!(caps.is_match()); + /// let groups: Vec<Option<Span>> = caps.iter().collect(); + /// assert_eq!(groups, vec![ + /// Some(Span::from(0..18)), + /// Some(Span::from(0..5)), + /// Some(Span::from(6..11)), + /// Some(Span::from(12..18)), + /// ]); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + /// + /// This example uses the same regex as the previous example, but with a + /// haystack that omits the middle name. This results in a capturing group + /// that is present in the elements yielded by the iterator but without a + /// match: + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span}; + /// + /// let re = PikeVM::new( + /// // Matches first/last names, with an optional middle name. 
+ /// r"^(?P<first>\pL+)\s+(?:(?P<middle>\pL+)\s+)?(?P<last>\pL+)$", + /// )?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "Harry Potter", &mut caps); + /// assert!(caps.is_match()); + /// let groups: Vec<Option<Span>> = caps.iter().collect(); + /// assert_eq!(groups, vec![ + /// Some(Span::from(0..12)), + /// Some(Span::from(0..5)), + /// None, + /// Some(Span::from(6..12)), + /// ]); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn iter(&self) -> CapturesPatternIter<'_> { + let names = self + .pattern() + .map_or(GroupInfoPatternNames::empty().enumerate(), |pid| { + self.group_info().pattern_names(pid).enumerate() + }); + CapturesPatternIter { caps: self, names } + } + + /// Return the total number of capturing groups for the matching pattern. + /// + /// If this `Captures` value does not correspond to a match, then this + /// always returns `0`. + /// + /// This always returns the same number of elements yielded by + /// [`Captures::iter`]. That is, the number includes capturing groups even + /// if they don't participate in the match. + /// + /// # Example + /// + /// This example shows how to count the total number of capturing groups + /// associated with a pattern. Notice that it includes groups that did not + /// participate in a match (just like `Captures::iter` does). + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::nfa::thompson::pikevm::PikeVM; + /// + /// let re = PikeVM::new( + /// // Matches first/last names, with an optional middle name. + /// r"^(?P<first>\pL+)\s+(?:(?P<middle>\pL+)\s+)?(?P<last>\pL+)$", + /// )?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "Harry Potter", &mut caps); + /// assert_eq!(4, caps.group_len()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn group_len(&self) -> usize { + let pid = match self.pattern() { + None => return 0, + Some(pid) => pid, + }; + self.group_info().group_len(pid) + } + + /// Returns a reference to the underlying group info on which these + /// captures are based. + /// + /// The difference between `GroupInfo` and `Captures` is that the former + /// defines the structure of capturing groups where as the latter is what + /// stores the actual match information. So where as `Captures` only gives + /// you access to the current match, `GroupInfo` lets you query any + /// information about all capturing groups, even ones for patterns that + /// weren't involved in a match. + /// + /// Note that a `GroupInfo` uses reference counting internally, so it may + /// be cloned cheaply. + /// + /// # Example + /// + /// This example shows how to get all capturing group names from the + /// underlying `GroupInfo`. Notice that we don't even need to run a + /// search. 
+ /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID}; + /// + /// let re = PikeVM::new_many(&[ + /// r"(?P<foo>a)", + /// r"(a)(b)", + /// r"ab", + /// r"(?P<bar>a)(?P<quux>a)", + /// r"(?P<foo>z)", + /// ])?; + /// let caps = re.create_captures(); + /// + /// let expected = vec![ + /// (PatternID::must(0), 0, None), + /// (PatternID::must(0), 1, Some("foo")), + /// (PatternID::must(1), 0, None), + /// (PatternID::must(1), 1, None), + /// (PatternID::must(1), 2, None), + /// (PatternID::must(2), 0, None), + /// (PatternID::must(3), 0, None), + /// (PatternID::must(3), 1, Some("bar")), + /// (PatternID::must(3), 2, Some("quux")), + /// (PatternID::must(4), 0, None), + /// (PatternID::must(4), 1, Some("foo")), + /// ]; + /// // We could also just use 're.get_nfa().group_info()'. + /// let got: Vec<(PatternID, usize, Option<&str>)> = + /// caps.group_info().all_names().collect(); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn group_info(&self) -> &GroupInfo { + &self.group_info + } + + /// Interpolates the capture references in `replacement` with the + /// corresponding substrings in `haystack` matched by each reference. The + /// interpolated string is returned. + /// + /// See the [`interpolate` module](interpolate) for documentation on the + /// format of the replacement string. + /// + /// # Example + /// + /// This example shows how to use interpolation, and also shows how it + /// can work with multi-pattern regexes. + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID}; + /// + /// let re = PikeVM::new_many(&[ + /// r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})", + /// r"(?<year>[0-9]{4})-(?<month>[0-9]{2})-(?<day>[0-9]{2})", + /// ])?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// + /// let replacement = "year=$year, month=$month, day=$day"; + /// + /// // This matches the first pattern. + /// let hay = "On 14-03-2010, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let result = caps.interpolate_string(hay, replacement); + /// assert_eq!("year=2010, month=03, day=14", result); + /// + /// // And this matches the second pattern. + /// let hay = "On 2010-03-14, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let result = caps.interpolate_string(hay, replacement); + /// assert_eq!("year=2010, month=03, day=14", result); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn interpolate_string( + &self, + haystack: &str, + replacement: &str, + ) -> String { + let mut dst = String::new(); + self.interpolate_string_into(haystack, replacement, &mut dst); + dst + } + + /// Interpolates the capture references in `replacement` with the + /// corresponding substrings in `haystack` matched by each reference. The + /// interpolated string is written to `dst`. + /// + /// See the [`interpolate` module](interpolate) for documentation on the + /// format of the replacement string. + /// + /// # Example + /// + /// This example shows how to use interpolation, and also shows how it + /// can work with multi-pattern regexes. 
+ /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID}; + /// + /// let re = PikeVM::new_many(&[ + /// r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})", + /// r"(?<year>[0-9]{4})-(?<month>[0-9]{2})-(?<day>[0-9]{2})", + /// ])?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// + /// let replacement = "year=$year, month=$month, day=$day"; + /// + /// // This matches the first pattern. + /// let hay = "On 14-03-2010, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let mut dst = String::new(); + /// caps.interpolate_string_into(hay, replacement, &mut dst); + /// assert_eq!("year=2010, month=03, day=14", dst); + /// + /// // And this matches the second pattern. + /// let hay = "On 2010-03-14, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let mut dst = String::new(); + /// caps.interpolate_string_into(hay, replacement, &mut dst); + /// assert_eq!("year=2010, month=03, day=14", dst); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn interpolate_string_into( + &self, + haystack: &str, + replacement: &str, + dst: &mut String, + ) { + interpolate::string( + replacement, + |index, dst| { + let span = match self.get_group(index) { + None => return, + Some(span) => span, + }; + dst.push_str(&haystack[span]); + }, + |name| self.group_info().to_index(self.pattern()?, name), + dst, + ); + } + + /// Interpolates the capture references in `replacement` with the + /// corresponding substrings in `haystack` matched by each reference. The + /// interpolated byte string is returned. + /// + /// See the [`interpolate` module](interpolate) for documentation on the + /// format of the replacement string. + /// + /// # Example + /// + /// This example shows how to use interpolation, and also shows how it + /// can work with multi-pattern regexes. + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID}; + /// + /// let re = PikeVM::new_many(&[ + /// r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})", + /// r"(?<year>[0-9]{4})-(?<month>[0-9]{2})-(?<day>[0-9]{2})", + /// ])?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// + /// let replacement = b"year=$year, month=$month, day=$day"; + /// + /// // This matches the first pattern. + /// let hay = b"On 14-03-2010, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let result = caps.interpolate_bytes(hay, replacement); + /// assert_eq!(&b"year=2010, month=03, day=14"[..], result); + /// + /// // And this matches the second pattern. + /// let hay = b"On 2010-03-14, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let result = caps.interpolate_bytes(hay, replacement); + /// assert_eq!(&b"year=2010, month=03, day=14"[..], result); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn interpolate_bytes( + &self, + haystack: &[u8], + replacement: &[u8], + ) -> Vec<u8> { + let mut dst = vec![]; + self.interpolate_bytes_into(haystack, replacement, &mut dst); + dst + } + + /// Interpolates the capture references in `replacement` with the + /// corresponding substrings in `haystack` matched by each reference. The + /// interpolated byte string is written to `dst`. + /// + /// See the [`interpolate` module](interpolate) for documentation on the + /// format of the replacement string. 
+ /// + /// # Example + /// + /// This example shows how to use interpolation, and also shows how it + /// can work with multi-pattern regexes. + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID}; + /// + /// let re = PikeVM::new_many(&[ + /// r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})", + /// r"(?<year>[0-9]{4})-(?<month>[0-9]{2})-(?<day>[0-9]{2})", + /// ])?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// + /// let replacement = b"year=$year, month=$month, day=$day"; + /// + /// // This matches the first pattern. + /// let hay = b"On 14-03-2010, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let mut dst = vec![]; + /// caps.interpolate_bytes_into(hay, replacement, &mut dst); + /// assert_eq!(&b"year=2010, month=03, day=14"[..], dst); + /// + /// // And this matches the second pattern. + /// let hay = b"On 2010-03-14, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let mut dst = vec![]; + /// caps.interpolate_bytes_into(hay, replacement, &mut dst); + /// assert_eq!(&b"year=2010, month=03, day=14"[..], dst); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn interpolate_bytes_into( + &self, + haystack: &[u8], + replacement: &[u8], + dst: &mut Vec<u8>, + ) { + interpolate::bytes( + replacement, + |index, dst| { + let span = match self.get_group(index) { + None => return, + Some(span) => span, + }; + dst.extend_from_slice(&haystack[span]); + }, + |name| self.group_info().to_index(self.pattern()?, name), + dst, + ); + } + + /// This is a convenience routine for extracting the substrings + /// corresponding to matching capture groups in the given `haystack`. The + /// `haystack` should be the same substring used to find the match spans in + /// this `Captures` value. + /// + /// This is identical to [`Captures::extract_bytes`], except it works with + /// `&str` instead of `&[u8]`. + /// + /// # Panics + /// + /// This panics if the number of explicit matching groups in this + /// `Captures` value is less than `N`. This also panics if this `Captures` + /// value does not correspond to a match. + /// + /// Note that this does *not* panic if the number of explicit matching + /// groups is bigger than `N`. In that case, only the first `N` matching + /// groups are extracted. + /// + /// # Example + /// + /// ``` + /// use regex_automata::nfa::thompson::pikevm::PikeVM; + /// + /// let re = PikeVM::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})")?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// + /// let hay = "On 2010-03-14, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// assert!(caps.is_match()); + /// let (full, [year, month, day]) = caps.extract(hay); + /// assert_eq!("2010-03-14", full); + /// assert_eq!("2010", year); + /// assert_eq!("03", month); + /// assert_eq!("14", day); + /// + /// // We can also ask for fewer than all capture groups. 
+ /// let (full, [year]) = caps.extract(hay); + /// assert_eq!("2010-03-14", full); + /// assert_eq!("2010", year); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn extract<'h, const N: usize>( + &self, + haystack: &'h str, + ) -> (&'h str, [&'h str; N]) { + let mut matched = self.iter().flatten(); + let whole_match = &haystack[matched.next().expect("a match")]; + let group_matches = [0; N].map(|_| { + let sp = matched.next().expect("too few matching groups"); + &haystack[sp] + }); + (whole_match, group_matches) + } + + /// This is a convenience routine for extracting the substrings + /// corresponding to matching capture groups in the given `haystack`. The + /// `haystack` should be the same substring used to find the match spans in + /// this `Captures` value. + /// + /// This is identical to [`Captures::extract`], except it works with + /// `&[u8]` instead of `&str`. + /// + /// # Panics + /// + /// This panics if the number of explicit matching groups in this + /// `Captures` value is less than `N`. This also panics if this `Captures` + /// value does not correspond to a match. + /// + /// Note that this does *not* panic if the number of explicit matching + /// groups is bigger than `N`. In that case, only the first `N` matching + /// groups are extracted. + /// + /// # Example + /// + /// ``` + /// use regex_automata::nfa::thompson::pikevm::PikeVM; + /// + /// let re = PikeVM::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})")?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// + /// let hay = b"On 2010-03-14, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// assert!(caps.is_match()); + /// let (full, [year, month, day]) = caps.extract_bytes(hay); + /// assert_eq!(b"2010-03-14", full); + /// assert_eq!(b"2010", year); + /// assert_eq!(b"03", month); + /// assert_eq!(b"14", day); + /// + /// // We can also ask for fewer than all capture groups. + /// let (full, [year]) = caps.extract_bytes(hay); + /// assert_eq!(b"2010-03-14", full); + /// assert_eq!(b"2010", year); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn extract_bytes<'h, const N: usize>( + &self, + haystack: &'h [u8], + ) -> (&'h [u8], [&'h [u8]; N]) { + let mut matched = self.iter().flatten(); + let whole_match = &haystack[matched.next().expect("a match")]; + let group_matches = [0; N].map(|_| { + let sp = matched.next().expect("too few matching groups"); + &haystack[sp] + }); + (whole_match, group_matches) + } +} + +/// Lower level "slot" oriented APIs. One does not typically need to use these +/// when executing a search. They are instead mostly intended for folks that +/// are writing their own regex engine while reusing this `Captures` type. +impl Captures { + /// Clear this `Captures` value. + /// + /// After clearing, all slots inside this `Captures` value will be set to + /// `None`. Similarly, any pattern ID that it was previously associated + /// with (for a match) is erased. + /// + /// It is not usually necessary to call this routine. Namely, a `Captures` + /// value only provides high level access to the capturing groups of the + /// pattern that matched, and only low level access to individual slots. + /// Thus, even if slots corresponding to groups that aren't associated + /// with the matching pattern are set, then it won't impact the higher + /// level APIs. 
Namely, higher level APIs like [`Captures::get_group`] will + /// return `None` if no pattern ID is present, even if there are spans set + /// in the underlying slots. + /// + /// Thus, to "clear" a `Captures` value of a match, it is usually only + /// necessary to call [`Captures::set_pattern`] with `None`. + /// + /// # Example + /// + /// This example shows what happens when a `Captures` value is cleared. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::nfa::thompson::pikevm::PikeVM; + /// + /// let re = PikeVM::new(r"^(?P<first>\pL+)\s+(?P<last>\pL+)$")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "Bruce Springsteen", &mut caps); + /// assert!(caps.is_match()); + /// let slots: Vec<Option<usize>> = + /// caps.slots().iter().map(|s| s.map(|x| x.get())).collect(); + /// // Note that the following ordering is considered an API guarantee. + /// assert_eq!(slots, vec![ + /// Some(0), + /// Some(17), + /// Some(0), + /// Some(5), + /// Some(6), + /// Some(17), + /// ]); + /// + /// // Now clear the slots. Everything is gone and it is no longer a match. + /// caps.clear(); + /// assert!(!caps.is_match()); + /// let slots: Vec<Option<usize>> = + /// caps.slots().iter().map(|s| s.map(|x| x.get())).collect(); + /// assert_eq!(slots, vec![ + /// None, + /// None, + /// None, + /// None, + /// None, + /// None, + /// ]); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn clear(&mut self) { + self.pid = None; + for slot in self.slots.iter_mut() { + *slot = None; + } + } + + /// Set the pattern on this `Captures` value. + /// + /// When the pattern ID is `None`, then this `Captures` value does not + /// correspond to a match (`is_match` will return `false`). Otherwise, it + /// corresponds to a match. + /// + /// This is useful in search implementations where you might want to + /// initially call `set_pattern(None)` in order to avoid the cost of + /// calling `clear()` if it turns out to not be necessary. + /// + /// # Example + /// + /// This example shows that `set_pattern` merely overwrites the pattern ID. + /// It does not actually change the underlying slot values. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::nfa::thompson::pikevm::PikeVM; + /// + /// let re = PikeVM::new(r"^(?P<first>\pL+)\s+(?P<last>\pL+)$")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "Bruce Springsteen", &mut caps); + /// assert!(caps.is_match()); + /// assert!(caps.pattern().is_some()); + /// let slots: Vec<Option<usize>> = + /// caps.slots().iter().map(|s| s.map(|x| x.get())).collect(); + /// // Note that the following ordering is considered an API guarantee. + /// assert_eq!(slots, vec![ + /// Some(0), + /// Some(17), + /// Some(0), + /// Some(5), + /// Some(6), + /// Some(17), + /// ]); + /// + /// // Now set the pattern to None. Note that the slot values remain. + /// caps.set_pattern(None); + /// assert!(!caps.is_match()); + /// assert!(!caps.pattern().is_some()); + /// let slots: Vec<Option<usize>> = + /// caps.slots().iter().map(|s| s.map(|x| x.get())).collect(); + /// // Note that the following ordering is considered an API guarantee. 
+ /// assert_eq!(slots, vec![ + /// Some(0), + /// Some(17), + /// Some(0), + /// Some(5), + /// Some(6), + /// Some(17), + /// ]); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn set_pattern(&mut self, pid: Option<PatternID>) { + self.pid = pid; + } + + /// Returns the underlying slots, where each slot stores a single offset. + /// + /// Every matching capturing group generally corresponds to two slots: one + /// slot for the starting position and another for the ending position. + /// Typically, either both are present or neither are. (The weasel word + /// "typically" is used here because it really depends on the regex engine + /// implementation. Every sensible regex engine likely adheres to this + /// invariant, and every regex engine in this crate is sensible.) + /// + /// Generally speaking, callers should prefer to use higher level routines + /// like [`Captures::get_match`] or [`Captures::get_group`]. + /// + /// An important note here is that a regex engine may not reset all of the + /// slots to `None` values when no match occurs, or even when a match of + /// a different pattern occurs. But this depends on how the regex engine + /// implementation deals with slots. + /// + /// # Example + /// + /// This example shows how to get the underlying slots from a regex match. + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// util::primitives::{PatternID, NonMaxUsize}, + /// }; + /// + /// let re = PikeVM::new_many(&[ + /// r"[a-z]+", + /// r"[0-9]+", + /// ])?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "123", &mut caps); + /// assert_eq!(Some(PatternID::must(1)), caps.pattern()); + /// // Note that the only guarantee we have here is that slots 2 and 3 + /// // are set to correct values. The contents of the first two slots are + /// // unspecified since the 0th pattern did not match. + /// let expected = &[ + /// None, + /// None, + /// NonMaxUsize::new(0), + /// NonMaxUsize::new(3), + /// ]; + /// assert_eq!(expected, caps.slots()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn slots(&self) -> &[Option<NonMaxUsize>] { + &self.slots + } + + /// Returns the underlying slots as a mutable slice, where each slot stores + /// a single offset. + /// + /// This tends to be most useful for regex engine implementations for + /// writing offsets for matching capturing groups to slots. + /// + /// See [`Captures::slots`] for more information about slots. + #[inline] + pub fn slots_mut(&mut self) -> &mut [Option<NonMaxUsize>] { + &mut self.slots + } +} + +impl core::fmt::Debug for Captures { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + let mut dstruct = f.debug_struct("Captures"); + dstruct.field("pid", &self.pid); + if let Some(pid) = self.pid { + dstruct.field("spans", &CapturesDebugMap { pid, caps: self }); + } + dstruct.finish() + } +} + +/// A little helper type to provide a nice map-like debug representation for +/// our capturing group spans. 
+struct CapturesDebugMap<'a> { + pid: PatternID, + caps: &'a Captures, +} + +impl<'a> core::fmt::Debug for CapturesDebugMap<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + struct Key<'a>(usize, Option<&'a str>); + + impl<'a> core::fmt::Debug for Key<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "{}", self.0)?; + if let Some(name) = self.1 { + write!(f, "/{:?}", name)?; + } + Ok(()) + } + } + + let mut map = f.debug_map(); + let names = self.caps.group_info().pattern_names(self.pid); + for (group_index, maybe_name) in names.enumerate() { + let key = Key(group_index, maybe_name); + match self.caps.get_group(group_index) { + None => map.entry(&key, &None::<()>), + Some(span) => map.entry(&key, &span), + }; + } + map.finish() + } +} + +/// An iterator over all capturing groups in a `Captures` value. +/// +/// This iterator includes capturing groups that did not participate in a +/// match. See the [`Captures::iter`] method documentation for more details +/// and examples. +/// +/// The lifetime parameter `'a` refers to the lifetime of the underlying +/// `Captures` value. +#[derive(Clone, Debug)] +pub struct CapturesPatternIter<'a> { + caps: &'a Captures, + names: core::iter::Enumerate<GroupInfoPatternNames<'a>>, +} + +impl<'a> Iterator for CapturesPatternIter<'a> { + type Item = Option<Span>; + + fn next(&mut self) -> Option<Option<Span>> { + let (group_index, _) = self.names.next()?; + Some(self.caps.get_group(group_index)) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + self.names.size_hint() + } + + fn count(self) -> usize { + self.names.count() + } +} + +impl<'a> ExactSizeIterator for CapturesPatternIter<'a> {} +impl<'a> core::iter::FusedIterator for CapturesPatternIter<'a> {} + +/// Represents information about capturing groups in a compiled regex. +/// +/// The information encapsulated by this type consists of the following. For +/// each pattern: +/// +/// * A map from every capture group name to its corresponding capture group +/// index. +/// * A map from every capture group index to its corresponding capture group +/// name. +/// * A map from capture group index to its corresponding slot index. A slot +/// refers to one half of a capturing group. That is, a capture slot is either +/// the start or end of a capturing group. A slot is usually the mechanism +/// by which a regex engine records offsets for each capturing group during a +/// search. +/// +/// A `GroupInfo` uses reference counting internally and is thus cheap to +/// clone. +/// +/// # Mapping from capture groups to slots +/// +/// One of the main responsibilities of a `GroupInfo` is to build a mapping +/// from `(PatternID, u32)` (where the `u32` is a capture index) to something +/// called a "slot." As mentioned above, a slot refers to one half of a +/// capturing group. Both combined provide the start and end offsets of +/// a capturing group that participated in a match. +/// +/// **The mapping between group indices and slots is an API guarantee.** That +/// is, the mapping won't change within a semver compatible release. +/// +/// Slots exist primarily because this is a convenient mechanism by which +/// regex engines report group offsets at search time. For example, the +/// [`nfa::thompson::State::Capture`](crate::nfa::thompson::State::Capture) +/// NFA state includes the slot index. 
+/// When a regex engine transitions through
+/// this state, it will likely use the slot index to write the current haystack
+/// offset to some region of memory. When a match is found, those slots are
+/// then reported to the caller, typically via a convenient abstraction like a
+/// [`Captures`] value.
+///
+/// Because this crate provides first class support for multi-pattern regexes,
+/// and because of some performance related reasons, the mapping between
+/// capturing groups and slots is a little complex. However, in the case of a
+/// single pattern, the mapping can be described very simply: for all capture
+/// group indices `i`, its corresponding slots are at `i * 2` and `i * 2 + 1`.
+/// Notice that the pattern ID isn't involved at all here: since this mapping
+/// only applies to a single-pattern regex, the pattern ID is always `0`.
+///
+/// In the multi-pattern case, the mapping is a bit more complicated. To talk
+/// about it, we must define what we mean by "implicit" vs "explicit"
+/// capturing groups:
+///
+/// * An **implicit** capturing group refers to the capturing group that is
+/// present for every pattern automatically, and corresponds to the overall
+/// match of a pattern. Every pattern has precisely one implicit capturing
+/// group. It is always unnamed and it always corresponds to the capture group
+/// index `0`.
+/// * An **explicit** capturing group refers to any capturing group that
+/// appears in the concrete syntax of the pattern. (Or, if an NFA was hand
+/// built without any concrete syntax, it refers to any capturing group with an
+/// index greater than `0`.)
+///
+/// Some examples:
+///
+/// * `\w+` has one implicit capturing group and zero explicit capturing
+/// groups.
+/// * `(\w+)` has one implicit group and one explicit group.
+/// * `foo(\d+)(?:\pL+)(\d+)` has one implicit group and two explicit groups.
+///
+/// Turning back to the slot mapping, we can now state it as follows:
+///
+/// * Given a pattern ID `pid`, the slots for its implicit group are always
+/// at `pid * 2` and `pid * 2 + 1`.
+/// * Given a pattern ID of `0`, the slots for its explicit groups start
+/// at `group_info.pattern_len() * 2`.
+/// * Given a pattern ID `pid > 0`, the slots for its explicit groups start
+/// immediately following where the slots for the explicit groups of `pid - 1`
+/// end.
+///
+/// In particular, while there is a concrete formula one can use to determine
+/// where the slots for the implicit group of any pattern are, there is no
+/// general formula for determining where the slots for explicit capturing
+/// groups are. This is because each pattern can contain a different number
+/// of groups.
+///
+/// The intended way of getting the slots for a particular capturing group
+/// (whether implicit or explicit) is via the [`GroupInfo::slot`] or
+/// [`GroupInfo::slots`] method.
+///
+/// See below for a concrete example of how capturing groups get mapped to
+/// slots.
+///
+/// # Example
+///
+/// This example shows how to build a new `GroupInfo` and query it for
+/// information.
+///
+/// ```
+/// use regex_automata::util::{captures::GroupInfo, primitives::PatternID};
+///
+/// let info = GroupInfo::new(vec![
+/// vec![None, Some("foo")],
+/// vec![None],
+/// vec![None, None, None, Some("bar"), None],
+/// vec![None, None, Some("foo")],
+/// ])?;
+/// // The number of patterns being tracked.
+/// assert_eq!(4, info.pattern_len());
+/// // We can query the number of groups for any pattern.
+/// assert_eq!(2, info.group_len(PatternID::must(0))); +/// assert_eq!(1, info.group_len(PatternID::must(1))); +/// assert_eq!(5, info.group_len(PatternID::must(2))); +/// assert_eq!(3, info.group_len(PatternID::must(3))); +/// // An invalid pattern always has zero groups. +/// assert_eq!(0, info.group_len(PatternID::must(999))); +/// // 2 slots per group +/// assert_eq!(22, info.slot_len()); +/// +/// // We can map a group index for a particular pattern to its name, if +/// // one exists. +/// assert_eq!(Some("foo"), info.to_name(PatternID::must(3), 2)); +/// assert_eq!(None, info.to_name(PatternID::must(2), 4)); +/// // Or map a name to its group index. +/// assert_eq!(Some(1), info.to_index(PatternID::must(0), "foo")); +/// assert_eq!(Some(2), info.to_index(PatternID::must(3), "foo")); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +/// +/// # Example: mapping from capture groups to slots +/// +/// This example shows the specific mapping from capture group indices for +/// each pattern to their corresponding slots. The slot values shown in this +/// example are considered an API guarantee. +/// +/// ``` +/// use regex_automata::util::{captures::GroupInfo, primitives::PatternID}; +/// +/// let info = GroupInfo::new(vec![ +/// vec![None, Some("foo")], +/// vec![None], +/// vec![None, None, None, Some("bar"), None], +/// vec![None, None, Some("foo")], +/// ])?; +/// +/// // We first show the slots for each pattern's implicit group. +/// assert_eq!(Some((0, 1)), info.slots(PatternID::must(0), 0)); +/// assert_eq!(Some((2, 3)), info.slots(PatternID::must(1), 0)); +/// assert_eq!(Some((4, 5)), info.slots(PatternID::must(2), 0)); +/// assert_eq!(Some((6, 7)), info.slots(PatternID::must(3), 0)); +/// +/// // And now we show the slots for each pattern's explicit group. +/// assert_eq!(Some((8, 9)), info.slots(PatternID::must(0), 1)); +/// assert_eq!(Some((10, 11)), info.slots(PatternID::must(2), 1)); +/// assert_eq!(Some((12, 13)), info.slots(PatternID::must(2), 2)); +/// assert_eq!(Some((14, 15)), info.slots(PatternID::must(2), 3)); +/// assert_eq!(Some((16, 17)), info.slots(PatternID::must(2), 4)); +/// assert_eq!(Some((18, 19)), info.slots(PatternID::must(3), 1)); +/// assert_eq!(Some((20, 21)), info.slots(PatternID::must(3), 2)); +/// +/// // Asking for the slots for an invalid pattern ID or even for an invalid +/// // group index for a specific pattern will return None. So for example, +/// // you're guaranteed to not get the slots for a different pattern than the +/// // one requested. +/// assert_eq!(None, info.slots(PatternID::must(5), 0)); +/// assert_eq!(None, info.slots(PatternID::must(1), 1)); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +#[derive(Clone, Debug, Default)] +pub struct GroupInfo(Arc<GroupInfoInner>); + +impl GroupInfo { + /// Creates a new group info from a sequence of patterns, where each + /// sequence of patterns yields a sequence of possible group names. The + /// index of each pattern in the sequence corresponds to its `PatternID`, + /// and the index of each group in each pattern's sequence corresponds to + /// its corresponding group index. + /// + /// While this constructor is very generic and therefore perhaps hard to + /// chew on, an example of a valid concrete type that can be passed to + /// this constructor is `Vec<Vec<Option<String>>>`. The outer `Vec` + /// corresponds to the patterns, i.e., one `Vec<Option<String>>` per + /// pattern. The inner `Vec` corresponds to the capturing groups for + /// each pattern. 
The `Option<String>` corresponds to the name of the + /// capturing group, if present. + /// + /// It is legal to pass an empty iterator to this constructor. It will + /// return an empty group info with zero slots. An empty group info is + /// useful for cases where you have no patterns or for cases where slots + /// aren't being used at all (e.g., for most DFAs in this crate). + /// + /// # Errors + /// + /// This constructor returns an error if the given capturing groups are + /// invalid in some way. Those reasons include, but are not necessarily + /// limited to: + /// + /// * Too many patterns (i.e., `PatternID` would overflow). + /// * Too many capturing groups (e.g., `u32` would overflow). + /// * A pattern is given that has no capturing groups. (All patterns must + /// have at least an implicit capturing group at index `0`.) + /// * The capturing group at index `0` has a name. It must be unnamed. + /// * There are duplicate capturing group names within the same pattern. + /// (Multiple capturing groups with the same name may exist, but they + /// must be in different patterns.) + /// + /// An example below shows how to trigger some of the above error + /// conditions. + /// + /// # Example + /// + /// This example shows how to build a new `GroupInfo` and query it for + /// information. + /// + /// ``` + /// use regex_automata::util::captures::GroupInfo; + /// + /// let info = GroupInfo::new(vec![ + /// vec![None, Some("foo")], + /// vec![None], + /// vec![None, None, None, Some("bar"), None], + /// vec![None, None, Some("foo")], + /// ])?; + /// // The number of patterns being tracked. + /// assert_eq!(4, info.pattern_len()); + /// // 2 slots per group + /// assert_eq!(22, info.slot_len()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + /// + /// # Example: empty `GroupInfo` + /// + /// This example shows how to build a new `GroupInfo` and query it for + /// information. + /// + /// ``` + /// use regex_automata::util::captures::GroupInfo; + /// + /// let info = GroupInfo::empty(); + /// // Everything is zero. + /// assert_eq!(0, info.pattern_len()); + /// assert_eq!(0, info.slot_len()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + /// + /// # Example: error conditions + /// + /// This example shows how to provoke some of the ways in which building + /// a `GroupInfo` can fail. + /// + /// ``` + /// use regex_automata::util::captures::GroupInfo; + /// + /// // Either the group info is empty, or all patterns must have at least + /// // one capturing group. + /// assert!(GroupInfo::new(vec![ + /// vec![None, Some("a")], // ok + /// vec![None], // ok + /// vec![], // not ok + /// ]).is_err()); + /// // Note that building an empty group info is OK. + /// assert!(GroupInfo::new(Vec::<Vec<Option<String>>>::new()).is_ok()); + /// + /// // The first group in each pattern must correspond to an implicit + /// // anonymous group. i.e., One that is not named. By convention, this + /// // group corresponds to the overall match of a regex. Every other group + /// // in a pattern is explicit and optional. + /// assert!(GroupInfo::new(vec![vec![Some("foo")]]).is_err()); + /// + /// // There must not be duplicate group names within the same pattern. + /// assert!(GroupInfo::new(vec![ + /// vec![None, Some("foo"), Some("foo")], + /// ]).is_err()); + /// // But duplicate names across distinct patterns is OK. 
+ /// assert!(GroupInfo::new(vec![
+ /// vec![None, Some("foo")],
+ /// vec![None, Some("foo")],
+ /// ]).is_ok());
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ ///
+ /// There are other ways in which building a `GroupInfo` can fail, but they
+ /// are difficult to show here. For example, building fails if the number
+ /// of patterns given would overflow `PatternID`.
+ pub fn new<P, G, N>(pattern_groups: P) -> Result<GroupInfo, GroupInfoError>
+ where
+ P: IntoIterator<Item = G>,
+ G: IntoIterator<Item = Option<N>>,
+ N: AsRef<str>,
+ {
+ let mut group_info = GroupInfoInner {
+ slot_ranges: vec![],
+ name_to_index: vec![],
+ index_to_name: vec![],
+ memory_extra: 0,
+ };
+ for (pattern_index, groups) in pattern_groups.into_iter().enumerate() {
+ // If we can't convert the pattern index to an ID, then the caller
+ // tried to build capture info for too many patterns.
+ let pid = PatternID::new(pattern_index)
+ .map_err(GroupInfoError::too_many_patterns)?;
+
+ let mut groups_iter = groups.into_iter().enumerate();
+ match groups_iter.next() {
+ None => return Err(GroupInfoError::missing_groups(pid)),
+ Some((_, Some(_))) => {
+ return Err(GroupInfoError::first_must_be_unnamed(pid))
+ }
+ Some((_, None)) => {}
+ }
+ group_info.add_first_group(pid);
+ // Now iterate over the rest, which correspond to all of the
+ // (conventionally) explicit capture groups in a regex pattern.
+ for (group_index, maybe_name) in groups_iter {
+ // Just like for patterns, if the group index can't be
+ // converted to a "small" index, then the caller has given too
+ // many groups for a particular pattern.
+ let group = SmallIndex::new(group_index).map_err(|_| {
+ GroupInfoError::too_many_groups(pid, group_index)
+ })?;
+ group_info.add_explicit_group(pid, group, maybe_name)?;
+ }
+ }
+ group_info.fixup_slot_ranges()?;
+ Ok(GroupInfo(Arc::new(group_info)))
+ }
+
+ /// This creates an empty `GroupInfo`.
+ ///
+ /// This is a convenience routine for calling `GroupInfo::new` with an
+ /// iterator that yields no elements.
+ ///
+ /// # Example
+ ///
+ /// This example shows how to build a new empty `GroupInfo` and query it
+ /// for information.
+ ///
+ /// ```
+ /// use regex_automata::util::captures::GroupInfo;
+ ///
+ /// let info = GroupInfo::empty();
+ /// // Everything is zero.
+ /// assert_eq!(0, info.pattern_len());
+ /// assert_eq!(0, info.all_group_len());
+ /// assert_eq!(0, info.slot_len());
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn empty() -> GroupInfo {
+ GroupInfo::new(core::iter::empty::<[Option<&str>; 0]>())
+ .expect("empty group info is always valid")
+ }
+
+ /// Return the capture group index corresponding to the given name in the
+ /// given pattern. If no such capture group name exists in the given
+ /// pattern, then this returns `None`.
+ ///
+ /// If the given pattern ID is invalid, then this returns `None`.
+ ///
+ /// This also returns `None` for all inputs if these captures are empty
+ /// (e.g., built from an empty [`GroupInfo`]). To check whether captures
+ /// are present for a specific pattern, use [`GroupInfo::group_len`].
+ ///
+ /// # Example
+ ///
+ /// This example shows how to find the capture index for the given pattern
+ /// and group name.
+ ///
+ /// Remember that capture indices are relative to the pattern, such that
+ /// the same capture index value may refer to different capturing groups
+ /// for distinct patterns.
+ /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{nfa::thompson::NFA, PatternID}; + /// + /// let (pid0, pid1) = (PatternID::must(0), PatternID::must(1)); + /// + /// let nfa = NFA::new_many(&[ + /// r"a(?P<quux>\w+)z(?P<foo>\s+)", + /// r"a(?P<foo>\d+)z", + /// ])?; + /// let groups = nfa.group_info(); + /// assert_eq!(Some(2), groups.to_index(pid0, "foo")); + /// // Recall that capture index 0 is always unnamed and refers to the + /// // entire pattern. So the first capturing group present in the pattern + /// // itself always starts at index 1. + /// assert_eq!(Some(1), groups.to_index(pid1, "foo")); + /// + /// // And if a name does not exist for a particular pattern, None is + /// // returned. + /// assert!(groups.to_index(pid0, "quux").is_some()); + /// assert!(groups.to_index(pid1, "quux").is_none()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn to_index(&self, pid: PatternID, name: &str) -> Option<usize> { + let indices = self.0.name_to_index.get(pid.as_usize())?; + indices.get(name).cloned().map(|i| i.as_usize()) + } + + /// Return the capture name for the given index and given pattern. If the + /// corresponding group does not have a name, then this returns `None`. + /// + /// If the pattern ID is invalid, then this returns `None`. + /// + /// If the group index is invalid for the given pattern, then this returns + /// `None`. A group `index` is valid for a pattern `pid` in an `nfa` if and + /// only if `index < nfa.pattern_capture_len(pid)`. + /// + /// This also returns `None` for all inputs if these captures are empty + /// (e.g., built from an empty [`GroupInfo`]). To check whether captures + /// are are present for a specific pattern, use [`GroupInfo::group_len`]. + /// + /// # Example + /// + /// This example shows how to find the capture group name for the given + /// pattern and group index. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{nfa::thompson::NFA, PatternID}; + /// + /// let (pid0, pid1) = (PatternID::must(0), PatternID::must(1)); + /// + /// let nfa = NFA::new_many(&[ + /// r"a(?P<foo>\w+)z(\s+)x(\d+)", + /// r"a(\d+)z(?P<foo>\s+)", + /// ])?; + /// let groups = nfa.group_info(); + /// assert_eq!(None, groups.to_name(pid0, 0)); + /// assert_eq!(Some("foo"), groups.to_name(pid0, 1)); + /// assert_eq!(None, groups.to_name(pid0, 2)); + /// assert_eq!(None, groups.to_name(pid0, 3)); + /// + /// assert_eq!(None, groups.to_name(pid1, 0)); + /// assert_eq!(None, groups.to_name(pid1, 1)); + /// assert_eq!(Some("foo"), groups.to_name(pid1, 2)); + /// // '3' is not a valid capture index for the second pattern. + /// assert_eq!(None, groups.to_name(pid1, 3)); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn to_name(&self, pid: PatternID, group_index: usize) -> Option<&str> { + let pattern_names = self.0.index_to_name.get(pid.as_usize())?; + pattern_names.get(group_index)?.as_deref() + } + + /// Return an iterator of all capture groups and their names (if present) + /// for a particular pattern. + /// + /// If the given pattern ID is invalid or if this `GroupInfo` is empty, + /// then the iterator yields no elements. + /// + /// The number of elements yielded by this iterator is always equal to + /// the result of calling [`GroupInfo::group_len`] with the same + /// `PatternID`. 
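+ ///
+ /// A minimal sketch of that equivalence (the two-pattern NFA below is just
+ /// an illustrative choice):
+ ///
+ /// ```
+ /// use regex_automata::{nfa::thompson::NFA, PatternID};
+ ///
+ /// let nfa = NFA::new_many(&[r"(?P<foo>a)(b)", r"c"])?;
+ /// let groups = nfa.group_info();
+ /// let pid = PatternID::must(0);
+ /// assert_eq!(groups.group_len(pid), groups.pattern_names(pid).count());
+ /// // An invalid pattern ID yields zero on both sides.
+ /// let bad = PatternID::must(99);
+ /// assert_eq!(groups.group_len(bad), groups.pattern_names(bad).count());
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```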
+ /// + /// # Example + /// + /// This example shows how to get a list of all capture group names for + /// a particular pattern. + /// + /// ``` + /// use regex_automata::{nfa::thompson::NFA, PatternID}; + /// + /// let nfa = NFA::new(r"(a)(?P<foo>b)(c)(d)(?P<bar>e)")?; + /// // The first is the implicit group that is always unnammed. The next + /// // 5 groups are the explicit groups found in the concrete syntax above. + /// let expected = vec![None, None, Some("foo"), None, None, Some("bar")]; + /// let got: Vec<Option<&str>> = + /// nfa.group_info().pattern_names(PatternID::ZERO).collect(); + /// assert_eq!(expected, got); + /// + /// // Using an invalid pattern ID will result in nothing yielded. + /// let got = nfa.group_info().pattern_names(PatternID::must(999)).count(); + /// assert_eq!(0, got); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn pattern_names(&self, pid: PatternID) -> GroupInfoPatternNames<'_> { + GroupInfoPatternNames { + it: self + .0 + .index_to_name + .get(pid.as_usize()) + .map(|indices| indices.iter()) + .unwrap_or([].iter()), + } + } + + /// Return an iterator of all capture groups for all patterns supported by + /// this `GroupInfo`. Each item yielded is a triple of the group's pattern + /// ID, index in the pattern and the group's name, if present. + /// + /// # Example + /// + /// This example shows how to get a list of all capture groups found in + /// one NFA, potentially spanning multiple patterns. + /// + /// ``` + /// use regex_automata::{nfa::thompson::NFA, PatternID}; + /// + /// let nfa = NFA::new_many(&[ + /// r"(?P<foo>a)", + /// r"a", + /// r"(a)", + /// ])?; + /// let expected = vec![ + /// (PatternID::must(0), 0, None), + /// (PatternID::must(0), 1, Some("foo")), + /// (PatternID::must(1), 0, None), + /// (PatternID::must(2), 0, None), + /// (PatternID::must(2), 1, None), + /// ]; + /// let got: Vec<(PatternID, usize, Option<&str>)> = + /// nfa.group_info().all_names().collect(); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + /// + /// Unlike other capturing group related routines, this routine doesn't + /// panic even if captures aren't enabled on this NFA: + /// + /// ``` + /// use regex_automata::nfa::thompson::{NFA, WhichCaptures}; + /// + /// let nfa = NFA::compiler() + /// .configure(NFA::config().which_captures(WhichCaptures::None)) + /// .build_many(&[ + /// r"(?P<foo>a)", + /// r"a", + /// r"(a)", + /// ])?; + /// // When captures aren't enabled, there's nothing to return. + /// assert_eq!(0, nfa.group_info().all_names().count()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn all_names(&self) -> GroupInfoAllNames<'_> { + GroupInfoAllNames { + group_info: self, + pids: PatternID::iter(self.pattern_len()), + current_pid: None, + names: None, + } + } + + /// Returns the starting and ending slot corresponding to the given + /// capturing group for the given pattern. The ending slot is always one + /// more than the starting slot returned. + /// + /// Note that this is like [`GroupInfo::slot`], except that it also returns + /// the ending slot value for convenience. + /// + /// If either the pattern ID or the capture index is invalid, then this + /// returns None. + /// + /// # Example + /// + /// This example shows that the starting slots for the first capturing + /// group of each pattern are distinct. 
+ /// + /// ``` + /// use regex_automata::{nfa::thompson::NFA, PatternID}; + /// + /// let nfa = NFA::new_many(&["a", "b"])?; + /// assert_ne!( + /// nfa.group_info().slots(PatternID::must(0), 0), + /// nfa.group_info().slots(PatternID::must(1), 0), + /// ); + /// + /// // Also, the start and end slot values are never equivalent. + /// let (start, end) = nfa.group_info().slots(PatternID::ZERO, 0).unwrap(); + /// assert_ne!(start, end); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn slots( + &self, + pid: PatternID, + group_index: usize, + ) -> Option<(usize, usize)> { + // Since 'slot' only even returns valid starting slots, we know that + // there must also be an end slot and that end slot is always one more + // than the start slot. + self.slot(pid, group_index).map(|start| (start, start + 1)) + } + + /// Returns the starting slot corresponding to the given capturing group + /// for the given pattern. The ending slot is always one more than the + /// value returned. + /// + /// If either the pattern ID or the capture index is invalid, then this + /// returns None. + /// + /// # Example + /// + /// This example shows that the starting slots for the first capturing + /// group of each pattern are distinct. + /// + /// ``` + /// use regex_automata::{nfa::thompson::NFA, PatternID}; + /// + /// let nfa = NFA::new_many(&["a", "b"])?; + /// assert_ne!( + /// nfa.group_info().slot(PatternID::must(0), 0), + /// nfa.group_info().slot(PatternID::must(1), 0), + /// ); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn slot(&self, pid: PatternID, group_index: usize) -> Option<usize> { + if group_index >= self.group_len(pid) { + return None; + } + // At this point, we know that 'pid' refers to a real pattern and that + // 'group_index' refers to a real group. We therefore also know that + // the pattern and group can be combined to return a correct slot. + // That's why we don't need to use checked arithmetic below. + if group_index == 0 { + Some(pid.as_usize() * 2) + } else { + // As above, we don't need to check that our slot is less than the + // end of our range since we already know the group index is a + // valid index for the given pattern. + let (start, _) = self.0.slot_ranges[pid]; + Some(start.as_usize() + ((group_index - 1) * 2)) + } + } + + /// Returns the total number of patterns in this `GroupInfo`. + /// + /// This may return zero if the `GroupInfo` was constructed with no + /// patterns. + /// + /// This is guaranteed to be no bigger than [`PatternID::LIMIT`] because + /// `GroupInfo` construction will fail if too many patterns are added. + /// + /// # Example + /// + /// ``` + /// use regex_automata::nfa::thompson::NFA; + /// + /// let nfa = NFA::new_many(&["[0-9]+", "[a-z]+", "[A-Z]+"])?; + /// assert_eq!(3, nfa.group_info().pattern_len()); + /// + /// let nfa = NFA::never_match(); + /// assert_eq!(0, nfa.group_info().pattern_len()); + /// + /// let nfa = NFA::always_match(); + /// assert_eq!(1, nfa.group_info().pattern_len()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn pattern_len(&self) -> usize { + self.0.pattern_len() + } + + /// Return the number of capture groups in a pattern. + /// + /// If the pattern ID is invalid, then this returns `0`. + /// + /// # Example + /// + /// This example shows how the values returned by this routine may vary + /// for different patterns and NFA configurations. 
+ /// + /// ``` + /// use regex_automata::{nfa::thompson::{NFA, WhichCaptures}, PatternID}; + /// + /// let nfa = NFA::new(r"(a)(b)(c)")?; + /// // There are 3 explicit groups in the pattern's concrete syntax and + /// // 1 unnamed and implicit group spanning the entire pattern. + /// assert_eq!(4, nfa.group_info().group_len(PatternID::ZERO)); + /// + /// let nfa = NFA::new(r"abc")?; + /// // There is just the unnamed implicit group. + /// assert_eq!(1, nfa.group_info().group_len(PatternID::ZERO)); + /// + /// let nfa = NFA::compiler() + /// .configure(NFA::config().which_captures(WhichCaptures::None)) + /// .build(r"abc")?; + /// // We disabled capturing groups, so there are none. + /// assert_eq!(0, nfa.group_info().group_len(PatternID::ZERO)); + /// + /// let nfa = NFA::compiler() + /// .configure(NFA::config().which_captures(WhichCaptures::None)) + /// .build(r"(a)(b)(c)")?; + /// // We disabled capturing groups, so there are none, even if there are + /// // explicit groups in the concrete syntax. + /// assert_eq!(0, nfa.group_info().group_len(PatternID::ZERO)); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn group_len(&self, pid: PatternID) -> usize { + self.0.group_len(pid) + } + + /// Return the total number of capture groups across all patterns. + /// + /// This includes implicit groups that represent the entire match of a + /// pattern. + /// + /// # Example + /// + /// This example shows how the values returned by this routine may vary + /// for different patterns and NFA configurations. + /// + /// ``` + /// use regex_automata::{nfa::thompson::{NFA, WhichCaptures}, PatternID}; + /// + /// let nfa = NFA::new(r"(a)(b)(c)")?; + /// // There are 3 explicit groups in the pattern's concrete syntax and + /// // 1 unnamed and implicit group spanning the entire pattern. + /// assert_eq!(4, nfa.group_info().all_group_len()); + /// + /// let nfa = NFA::new(r"abc")?; + /// // There is just the unnamed implicit group. + /// assert_eq!(1, nfa.group_info().all_group_len()); + /// + /// let nfa = NFA::new_many(&["(a)", "b", "(c)"])?; + /// // Each pattern has one implicit groups, and two + /// // patterns have one explicit group each. + /// assert_eq!(5, nfa.group_info().all_group_len()); + /// + /// let nfa = NFA::compiler() + /// .configure(NFA::config().which_captures(WhichCaptures::None)) + /// .build(r"abc")?; + /// // We disabled capturing groups, so there are none. + /// assert_eq!(0, nfa.group_info().all_group_len()); + /// + /// let nfa = NFA::compiler() + /// .configure(NFA::config().which_captures(WhichCaptures::None)) + /// .build(r"(a)(b)(c)")?; + /// // We disabled capturing groups, so there are none, even if there are + /// // explicit groups in the concrete syntax. + /// assert_eq!(0, nfa.group_info().group_len(PatternID::ZERO)); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn all_group_len(&self) -> usize { + self.slot_len() / 2 + } + + /// Returns the total number of slots in this `GroupInfo` across all + /// patterns. + /// + /// The total number of slots is always twice the total number of capturing + /// groups, including both implicit and explicit groups. + /// + /// # Example + /// + /// This example shows the relationship between the number of capturing + /// groups and slots. + /// + /// ``` + /// use regex_automata::util::captures::GroupInfo; + /// + /// // There are 11 total groups here. 
+ /// let info = GroupInfo::new(vec![
+ /// vec![None, Some("foo")],
+ /// vec![None],
+ /// vec![None, None, None, Some("bar"), None],
+ /// vec![None, None, Some("foo")],
+ /// ])?;
+ /// // 2 slots per group gives us 11*2=22 slots.
+ /// assert_eq!(22, info.slot_len());
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ #[inline]
+ pub fn slot_len(&self) -> usize {
+ self.0.small_slot_len().as_usize()
+ }
+
+ /// Returns the total number of slots for implicit capturing groups.
+ ///
+ /// This is like [`GroupInfo::slot_len`], except it doesn't include the
+ /// explicit slots for each pattern. Since there are always exactly 2
+ /// implicit slots for each pattern, the number of implicit slots is always
+ /// equal to twice the number of patterns.
+ ///
+ /// # Example
+ ///
+ /// This example shows the relationship between the number of capturing
+ /// groups, implicit slots and explicit slots.
+ ///
+ /// ```
+ /// use regex_automata::util::captures::GroupInfo;
+ ///
+ /// // There are 3 total groups here: 1 implicit and 2 explicit.
+ /// let info = GroupInfo::new(vec![vec![None, Some("foo"), Some("bar")]])?;
+ /// // 2 slots per group gives us 3*2=6 slots.
+ /// assert_eq!(6, info.slot_len());
+ /// // 2 implicit slots per pattern gives us 2 implicit slots since there
+ /// // is 1 pattern.
+ /// assert_eq!(2, info.implicit_slot_len());
+ /// // 2 explicit capturing groups gives us 2*2=4 explicit slots.
+ /// assert_eq!(4, info.explicit_slot_len());
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ #[inline]
+ pub fn implicit_slot_len(&self) -> usize {
+ self.pattern_len() * 2
+ }
+
+ /// Returns the total number of slots for explicit capturing groups.
+ ///
+ /// This is like [`GroupInfo::slot_len`], except it doesn't include the
+ /// implicit slots for each pattern. (There are always 2 implicit slots for
+ /// each pattern.)
+ ///
+ /// For a non-empty `GroupInfo`, it is always the case that `slot_len` is
+ /// strictly greater than `explicit_slot_len`. For an empty `GroupInfo`,
+ /// both the total number of slots and the number of explicit slots are
+ /// `0`.
+ ///
+ /// # Example
+ ///
+ /// This example shows the relationship between the number of capturing
+ /// groups, implicit slots and explicit slots.
+ ///
+ /// ```
+ /// use regex_automata::util::captures::GroupInfo;
+ ///
+ /// // There are 3 total groups here: 1 implicit and 2 explicit.
+ /// let info = GroupInfo::new(vec![vec![None, Some("foo"), Some("bar")]])?;
+ /// // 2 slots per group gives us 3*2=6 slots.
+ /// assert_eq!(6, info.slot_len());
+ /// // 2 implicit slots per pattern gives us 2 implicit slots since there
+ /// // is 1 pattern.
+ /// assert_eq!(2, info.implicit_slot_len());
+ /// // 2 explicit capturing groups gives us 2*2=4 explicit slots.
+ /// assert_eq!(4, info.explicit_slot_len());
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ #[inline]
+ pub fn explicit_slot_len(&self) -> usize {
+ self.slot_len().saturating_sub(self.implicit_slot_len())
+ }
+
+ /// Returns the memory usage, in bytes, of this `GroupInfo`.
+ ///
+ /// This does **not** include the stack size used up by this `GroupInfo`.
+ /// To compute that, use `std::mem::size_of::<GroupInfo>()`.
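+ ///
+ /// # Example
+ ///
+ /// A rough sketch of how the reported heap usage grows as more (named)
+ /// groups are added. The exact byte counts aren't asserted here since they
+ /// aren't an API guarantee; only the relative relationship is checked.
+ ///
+ /// ```
+ /// use regex_automata::util::captures::GroupInfo;
+ ///
+ /// let small = GroupInfo::new(vec![vec![None]])?;
+ /// let big = GroupInfo::new(vec![vec![None, Some("foo"), Some("bar")]])?;
+ /// // More groups (and more group names) means more heap memory.
+ /// assert!(big.memory_usage() > small.memory_usage());
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```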
+ #[inline] + pub fn memory_usage(&self) -> usize { + use core::mem::size_of as s; + + s::<GroupInfoInner>() + + self.0.slot_ranges.len() * s::<(SmallIndex, SmallIndex)>() + + self.0.name_to_index.len() * s::<CaptureNameMap>() + + self.0.index_to_name.len() * s::<Vec<Option<Arc<str>>>>() + + self.0.memory_extra + } +} + +/// A map from capture group name to its corresponding capture group index. +/// +/// This type is actually wrapped inside a Vec indexed by pattern ID on a +/// `GroupInfo`, since multiple patterns may have the same capture group name. +/// That is, each pattern gets its own namespace of capture group names. +/// +/// Perhaps a more memory efficient representation would be +/// HashMap<(PatternID, Arc<str>), usize>, but this makes it difficult to look +/// up a capture index by name without producing a `Arc<str>`, which requires +/// an allocation. To fix this, I think we'd need to define our own unsized +/// type or something? Anyway, I didn't give this much thought since it +/// probably doesn't matter much in the grand scheme of things. But it did +/// stand out to me as mildly wasteful. +#[cfg(feature = "std")] +type CaptureNameMap = std::collections::HashMap<Arc<str>, SmallIndex>; +#[cfg(not(feature = "std"))] +type CaptureNameMap = alloc::collections::BTreeMap<Arc<str>, SmallIndex>; + +/// The inner guts of `GroupInfo`. This type only exists so that it can +/// be wrapped in an `Arc` to make `GroupInfo` reference counted. +#[derive(Debug, Default)] +struct GroupInfoInner { + slot_ranges: Vec<(SmallIndex, SmallIndex)>, + name_to_index: Vec<CaptureNameMap>, + index_to_name: Vec<Vec<Option<Arc<str>>>>, + memory_extra: usize, +} + +impl GroupInfoInner { + /// This adds the first unnamed group for the given pattern ID. The given + /// pattern ID must be zero if this is the first time this method is + /// called, or must be exactly one more than the pattern ID supplied to the + /// previous call to this method. (This method panics if this rule is + /// violated.) + /// + /// This can be thought of as initializing the GroupInfo state for the + /// given pattern and closing off the state for any previous pattern. + fn add_first_group(&mut self, pid: PatternID) { + assert_eq!(pid.as_usize(), self.slot_ranges.len()); + assert_eq!(pid.as_usize(), self.name_to_index.len()); + assert_eq!(pid.as_usize(), self.index_to_name.len()); + // This is the start of our slots for the explicit capturing groups. + // Note that since the slots for the 0th group for every pattern appear + // before any slots for the nth group (where n > 0) in any pattern, we + // will have to fix up the slot ranges once we know how many patterns + // we've added capture groups for. + let slot_start = self.small_slot_len(); + self.slot_ranges.push((slot_start, slot_start)); + self.name_to_index.push(CaptureNameMap::new()); + self.index_to_name.push(vec![None]); + self.memory_extra += core::mem::size_of::<Option<Arc<str>>>(); + } + + /// Add an explicit capturing group for the given pattern with the given + /// index. If the group has a name, then that must be given as well. + /// + /// Note that every capturing group except for the first or zeroth group is + /// explicit. + /// + /// This returns an error if adding this group would result in overflowing + /// slot indices or if a capturing group with the same name for this + /// pattern has already been added. 
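+ ///
+ /// As a concrete illustration of the slot accounting: with a single
+ /// pattern, `add_first_group` starts the slot range at `(0, 0)`. Each call
+ /// to this method bumps the end by 2, so three explicit groups give
+ /// `(0, 6)`, and `fixup_slot_ranges` later shifts both ends by
+ /// `pattern_len() * 2 = 2`, yielding `(2, 8)`, so that the implicit slots
+ /// of every pattern come before any explicit slots.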
+ fn add_explicit_group<N: AsRef<str>>( + &mut self, + pid: PatternID, + group: SmallIndex, + maybe_name: Option<N>, + ) -> Result<(), GroupInfoError> { + // We also need to check that the slot index generated for + // this group is also valid. Although, this is a little weird + // because we offset these indices below, at which point, we'll + // have to recheck them. Gosh this is annoying. Note that + // the '+2' below is OK because 'end' is guaranteed to be less + // than isize::MAX. + let end = &mut self.slot_ranges[pid].1; + *end = SmallIndex::new(end.as_usize() + 2).map_err(|_| { + GroupInfoError::too_many_groups(pid, group.as_usize()) + })?; + if let Some(name) = maybe_name { + let name = Arc::<str>::from(name.as_ref()); + if self.name_to_index[pid].contains_key(&*name) { + return Err(GroupInfoError::duplicate(pid, &name)); + } + let len = name.len(); + self.name_to_index[pid].insert(Arc::clone(&name), group); + self.index_to_name[pid].push(Some(name)); + // Adds the memory used by the Arc<str> in both maps. + self.memory_extra += + 2 * (len + core::mem::size_of::<Option<Arc<str>>>()); + // And also the value entry for the 'name_to_index' map. + // This is probably an underestimate for 'name_to_index' since + // hashmaps/btrees likely have some non-zero overhead, but we + // assume here that they have zero overhead. + self.memory_extra += core::mem::size_of::<SmallIndex>(); + } else { + self.index_to_name[pid].push(None); + self.memory_extra += core::mem::size_of::<Option<Arc<str>>>(); + } + // This is a sanity assert that checks that our group index + // is in line with the number of groups added so far for this + // pattern. + assert_eq!(group.one_more(), self.group_len(pid)); + // And is also in line with the 'index_to_name' map. + assert_eq!(group.one_more(), self.index_to_name[pid].len()); + Ok(()) + } + + /// This corrects the slot ranges to account for the slots corresponding + /// to the zeroth group of each pattern. That is, every slot range is + /// offset by 'pattern_len() * 2', since each pattern uses two slots to + /// represent the zeroth group. + fn fixup_slot_ranges(&mut self) -> Result<(), GroupInfoError> { + use crate::util::primitives::IteratorIndexExt; + // Since we know number of patterns fits in PatternID and + // PatternID::MAX < isize::MAX, it follows that multiplying by 2 will + // never overflow usize. + let offset = self.pattern_len().checked_mul(2).unwrap(); + for (pid, &mut (ref mut start, ref mut end)) in + self.slot_ranges.iter_mut().with_pattern_ids() + { + let group_len = 1 + ((end.as_usize() - start.as_usize()) / 2); + let new_end = match end.as_usize().checked_add(offset) { + Some(new_end) => new_end, + None => { + return Err(GroupInfoError::too_many_groups( + pid, group_len, + )) + } + }; + *end = SmallIndex::new(new_end).map_err(|_| { + GroupInfoError::too_many_groups(pid, group_len) + })?; + // Since start <= end, if end is valid then start must be too. + *start = SmallIndex::new(start.as_usize() + offset).unwrap(); + } + Ok(()) + } + + /// Return the total number of patterns represented by this capture slot + /// info. + fn pattern_len(&self) -> usize { + self.slot_ranges.len() + } + + /// Return the total number of capturing groups for the given pattern. If + /// the given pattern isn't valid for this capture slot info, then 0 is + /// returned. 
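+ ///
+ /// For example, a pattern whose (fixed up) slot range is `(2, 8)` has
+ /// `1 + (8 - 2) / 2 = 4` capturing groups: the implicit group plus three
+ /// explicit groups.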
+ fn group_len(&self, pid: PatternID) -> usize { + let (start, end) = match self.slot_ranges.get(pid.as_usize()) { + None => return 0, + Some(range) => range, + }; + // The difference between any two SmallIndex values always fits in a + // usize since we know that SmallIndex::MAX <= isize::MAX-1. We also + // know that start<=end by construction and that the number of groups + // never exceeds SmallIndex and thus never overflows usize. + 1 + ((end.as_usize() - start.as_usize()) / 2) + } + + /// Return the total number of slots in this capture slot info as a + /// "small index." + fn small_slot_len(&self) -> SmallIndex { + // Since slots are allocated in order of pattern (starting at 0) and + // then in order of capture group, it follows that the number of slots + // is the end of the range of slots for the last pattern. This is + // true even when the last pattern has no capturing groups, since + // 'slot_ranges' will still represent it explicitly with an empty + // range. + self.slot_ranges.last().map_or(SmallIndex::ZERO, |&(_, end)| end) + } +} + +/// An error that may occur when building a `GroupInfo`. +/// +/// Building a `GroupInfo` does a variety of checks to make sure the +/// capturing groups satisfy a number of invariants. This includes, but is not +/// limited to, ensuring that the first capturing group is unnamed and that +/// there are no duplicate capture groups for a specific pattern. +#[derive(Clone, Debug)] +pub struct GroupInfoError { + kind: GroupInfoErrorKind, +} + +/// The kind of error that occurs when building a `GroupInfo` fails. +/// +/// We keep this un-exported because it's not clear how useful it is to +/// export it. +#[derive(Clone, Debug)] +enum GroupInfoErrorKind { + /// This occurs when too many patterns have been added. i.e., It would + /// otherwise overflow a `PatternID`. + TooManyPatterns { err: PatternIDError }, + /// This occurs when too many capturing groups have been added for a + /// particular pattern. + TooManyGroups { + /// The ID of the pattern that had too many groups. + pattern: PatternID, + /// The minimum number of groups that the caller has tried to add for + /// a pattern. + minimum: usize, + }, + /// An error that occurs when a pattern has no capture groups. Either the + /// group info must be empty, or all patterns must have at least one group + /// (corresponding to the unnamed group for the entire pattern). + MissingGroups { + /// The ID of the pattern that had no capturing groups. + pattern: PatternID, + }, + /// An error that occurs when one tries to provide a name for the capture + /// group at index 0. This capturing group must currently always be + /// unnamed. + FirstMustBeUnnamed { + /// The ID of the pattern that was found to have a named first + /// capturing group. + pattern: PatternID, + }, + /// An error that occurs when duplicate capture group names for the same + /// pattern are added. + /// + /// NOTE: At time of writing, this error can never occur if you're using + /// regex-syntax, since the parser itself will reject patterns with + /// duplicate capture group names. This error can only occur when the + /// builder is used to hand construct NFAs. + Duplicate { + /// The pattern in which the duplicate capture group name was found. + pattern: PatternID, + /// The duplicate name. 
+ name: String, + }, +} + +impl GroupInfoError { + fn too_many_patterns(err: PatternIDError) -> GroupInfoError { + GroupInfoError { kind: GroupInfoErrorKind::TooManyPatterns { err } } + } + + fn too_many_groups(pattern: PatternID, minimum: usize) -> GroupInfoError { + GroupInfoError { + kind: GroupInfoErrorKind::TooManyGroups { pattern, minimum }, + } + } + + fn missing_groups(pattern: PatternID) -> GroupInfoError { + GroupInfoError { kind: GroupInfoErrorKind::MissingGroups { pattern } } + } + + fn first_must_be_unnamed(pattern: PatternID) -> GroupInfoError { + GroupInfoError { + kind: GroupInfoErrorKind::FirstMustBeUnnamed { pattern }, + } + } + + fn duplicate(pattern: PatternID, name: &str) -> GroupInfoError { + GroupInfoError { + kind: GroupInfoErrorKind::Duplicate { + pattern, + name: String::from(name), + }, + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for GroupInfoError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self.kind { + GroupInfoErrorKind::TooManyPatterns { .. } + | GroupInfoErrorKind::TooManyGroups { .. } + | GroupInfoErrorKind::MissingGroups { .. } + | GroupInfoErrorKind::FirstMustBeUnnamed { .. } + | GroupInfoErrorKind::Duplicate { .. } => None, + } + } +} + +impl core::fmt::Display for GroupInfoError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + use self::GroupInfoErrorKind::*; + + match self.kind { + TooManyPatterns { ref err } => { + write!(f, "too many patterns to build capture info: {}", err) + } + TooManyGroups { pattern, minimum } => { + write!( + f, + "too many capture groups (at least {}) were \ + found for pattern {}", + minimum, + pattern.as_usize() + ) + } + MissingGroups { pattern } => write!( + f, + "no capturing groups found for pattern {} \ + (either all patterns have zero groups or all patterns have \ + at least one group)", + pattern.as_usize(), + ), + FirstMustBeUnnamed { pattern } => write!( + f, + "first capture group (at index 0) for pattern {} has a name \ + (it must be unnamed)", + pattern.as_usize(), + ), + Duplicate { pattern, ref name } => write!( + f, + "duplicate capture group name '{}' found for pattern {}", + name, + pattern.as_usize(), + ), + } + } +} + +/// An iterator over capturing groups and their names for a specific pattern. +/// +/// This iterator is created by [`GroupInfo::pattern_names`]. +/// +/// The lifetime parameter `'a` refers to the lifetime of the `GroupInfo` +/// from which this iterator was created. +#[derive(Clone, Debug)] +pub struct GroupInfoPatternNames<'a> { + it: core::slice::Iter<'a, Option<Arc<str>>>, +} + +impl GroupInfoPatternNames<'static> { + fn empty() -> GroupInfoPatternNames<'static> { + GroupInfoPatternNames { it: [].iter() } + } +} + +impl<'a> Iterator for GroupInfoPatternNames<'a> { + type Item = Option<&'a str>; + + fn next(&mut self) -> Option<Option<&'a str>> { + self.it.next().map(|x| x.as_deref()) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } + + fn count(self) -> usize { + self.it.count() + } +} + +impl<'a> ExactSizeIterator for GroupInfoPatternNames<'a> {} +impl<'a> core::iter::FusedIterator for GroupInfoPatternNames<'a> {} + +/// An iterator over capturing groups and their names for a `GroupInfo`. +/// +/// This iterator is created by [`GroupInfo::all_names`]. +/// +/// The lifetime parameter `'a` refers to the lifetime of the `GroupInfo` +/// from which this iterator was created. 
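+///
+/// # Example
+///
+/// An illustrative sketch of the `(pattern, group index, optional name)`
+/// items this iterator yields:
+///
+/// ```
+/// use regex_automata::{nfa::thompson::NFA, PatternID};
+///
+/// let nfa = NFA::new_many(&[r"(?P<foo>a)", r"(b)"])?;
+/// let names: Vec<_> = nfa.group_info().all_names().collect();
+/// assert_eq!(names, vec![
+///     // Every pattern gets an unnamed implicit group at index 0.
+///     (PatternID::must(0), 0, None),
+///     (PatternID::must(0), 1, Some("foo")),
+///     (PatternID::must(1), 0, None),
+///     (PatternID::must(1), 1, None),
+/// ]);
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```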
+#[derive(Debug)] +pub struct GroupInfoAllNames<'a> { + group_info: &'a GroupInfo, + pids: PatternIDIter, + current_pid: Option<PatternID>, + names: Option<core::iter::Enumerate<GroupInfoPatternNames<'a>>>, +} + +impl<'a> Iterator for GroupInfoAllNames<'a> { + type Item = (PatternID, usize, Option<&'a str>); + + fn next(&mut self) -> Option<(PatternID, usize, Option<&'a str>)> { + // If the group info has no captures, then we never have anything + // to yield. We need to consider this case explicitly (at time of + // writing) because 'pattern_capture_names' will panic if captures + // aren't enabled. + if self.group_info.0.index_to_name.is_empty() { + return None; + } + if self.current_pid.is_none() { + self.current_pid = Some(self.pids.next()?); + } + let pid = self.current_pid.unwrap(); + if self.names.is_none() { + self.names = Some(self.group_info.pattern_names(pid).enumerate()); + } + let (group_index, name) = match self.names.as_mut().unwrap().next() { + Some((group_index, name)) => (group_index, name), + None => { + self.current_pid = None; + self.names = None; + return self.next(); + } + }; + Some((pid, group_index, name)) + } +} diff --git a/vendor/regex-automata/src/util/determinize/mod.rs b/vendor/regex-automata/src/util/determinize/mod.rs new file mode 100644 index 0000000..ba32991 --- /dev/null +++ b/vendor/regex-automata/src/util/determinize/mod.rs @@ -0,0 +1,682 @@ +/*! +This module contains types and routines for implementing determinization. + +In this crate, there are at least two places where we implement +determinization: fully ahead-of-time compiled DFAs in the `dfa` module and +lazily compiled DFAs in the `hybrid` module. The stuff in this module +corresponds to the things that are in common between these implementations. + +There are three broad things that our implementations of determinization have +in common, as defined by this module: + +* The classification of start states. That is, whether we're dealing with +word boundaries, line boundaries, etc., is all the same. This also includes +the look-behind assertions that are satisfied by each starting state +classification. +* The representation of DFA states as sets of NFA states, including +convenience types for building these DFA states that are amenable to reusing +allocations. +* Routines for the "classical" parts of determinization: computing the +epsilon closure, tracking match states (with corresponding pattern IDs, since +we support multi-pattern finite automata) and, of course, computing the +transition function between states for units of input. + +I did consider a couple of alternatives to this particular form of code reuse: + +1. Don't do any code reuse. The problem here is that we *really* want both +forms of determinization to do exactly identical things when it comes to +their handling of NFA states. While our tests generally ensure this, the code +is tricky and large enough where not reusing code is a pretty big bummer. + +2. Implement all of determinization once and make it generic over fully +compiled DFAs and lazily compiled DFAs. While I didn't actually try this +approach, my instinct is that it would be more complex than is needed here. +And the interface required would be pretty hairy. Instead, I think splitting +it into logical sub-components works better. 
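+
+To make the "classical" part above concrete, here is a deliberately
+simplified, self-contained sketch of one determinization step: compute the
+epsilon closure of a start state and then the transition function for a
+single byte. It is only an illustration of the idea; the real code in this
+module additionally tracks match pattern IDs and look-around assertions,
+and uses sparse sets and `StateID`s instead of a `BTreeSet` of `usize`s.
+
+```
+use std::collections::BTreeSet;
+
+// A toy NFA state: a byte transition, an epsilon split or a match.
+enum State {
+    Byte { byte: u8, next: usize },
+    Split { alts: Vec<usize> },
+    Match,
+}
+
+// Collect the epsilon closure of `start` into `set`.
+fn closure(nfa: &[State], start: usize, set: &mut BTreeSet<usize>) {
+    let mut stack = vec![start];
+    while let Some(id) = stack.pop() {
+        if !set.insert(id) {
+            continue;
+        }
+        if let State::Split { alts } = &nfa[id] {
+            stack.extend(alts.iter().copied());
+        }
+    }
+}
+
+// Compute the next DFA state (a set of NFA states) for one input byte.
+fn next(nfa: &[State], current: &BTreeSet<usize>, input: u8) -> BTreeSet<usize> {
+    let mut out = BTreeSet::new();
+    for &id in current {
+        if let State::Byte { byte, next } = &nfa[id] {
+            if *byte == input {
+                closure(nfa, *next, &mut out);
+            }
+        }
+    }
+    out
+}
+
+// An NFA for `a(b|c)`: 0 -a-> 1, 1 -eps-> {2, 3}, 2 -b-> 4, 3 -c-> 4, 4 = match.
+let nfa = vec![
+    State::Byte { byte: b'a', next: 1 },
+    State::Split { alts: vec![2, 3] },
+    State::Byte { byte: b'b', next: 4 },
+    State::Byte { byte: b'c', next: 4 },
+    State::Match,
+];
+let mut start = BTreeSet::new();
+closure(&nfa, 0, &mut start);
+let after_a = next(&nfa, &start, b'a');
+assert_eq!(after_a, BTreeSet::from([1usize, 2, 3]));
+assert_eq!(next(&nfa, &after_a, b'b'), BTreeSet::from([4usize]));
+```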
+*/ + +use alloc::vec::Vec; + +pub(crate) use self::state::{ + State, StateBuilderEmpty, StateBuilderMatches, StateBuilderNFA, +}; + +use crate::{ + nfa::thompson, + util::{ + alphabet, + look::{Look, LookSet}, + primitives::StateID, + search::MatchKind, + sparse_set::{SparseSet, SparseSets}, + start::Start, + utf8, + }, +}; + +mod state; + +/// Compute the set of all reachable NFA states, including the full epsilon +/// closure, from a DFA state for a single unit of input. The set of reachable +/// states is returned as a `StateBuilderNFA`. The `StateBuilderNFA` returned +/// also includes any look-behind assertions satisfied by `unit`, in addition +/// to whether it is a match state. For multi-pattern DFAs, the builder will +/// also include the pattern IDs that match (in the order seen). +/// +/// `nfa` must be able to resolve any NFA state in `state` and any NFA state +/// reachable via the epsilon closure of any NFA state in `state`. `sparses` +/// must have capacity equivalent to `nfa.len()`. +/// +/// `match_kind` should correspond to the match semantics implemented by the +/// DFA being built. Generally speaking, for leftmost-first match semantics, +/// states that appear after the first NFA match state will not be included in +/// the `StateBuilderNFA` returned since they are impossible to visit. +/// +/// `sparses` is used as scratch space for NFA traversal. Other than their +/// capacity requirements (detailed above), there are no requirements on what's +/// contained within them (if anything). Similarly, what's inside of them once +/// this routine returns is unspecified. +/// +/// `stack` must have length 0. It is used as scratch space for depth first +/// traversal. After returning, it is guaranteed that `stack` will have length +/// 0. +/// +/// `state` corresponds to the current DFA state on which one wants to compute +/// the transition for the input `unit`. +/// +/// `empty_builder` corresponds to the builder allocation to use to produce a +/// complete `StateBuilderNFA` state. If the state is not needed (or is already +/// cached), then it can be cleared and reused without needing to create a new +/// `State`. The `StateBuilderNFA` state returned is final and ready to be +/// turned into a `State` if necessary. +pub(crate) fn next( + nfa: &thompson::NFA, + match_kind: MatchKind, + sparses: &mut SparseSets, + stack: &mut Vec<StateID>, + state: &State, + unit: alphabet::Unit, + empty_builder: StateBuilderEmpty, +) -> StateBuilderNFA { + sparses.clear(); + + // Whether the NFA is matched in reverse or not. We use this in some + // conditional logic for dealing with the exceptionally annoying CRLF-aware + // line anchors. + let rev = nfa.is_reverse(); + // The look-around matcher that our NFA is configured with. We don't + // actually use it to match look-around assertions, but we do need its + // configuration for constructing states consistent with how it matches. + let lookm = nfa.look_matcher(); + + // Put the NFA state IDs into a sparse set in case we need to + // re-compute their epsilon closure. + // + // Doing this state shuffling is technically not necessary unless some + // kind of look-around is used in the DFA. Some ad hoc experiments + // suggested that avoiding this didn't lead to much of an improvement, + // but perhaps more rigorous experimentation should be done. And in + // particular, avoiding this check requires some light refactoring of + // the code below. 
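+ //
+ // Throughout this function, 'sparses.set1' holds the NFA state IDs of the
+ // current DFA state (re-computed below if new assertions become relevant),
+ // while 'sparses.set2' accumulates the epsilon closure that will form the
+ // next DFA state.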
+ state.iter_nfa_state_ids(|nfa_id| { + sparses.set1.insert(nfa_id); + }); + + // Compute look-ahead assertions originating from the current state. Based + // on the input unit we're transitioning over, some additional set of + // assertions may be true. Thus, we re-compute this state's epsilon closure + // (but only if necessary). Notably, when we build a DFA state initially, + // we don't enable any look-ahead assertions because we don't know whether + // they're true or not at that point. + if !state.look_need().is_empty() { + // Add look-ahead assertions that are now true based on the current + // input unit. + let mut look_have = state.look_have().clone(); + match unit.as_u8() { + Some(b'\r') => { + if !rev || !state.is_half_crlf() { + look_have = look_have.insert(Look::EndCRLF); + } + } + Some(b'\n') => { + if rev || !state.is_half_crlf() { + look_have = look_have.insert(Look::EndCRLF); + } + } + Some(_) => {} + None => { + look_have = look_have + .insert(Look::End) + .insert(Look::EndLF) + .insert(Look::EndCRLF); + } + } + if unit.is_byte(lookm.get_line_terminator()) { + look_have = look_have.insert(Look::EndLF); + } + if state.is_half_crlf() + && ((rev && !unit.is_byte(b'\r')) + || (!rev && !unit.is_byte(b'\n'))) + { + look_have = look_have.insert(Look::StartCRLF); + } + if state.is_from_word() == unit.is_word_byte() { + look_have = look_have + .insert(Look::WordAsciiNegate) + .insert(Look::WordUnicodeNegate); + } else { + look_have = + look_have.insert(Look::WordAscii).insert(Look::WordUnicode); + } + if !unit.is_word_byte() { + look_have = look_have + .insert(Look::WordEndHalfAscii) + .insert(Look::WordEndHalfUnicode); + } + if state.is_from_word() && !unit.is_word_byte() { + look_have = look_have + .insert(Look::WordEndAscii) + .insert(Look::WordEndUnicode); + } else if !state.is_from_word() && unit.is_word_byte() { + look_have = look_have + .insert(Look::WordStartAscii) + .insert(Look::WordStartUnicode); + } + // If we have new assertions satisfied that are among the set of + // assertions that exist in this state (that is, just because we added + // an EndLF assertion above doesn't mean there is an EndLF conditional + // epsilon transition in this state), then we re-compute this state's + // epsilon closure using the updated set of assertions. + // + // Note that since our DFA states omit unconditional epsilon + // transitions, this check is necessary for correctness. If we re-did + // the epsilon closure below needlessly, it could change based on the + // fact that we omitted epsilon states originally. + if !look_have + .subtract(state.look_have()) + .intersect(state.look_need()) + .is_empty() + { + for nfa_id in sparses.set1.iter() { + epsilon_closure( + nfa, + nfa_id, + look_have, + stack, + &mut sparses.set2, + ); + } + sparses.swap(); + sparses.set2.clear(); + } + } + + // Convert our empty builder into one that can record assertions and match + // pattern IDs. + let mut builder = empty_builder.into_matches(); + // Set whether the StartLF look-behind assertion is true for this + // transition or not. The look-behind assertion for ASCII word boundaries + // is handled below. + if nfa.look_set_any().contains_anchor_line() + && unit.is_byte(lookm.get_line_terminator()) + { + // Why only handle StartLF here and not Start? That's because Start + // can only impact the starting state, which is special cased in + // start state handling. + builder.set_look_have(|have| have.insert(Look::StartLF)); + } + // We also need to add StartCRLF to our assertions too, if we can. 
This + // is unfortunately a bit more complicated, because it depends on the + // direction of the search. In the forward direction, ^ matches after a + // \n, but in the reverse direction, ^ only matches after a \r. (This is + // further complicated by the fact that reverse a regex means changing a ^ + // to a $ and vice versa.) + if nfa.look_set_any().contains_anchor_crlf() + && ((rev && unit.is_byte(b'\r')) || (!rev && unit.is_byte(b'\n'))) + { + builder.set_look_have(|have| have.insert(Look::StartCRLF)); + } + // And also for the start-half word boundary assertions. As long as the + // look-behind byte is not a word char, then the assertions are satisfied. + if nfa.look_set_any().contains_word() && !unit.is_word_byte() { + builder.set_look_have(|have| { + have.insert(Look::WordStartHalfAscii) + .insert(Look::WordStartHalfUnicode) + }); + } + for nfa_id in sparses.set1.iter() { + match *nfa.state(nfa_id) { + thompson::State::Union { .. } + | thompson::State::BinaryUnion { .. } + | thompson::State::Fail + | thompson::State::Look { .. } + | thompson::State::Capture { .. } => {} + thompson::State::Match { pattern_id } => { + // Notice here that we are calling the NEW state a match + // state if the OLD state we are transitioning from + // contains an NFA match state. This is precisely how we + // delay all matches by one byte and also what therefore + // guarantees that starting states cannot be match states. + // + // If we didn't delay matches by one byte, then whether + // a DFA is a matching state or not would be determined + // by whether one of its own constituent NFA states + // was a match state. (And that would be done in + // 'add_nfa_states'.) + // + // Also, 'add_match_pattern_id' requires that callers never + // pass duplicative pattern IDs. We do in fact uphold that + // guarantee here, but it's subtle. In particular, a Thompson + // NFA guarantees that each pattern has exactly one match + // state. Moreover, since we're iterating over the NFA state + // IDs in a set, we are guarateed not to have any duplicative + // match states. Thus, it is impossible to add the same pattern + // ID more than once. + // + // N.B. We delay matches by 1 byte as a way to hack 1-byte + // look-around into DFA searches. This lets us support ^, $ + // and ASCII-only \b. The delay is also why we need a special + // "end-of-input" (EOI) sentinel and why we need to follow the + // EOI sentinel at the end of every search. This final EOI + // transition is necessary to report matches found at the end + // of a haystack. + builder.add_match_pattern_id(pattern_id); + if !match_kind.continue_past_first_match() { + break; + } + } + thompson::State::ByteRange { ref trans } => { + if trans.matches_unit(unit) { + epsilon_closure( + nfa, + trans.next, + builder.look_have(), + stack, + &mut sparses.set2, + ); + } + } + thompson::State::Sparse(ref sparse) => { + if let Some(next) = sparse.matches_unit(unit) { + epsilon_closure( + nfa, + next, + builder.look_have(), + stack, + &mut sparses.set2, + ); + } + } + thompson::State::Dense(ref dense) => { + if let Some(next) = dense.matches_unit(unit) { + epsilon_closure( + nfa, + next, + builder.look_have(), + stack, + &mut sparses.set2, + ); + } + } + } + } + // We only set the word byte if there's a word boundary look-around + // anywhere in this regex. Otherwise, there's no point in bloating the + // number of states if we don't have one. + // + // We also only set it when the state has a non-zero number of NFA states. 
+ // Otherwise, we could wind up with states that *should* be DEAD states + // but are otherwise distinct from DEAD states because of this look-behind + // assertion being set. While this can't technically impact correctness *in + // theory*, it can create pathological DFAs that consume input until EOI or + // a quit byte is seen. Consuming until EOI isn't a correctness problem, + // but a (serious) perf problem. Hitting a quit byte, however, could be a + // correctness problem since it could cause search routines to report an + // error instead of a detected match once the quit state is entered. (The + // search routine could be made to be a bit smarter by reporting a match + // if one was detected once it enters a quit state (and indeed, the search + // routines in this crate do just that), but it seems better to prevent + // these things by construction if possible.) + if !sparses.set2.is_empty() { + if nfa.look_set_any().contains_word() && unit.is_word_byte() { + builder.set_is_from_word(); + } + if nfa.look_set_any().contains_anchor_crlf() + && ((rev && unit.is_byte(b'\n')) || (!rev && unit.is_byte(b'\r'))) + { + builder.set_is_half_crlf(); + } + } + let mut builder_nfa = builder.into_nfa(); + add_nfa_states(nfa, &sparses.set2, &mut builder_nfa); + builder_nfa +} + +/// Compute the epsilon closure for the given NFA state. The epsilon closure +/// consists of all NFA state IDs, including `start_nfa_id`, that can be +/// reached from `start_nfa_id` without consuming any input. These state IDs +/// are written to `set` in the order they are visited, but only if they are +/// not already in `set`. `start_nfa_id` must be a valid state ID for the NFA +/// given. +/// +/// `look_have` consists of the satisfied assertions at the current +/// position. For conditional look-around epsilon transitions, these are +/// only followed if they are satisfied by `look_have`. +/// +/// `stack` must have length 0. It is used as scratch space for depth first +/// traversal. After returning, it is guaranteed that `stack` will have length +/// 0. +pub(crate) fn epsilon_closure( + nfa: &thompson::NFA, + start_nfa_id: StateID, + look_have: LookSet, + stack: &mut Vec<StateID>, + set: &mut SparseSet, +) { + assert!(stack.is_empty()); + // If this isn't an epsilon state, then the epsilon closure is always just + // itself, so there's no need to spin up the machinery below to handle it. + if !nfa.state(start_nfa_id).is_epsilon() { + set.insert(start_nfa_id); + return; + } + + stack.push(start_nfa_id); + while let Some(mut id) = stack.pop() { + // In many cases, we can avoid stack operations when an NFA state only + // adds one new state to visit. In that case, we just set our ID to + // that state and mush on. We only use the stack when an NFA state + // introduces multiple new states to visit. + loop { + // Insert this NFA state, and if it's already in the set and thus + // already visited, then we can move on to the next one. + if !set.insert(id) { + break; + } + match *nfa.state(id) { + thompson::State::ByteRange { .. } + | thompson::State::Sparse { .. } + | thompson::State::Dense { .. } + | thompson::State::Fail + | thompson::State::Match { .. 
} => break, + thompson::State::Look { look, next } => { + if !look_have.contains(look) { + break; + } + id = next; + } + thompson::State::Union { ref alternates } => { + id = match alternates.get(0) { + None => break, + Some(&id) => id, + }; + // We need to process our alternates in order to preserve + // match preferences, so put the earliest alternates closer + // to the top of the stack. + stack.extend(alternates[1..].iter().rev()); + } + thompson::State::BinaryUnion { alt1, alt2 } => { + id = alt1; + stack.push(alt2); + } + thompson::State::Capture { next, .. } => { + id = next; + } + } + } + } +} + +/// Add the NFA state IDs in the given `set` to the given DFA builder state. +/// The order in which states are added corresponds to the order in which they +/// were added to `set`. +/// +/// The DFA builder state given should already have its complete set of match +/// pattern IDs added (if any) and any look-behind assertions (StartLF, Start +/// and whether this state is being generated for a transition over a word byte +/// when applicable) that are true immediately prior to transitioning into this +/// state (via `builder.look_have()`). The match pattern IDs should correspond +/// to matches that occurred on the previous transition, since all matches are +/// delayed by one byte. The things that should _not_ be set are look-ahead +/// assertions (EndLF, End and whether the next byte is a word byte or not). +/// The builder state should also not have anything in `look_need` set, as this +/// routine will compute that for you. +/// +/// The given NFA should be able to resolve all identifiers in `set` to a +/// particular NFA state. Additionally, `set` must have capacity equivalent +/// to `nfa.len()`. +pub(crate) fn add_nfa_states( + nfa: &thompson::NFA, + set: &SparseSet, + builder: &mut StateBuilderNFA, +) { + for nfa_id in set.iter() { + match *nfa.state(nfa_id) { + thompson::State::ByteRange { .. } => { + builder.add_nfa_state_id(nfa_id); + } + thompson::State::Sparse { .. } => { + builder.add_nfa_state_id(nfa_id); + } + thompson::State::Dense { .. } => { + builder.add_nfa_state_id(nfa_id); + } + thompson::State::Look { look, .. } => { + builder.add_nfa_state_id(nfa_id); + builder.set_look_need(|need| need.insert(look)); + } + thompson::State::Union { .. } + | thompson::State::BinaryUnion { .. } => { + // Pure epsilon transitions don't need to be tracked as part + // of the DFA state. Tracking them is actually superfluous; + // they won't cause any harm other than making determinization + // slower. + // + // Why aren't these needed? Well, in an NFA, epsilon + // transitions are really just jumping points to other states. + // So once you hit an epsilon transition, the same set of + // resulting states always appears. Therefore, putting them in + // a DFA's set of ordered NFA states is strictly redundant. + // + // Look-around states are also epsilon transitions, but + // they are *conditional*. So their presence could be + // discriminatory, and thus, they are tracked above. + // + // But wait... why are epsilon states in our `set` in the first + // place? Why not just leave them out? They're in our `set` + // because it was generated by computing an epsilon closure, + // and we want to keep track of all states we visited to avoid + // re-visiting them. In exchange, we have to do this second + // iteration over our collected states to finalize our DFA + // state. 
In theory, we could avoid this second iteration if + // we maintained two sets during epsilon closure: the set of + // visited states (to avoid cycles) and the set of states that + // will actually be used to construct the next DFA state. + // + // Note that this optimization requires that we re-compute the + // epsilon closure to account for look-ahead in 'next' *only + // when necessary*. Namely, only when the set of look-around + // assertions changes and only when those changes are within + // the set of assertions that are needed in order to step + // through the closure correctly. Otherwise, if we re-do the + // epsilon closure needlessly, it could change based on the + // fact that we are omitting epsilon states here. + // + // ----- + // + // Welp, scratch the above. It turns out that recording these + // is in fact necessary to seemingly handle one particularly + // annoying case: when a conditional epsilon transition is + // put inside of a repetition operator. One specific case I + // ran into was the regex `(?:\b|%)+` on the haystack `z%`. + // The correct leftmost first matches are: [0, 0] and [1, 1]. + // But the DFA was reporting [0, 0] and [1, 2]. To understand + // why this happens, consider the NFA for the aforementioned + // regex: + // + // >000000: binary-union(4, 1) + // 000001: \x00-\xFF => 0 + // 000002: WordAscii => 5 + // 000003: % => 5 + // ^000004: binary-union(2, 3) + // 000005: binary-union(4, 6) + // 000006: MATCH(0) + // + // The problem here is that one of the DFA start states is + // going to consist of the NFA states [2, 3] by computing the + // epsilon closure of state 4. State 4 isn't included because + // we previously were not keeping track of union states. But + // only a subset of transitions out of this state will be able + // to follow WordAscii, and in those cases, the epsilon closure + // is redone. The only problem is that computing the epsilon + // closure from [2, 3] is different than computing the epsilon + // closure from [4]. In the former case, assuming the WordAscii + // assertion is satisfied, you get: [2, 3, 6]. In the latter + // case, you get: [2, 6, 3]. Notice that '6' is the match state + // and appears AFTER '3' in the former case. This leads to a + // preferential but incorrect match of '%' before returning + // a match. In the latter case, the match is preferred over + // continuing to accept the '%'. + // + // It almost feels like we might be able to fix the NFA states + // to avoid this, or to at least only keep track of union + // states where this actually matters, since in the vast + // majority of cases, this doesn't matter. + // + // Another alternative would be to define a new HIR property + // called "assertion is repeated anywhere" and compute it + // inductively over the entire pattern. If it happens anywhere, + // which is probably pretty rare, then we record union states. + // Otherwise we don't. + builder.add_nfa_state_id(nfa_id); + } + // Capture states we definitely do not need to record, since they + // are unconditional epsilon transitions with no branching. + thompson::State::Capture { .. } => {} + // It's not totally clear whether we need to record fail states or + // not, but we do so out of an abundance of caution. Since they are + // quite rare in practice, there isn't much cost to recording them. + thompson::State::Fail => { + builder.add_nfa_state_id(nfa_id); + } + thompson::State::Match { .. } => { + // Normally, the NFA match state doesn't actually need to + // be inside the DFA state. 
But since we delay matches by + // one byte, the matching DFA state corresponds to states + // that transition from the one we're building here. And + // the way we detect those cases is by looking for an NFA + // match state. See 'next' for how this is handled. + builder.add_nfa_state_id(nfa_id); + } + } + } + // If we know this state contains no look-around assertions, then + // there's no reason to track which look-around assertions were + // satisfied when this state was created. + if builder.look_need().is_empty() { + builder.set_look_have(|_| LookSet::empty()); + } +} + +/// Sets the appropriate look-behind assertions on the given state based on +/// this starting configuration. +pub(crate) fn set_lookbehind_from_start( + nfa: &thompson::NFA, + start: &Start, + builder: &mut StateBuilderMatches, +) { + let rev = nfa.is_reverse(); + let lineterm = nfa.look_matcher().get_line_terminator(); + let lookset = nfa.look_set_any(); + match *start { + Start::NonWordByte => { + if lookset.contains_word() { + builder.set_look_have(|have| { + have.insert(Look::WordStartHalfAscii) + .insert(Look::WordStartHalfUnicode) + }); + } + } + Start::WordByte => { + if lookset.contains_word() { + builder.set_is_from_word(); + } + } + Start::Text => { + if lookset.contains_anchor_haystack() { + builder.set_look_have(|have| have.insert(Look::Start)); + } + if lookset.contains_anchor_line() { + builder.set_look_have(|have| { + have.insert(Look::StartLF).insert(Look::StartCRLF) + }); + } + if lookset.contains_word() { + builder.set_look_have(|have| { + have.insert(Look::WordStartHalfAscii) + .insert(Look::WordStartHalfUnicode) + }); + } + } + Start::LineLF => { + if rev { + if lookset.contains_anchor_crlf() { + builder.set_is_half_crlf(); + } + if lookset.contains_anchor_line() { + builder.set_look_have(|have| have.insert(Look::StartLF)); + } + } else { + if lookset.contains_anchor_line() { + builder.set_look_have(|have| have.insert(Look::StartCRLF)); + } + } + if lookset.contains_anchor_line() && lineterm == b'\n' { + builder.set_look_have(|have| have.insert(Look::StartLF)); + } + if lookset.contains_word() { + builder.set_look_have(|have| { + have.insert(Look::WordStartHalfAscii) + .insert(Look::WordStartHalfUnicode) + }); + } + } + Start::LineCR => { + if lookset.contains_anchor_crlf() { + if rev { + builder.set_look_have(|have| have.insert(Look::StartCRLF)); + } else { + builder.set_is_half_crlf(); + } + } + if lookset.contains_anchor_line() && lineterm == b'\r' { + builder.set_look_have(|have| have.insert(Look::StartLF)); + } + if lookset.contains_word() { + builder.set_look_have(|have| { + have.insert(Look::WordStartHalfAscii) + .insert(Look::WordStartHalfUnicode) + }); + } + } + Start::CustomLineTerminator => { + if lookset.contains_anchor_line() { + builder.set_look_have(|have| have.insert(Look::StartLF)); + } + // This is a bit of a tricky case, but if the line terminator was + // set to a word byte, then we also need to behave as if the start + // configuration is Start::WordByte. That is, we need to mark our + // state as having come from a word byte. 
+ if lookset.contains_word() { + if utf8::is_word_byte(lineterm) { + builder.set_is_from_word(); + } else { + builder.set_look_have(|have| { + have.insert(Look::WordStartHalfAscii) + .insert(Look::WordStartHalfUnicode) + }); + } + } + } + } +} diff --git a/vendor/regex-automata/src/util/determinize/state.rs b/vendor/regex-automata/src/util/determinize/state.rs new file mode 100644 index 0000000..effa6f4 --- /dev/null +++ b/vendor/regex-automata/src/util/determinize/state.rs @@ -0,0 +1,907 @@ +/*! +This module defines a DFA state representation and builders for constructing +DFA states. + +This representation is specifically for use in implementations of NFA-to-DFA +conversion via powerset construction. (Also called "determinization" in this +crate.) + +The term "DFA state" is somewhat overloaded in this crate. In some cases, it +refers to the set of transitions over an alphabet for a particular state. In +other cases, it refers to a set of NFA states. The former is really about the +final representation of a state in a DFA's transition table, where as the +latter---what this module is focused on---is closer to an intermediate form +that is used to help eventually build the transition table. + +This module exports four types. All four types represent the same idea: an +ordered set of NFA states. This ordered set represents the epsilon closure of a +particular NFA state, where the "epsilon closure" is the set of NFA states that +can be transitioned to without consuming any input. i.e., Follow all of the NFA +state's epsilon transitions. In addition, this implementation of DFA states +cares about two other things: the ordered set of pattern IDs corresponding +to the patterns that match if the state is a match state, and the set of +look-behind assertions that were true when the state was created. + +The first, `State`, is a frozen representation of a state that cannot be +modified. It may be cheaply cloned without copying the state itself and can be +accessed safely from multiple threads simultaneously. This type is useful for +when one knows that the DFA state being constructed is distinct from any other +previously constructed states. Namely, powerset construction, in practice, +requires one to keep a cache of previously created DFA states. Otherwise, +the number of DFA states created in memory balloons to an impractically +large number. For this reason, equivalent states should endeavor to have an +equivalent byte-level representation. (In general, "equivalency" here means, +"equivalent assertions, pattern IDs and NFA state IDs." We do not require that +full DFA minimization be implemented here. This form of equivalency is only +surface deep and is more-or-less a practical necessity.) + +The other three types represent different phases in the construction of a +DFA state. Internally, these three types (and `State`) all use the same +byte-oriented representation. That means one can use any of the builder types +to check whether the state it represents already exists or not. If it does, +then there is no need to freeze it into a `State` (which requires an alloc and +a copy). Here are the three types described succinctly: + +* `StateBuilderEmpty` represents a state with no pattern IDs, no assertions +and no NFA states. Creating a `StateBuilderEmpty` performs no allocs. A +`StateBuilderEmpty` can only be used to query its underlying memory capacity, +or to convert into a builder for recording pattern IDs and/or assertions. 
+
+* `StateBuilderMatches` represents a state with zero or more pattern IDs, zero
+or more satisfied assertions and zero NFA state IDs. A `StateBuilderMatches`
+can only be used for adding pattern IDs and recording assertions.
+
+* `StateBuilderNFA` represents a state with zero or more pattern IDs, zero or
+more satisfied assertions and zero or more NFA state IDs. A `StateBuilderNFA`
+can only be used for adding NFA state IDs and recording some assertions.
+
+The expected flow here is to use the above builders to construct a candidate
+DFA state to check if it already exists. If it does, then there's no need to
+freeze it into a `State`. If it doesn't exist, then `StateBuilderNFA::to_state`
+can be called to freeze the builder into an immutable `State`. In either
+case, `clear` should be called on the builder to turn it back into a
+`StateBuilderEmpty` that reuses the underlying memory.
+
+The main purpose for splitting the builder into these distinct types is to
+make it impossible to do things like adding a pattern ID after adding an NFA
+state ID. Namely, this makes it simpler to use a space-and-time efficient
+binary representation for the state. (The format is documented on the `Repr`
+type below.) If we just used one type for everything, it would be possible for
+callers to use an incorrect interleaving of calls and thus result in a corrupt
+representation. I chose to use more type machinery to make this impossible to
+do because 1) determinization is itself pretty complex and it wouldn't be too
+hard to foul this up and 2) there isn't too much machinery involved and it's
+well contained.
+
+As an optimization, sometimes states won't have certain things set. For
+example, if the underlying NFA has no word boundary assertions, then there is
+no reason to set a state's look-behind assertion as to whether it was generated
+from a word byte or not. Similarly, if a state has no NFA states corresponding
+to look-around assertions, then there is no reason to set `look_have` to a
+non-empty set. Finally, callers usually omit unconditional epsilon transitions
+when adding NFA state IDs since they aren't discriminatory.
+
+Finally, the binary representation used by these states is, thankfully, not
+serialized anywhere. So any kind of change can be made with reckless abandon,
+as long as everything in this module agrees.
+*/
+
+use core::{convert::TryFrom, mem};
+
+use alloc::{sync::Arc, vec::Vec};
+
+use crate::util::{
+ int::{I32, U32},
+ look::LookSet,
+ primitives::{PatternID, StateID},
+ wire::{self, Endian},
+};
+
+/// A DFA state that, at its core, is represented by an ordered set of NFA
+/// states.
+///
+/// This type is intended to be used only in NFA-to-DFA conversion via powerset
+/// construction.
+///
+/// It may be cheaply cloned and accessed safely from multiple threads
+/// simultaneously.
+#[derive(Clone, Eq, Hash, PartialEq, PartialOrd, Ord)]
+pub(crate) struct State(Arc<[u8]>);
+
+/// This Borrow impl permits us to look up any state in a map by its byte
+/// representation. This is particularly convenient when one has a StateBuilder
+/// and wants to see if a correspondingly equivalent state already exists. If
+/// one does exist, then we can reuse the allocation required by StateBuilder
+/// without having to convert it into a State first.
+impl core::borrow::Borrow<[u8]> for State { + fn borrow(&self) -> &[u8] { + &*self.0 + } +} + +impl core::fmt::Debug for State { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_tuple("State").field(&self.repr()).finish() + } +} + +/// For docs on these routines, see the internal Repr and ReprVec types below. +impl State { + pub(crate) fn dead() -> State { + StateBuilderEmpty::new().into_matches().into_nfa().to_state() + } + + pub(crate) fn is_match(&self) -> bool { + self.repr().is_match() + } + + pub(crate) fn is_from_word(&self) -> bool { + self.repr().is_from_word() + } + + pub(crate) fn is_half_crlf(&self) -> bool { + self.repr().is_half_crlf() + } + + pub(crate) fn look_have(&self) -> LookSet { + self.repr().look_have() + } + + pub(crate) fn look_need(&self) -> LookSet { + self.repr().look_need() + } + + pub(crate) fn match_len(&self) -> usize { + self.repr().match_len() + } + + pub(crate) fn match_pattern(&self, index: usize) -> PatternID { + self.repr().match_pattern(index) + } + + pub(crate) fn match_pattern_ids(&self) -> Option<Vec<PatternID>> { + self.repr().match_pattern_ids() + } + + #[cfg(all(test, not(miri)))] + pub(crate) fn iter_match_pattern_ids<F: FnMut(PatternID)>(&self, f: F) { + self.repr().iter_match_pattern_ids(f) + } + + pub(crate) fn iter_nfa_state_ids<F: FnMut(StateID)>(&self, f: F) { + self.repr().iter_nfa_state_ids(f) + } + + pub(crate) fn memory_usage(&self) -> usize { + self.0.len() + } + + fn repr(&self) -> Repr<'_> { + Repr(&*self.0) + } +} + +/// A state builder that represents an empty state. +/// +/// This is a useful "initial condition" for state construction. It has no +/// NFA state IDs, no assertions set and no pattern IDs. No allocations are +/// made when new() is called. Its main use is for being converted into a +/// builder that can capture assertions and pattern IDs. +#[derive(Clone, Debug)] +pub(crate) struct StateBuilderEmpty(Vec<u8>); + +/// For docs on these routines, see the internal Repr and ReprVec types below. +impl StateBuilderEmpty { + pub(crate) fn new() -> StateBuilderEmpty { + StateBuilderEmpty(alloc::vec![]) + } + + pub(crate) fn into_matches(mut self) -> StateBuilderMatches { + self.0.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0]); + StateBuilderMatches(self.0) + } + + fn clear(&mut self) { + self.0.clear(); + } + + pub(crate) fn capacity(&self) -> usize { + self.0.capacity() + } +} + +/// A state builder that collects assertions and pattern IDs. +/// +/// When collecting pattern IDs is finished, this can be converted into a +/// builder that collects NFA state IDs. +#[derive(Clone)] +pub(crate) struct StateBuilderMatches(Vec<u8>); + +impl core::fmt::Debug for StateBuilderMatches { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_tuple("StateBuilderMatches").field(&self.repr()).finish() + } +} + +/// For docs on these routines, see the internal Repr and ReprVec types below. 
+impl StateBuilderMatches {
+ pub(crate) fn into_nfa(mut self) -> StateBuilderNFA {
+ self.repr_vec().close_match_pattern_ids();
+ StateBuilderNFA { repr: self.0, prev_nfa_state_id: StateID::ZERO }
+ }
+
+ pub(crate) fn set_is_from_word(&mut self) {
+ self.repr_vec().set_is_from_word()
+ }
+
+ pub(crate) fn set_is_half_crlf(&mut self) {
+ self.repr_vec().set_is_half_crlf()
+ }
+
+ pub(crate) fn look_have(&self) -> LookSet {
+ LookSet::read_repr(&self.0[1..])
+ }
+
+ pub(crate) fn set_look_have(
+ &mut self,
+ set: impl FnMut(LookSet) -> LookSet,
+ ) {
+ self.repr_vec().set_look_have(set)
+ }
+
+ pub(crate) fn add_match_pattern_id(&mut self, pid: PatternID) {
+ self.repr_vec().add_match_pattern_id(pid)
+ }
+
+ fn repr(&self) -> Repr<'_> {
+ Repr(&self.0)
+ }
+
+ fn repr_vec(&mut self) -> ReprVec<'_> {
+ ReprVec(&mut self.0)
+ }
+}
+
+/// A state builder that collects some assertions and NFA state IDs.
+///
+/// When collecting NFA state IDs is finished, this can be used to build a
+/// `State` if necessary.
+///
+/// When done with building a state (regardless of whether it got kept or not),
+/// it's usually a good idea to call `clear` to get an empty builder back so
+/// that it can be reused to build the next state.
+#[derive(Clone)]
+pub(crate) struct StateBuilderNFA {
+ repr: Vec<u8>,
+ prev_nfa_state_id: StateID,
+}
+
+impl core::fmt::Debug for StateBuilderNFA {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ f.debug_tuple("StateBuilderNFA").field(&self.repr()).finish()
+ }
+}
+
+/// For docs on these routines, see the internal Repr and ReprVec types below.
+impl StateBuilderNFA {
+ pub(crate) fn to_state(&self) -> State {
+ State(Arc::from(&*self.repr))
+ }
+
+ pub(crate) fn clear(self) -> StateBuilderEmpty {
+ let mut builder = StateBuilderEmpty(self.repr);
+ builder.clear();
+ builder
+ }
+
+ pub(crate) fn look_need(&self) -> LookSet {
+ self.repr().look_need()
+ }
+
+ pub(crate) fn set_look_have(
+ &mut self,
+ set: impl FnMut(LookSet) -> LookSet,
+ ) {
+ self.repr_vec().set_look_have(set)
+ }
+
+ pub(crate) fn set_look_need(
+ &mut self,
+ set: impl FnMut(LookSet) -> LookSet,
+ ) {
+ self.repr_vec().set_look_need(set)
+ }
+
+ pub(crate) fn add_nfa_state_id(&mut self, sid: StateID) {
+ ReprVec(&mut self.repr)
+ .add_nfa_state_id(&mut self.prev_nfa_state_id, sid)
+ }
+
+ pub(crate) fn as_bytes(&self) -> &[u8] {
+ &self.repr
+ }
+
+ fn repr(&self) -> Repr<'_> {
+ Repr(&self.repr)
+ }
+
+ fn repr_vec(&mut self) -> ReprVec<'_> {
+ ReprVec(&mut self.repr)
+ }
+}
+
+/// Repr is a read-only view into the representation of a DFA state.
+///
+/// Primarily, a Repr is how we achieve DRY: we implement decoding the format
+/// in one place, and then use a Repr to implement the various methods on the
+/// public state types.
+///
+/// The format is as follows:
+///
+/// The first nine bytes correspond to three bitsets.
+///
+/// Byte 0 is a bitset corresponding to miscellaneous flags associated with the
+/// state. Bit 0 is set to 1 if the state is a match state. Bit 1 is set to 1
+/// if the state has pattern IDs explicitly written to it. (This is a flag that
+/// is not meant to be set by determinization, but rather, is used as part of
+/// an internal space-saving optimization.) Bit 2 is set to 1 if the state was
+/// generated by a transition over a "word" byte. (Callers may not always set
+/// this. For example, if the NFA has no word boundary assertion, then needing
+/// to track whether a state came from a word byte or not is superfluous and
+/// wasteful.) Bit 3 is set to 1 if the state was generated by a transition
+/// from a `\r` (forward search) or a `\n` (reverse search) when CRLF mode is
+/// enabled.
+///
+/// Bytes 1..5 correspond to the look-behind assertions that were satisfied
+/// by the transition that created this state. (Look-ahead assertions are not
+/// tracked as part of states. Instead, these are applied by re-computing the
+/// epsilon closure of a state when computing the transition function. See
+/// `next` in the parent module.)
+///
+/// Bytes 5..9 correspond to the set of look-around assertions (including both
+/// look-behind and look-ahead) that appear somewhere in this state's set of
+/// NFA state IDs. This is used to determine whether this state's epsilon
+/// closure should be re-computed when computing the transition function.
+/// Namely, look-around assertions are "just" conditional epsilon transitions,
+/// so if there are new assertions available when computing the transition
+/// function, we should only re-compute the epsilon closure if those new
+/// assertions are relevant to this particular state.
+///
+/// Bytes 9..13 correspond to a 32-bit native-endian encoded integer
+/// corresponding to the number of patterns encoded in this state. If the state
+/// is not a match state (byte 0 bit 0 is 0) or if its only pattern ID is
+/// PatternID::ZERO, then no integer is encoded at this position. Instead, byte
+/// offset 9 is the position at which the first NFA state ID is encoded.
+///
+/// For a match state with at least one non-ZERO pattern ID, the next bytes
+/// correspond to a sequence of 32-bit native endian encoded integers that
+/// represent each pattern ID, in order, that this match state represents.
+///
+/// After the pattern IDs (if any), NFA state IDs are delta encoded as
+/// varints.[1] The first NFA state ID is encoded as itself, and each
+/// subsequent NFA state ID is encoded as the difference between itself and the
+/// previous NFA state ID.
+///
+/// [1] - https://developers.google.com/protocol-buffers/docs/encoding#varints
+struct Repr<'a>(&'a [u8]);
+
+impl<'a> Repr<'a> {
+ /// Returns true if and only if this is a match state.
+ ///
+ /// If callers have added pattern IDs to this state, then callers MUST set
+ /// this state as a match state explicitly. However, as a special case, a
+ /// state that is marked as a match state but has no pattern IDs is treated
+ /// as if it had a single pattern ID equivalent to PatternID::ZERO.
+ fn is_match(&self) -> bool {
+ self.0[0] & (1 << 0) > 0
+ }
+
+ /// Returns true if and only if this state has had at least one pattern
+ /// ID added to it.
+ ///
+ /// This is an internal-only flag that permits the representation to save
+ /// space in the common case of an NFA with one pattern in it. In that
+ /// case, a match state can only ever have exactly one pattern ID:
+ /// PatternID::ZERO. So there's no need to represent it.
+ fn has_pattern_ids(&self) -> bool {
+ self.0[0] & (1 << 1) > 0
+ }
+
+ /// Returns true if and only if this state is marked as having been created
+ /// from a transition over a word byte. This is useful for checking whether
+ /// a word boundary assertion is true or not, which requires look-behind
+ /// (whether the current state came from a word byte or not) and look-ahead
+ /// (whether the transition byte is a word byte or not).
+ /// + /// Since states with this set are distinct from states that don't have + /// this set (even if they are otherwise equivalent), callers should not + /// set this assertion unless the underlying NFA has at least one word + /// boundary assertion somewhere. Otherwise, a superfluous number of states + /// may be created. + fn is_from_word(&self) -> bool { + self.0[0] & (1 << 2) > 0 + } + + /// Returns true if and only if this state is marked as being inside of a + /// CRLF terminator. In the forward direction, this means the state was + /// created after seeing a `\r`. In the reverse direction, this means the + /// state was created after seeing a `\n`. + fn is_half_crlf(&self) -> bool { + self.0[0] & (1 << 3) > 0 + } + + /// The set of look-behind assertions that were true in the transition that + /// created this state. + /// + /// Generally, this should be empty if 'look_need' is empty, since there is + /// no reason to track which look-behind assertions are true if the state + /// has no conditional epsilon transitions. + /// + /// Satisfied look-ahead assertions are not tracked in states. Instead, + /// these are re-computed on demand via epsilon closure when computing the + /// transition function. + fn look_have(&self) -> LookSet { + LookSet::read_repr(&self.0[1..]) + } + + /// The set of look-around (both behind and ahead) assertions that appear + /// at least once in this state's set of NFA states. + /// + /// This is used to determine whether the epsilon closure needs to be + /// re-computed when computing the transition function. Namely, if the + /// state has no conditional epsilon transitions, then there is no need + /// to re-compute the epsilon closure. + fn look_need(&self) -> LookSet { + LookSet::read_repr(&self.0[5..]) + } + + /// Returns the total number of match pattern IDs in this state. + /// + /// If this state is not a match state, then this always returns 0. + fn match_len(&self) -> usize { + if !self.is_match() { + return 0; + } else if !self.has_pattern_ids() { + 1 + } else { + self.encoded_pattern_len() + } + } + + /// Returns the pattern ID for this match state at the given index. + /// + /// If the given index is greater than or equal to `match_len()` for this + /// state, then this could panic or return incorrect results. + fn match_pattern(&self, index: usize) -> PatternID { + if !self.has_pattern_ids() { + PatternID::ZERO + } else { + let offset = 13 + index * PatternID::SIZE; + // This is OK since we only ever serialize valid PatternIDs to + // states. + wire::read_pattern_id_unchecked(&self.0[offset..]).0 + } + } + + /// Returns a copy of all match pattern IDs in this state. If this state + /// is not a match state, then this returns None. + fn match_pattern_ids(&self) -> Option<Vec<PatternID>> { + if !self.is_match() { + return None; + } + let mut pids = alloc::vec![]; + self.iter_match_pattern_ids(|pid| pids.push(pid)); + Some(pids) + } + + /// Calls the given function on every pattern ID in this state. + fn iter_match_pattern_ids<F: FnMut(PatternID)>(&self, mut f: F) { + if !self.is_match() { + return; + } + // As an optimization for a very common case, when this is a match + // state for an NFA with only one pattern, we don't actually write the + // pattern ID to the state representation. Instead, we know it must + // be there since it is the only possible choice. 
+ if !self.has_pattern_ids() { + f(PatternID::ZERO); + return; + } + let mut pids = &self.0[13..self.pattern_offset_end()]; + while !pids.is_empty() { + let pid = wire::read_u32(pids); + pids = &pids[PatternID::SIZE..]; + // This is OK since we only ever serialize valid PatternIDs to + // states. And since pattern IDs can never exceed a usize, the + // unwrap is OK. + f(PatternID::new_unchecked(usize::try_from(pid).unwrap())); + } + } + + /// Calls the given function on every NFA state ID in this state. + fn iter_nfa_state_ids<F: FnMut(StateID)>(&self, mut f: F) { + let mut sids = &self.0[self.pattern_offset_end()..]; + let mut prev = 0i32; + while !sids.is_empty() { + let (delta, nr) = read_vari32(sids); + sids = &sids[nr..]; + let sid = prev + delta; + prev = sid; + // This is OK since we only ever serialize valid StateIDs to + // states. And since state IDs can never exceed an isize, they must + // always be able to fit into a usize, and thus cast is OK. + f(StateID::new_unchecked(sid.as_usize())) + } + } + + /// Returns the offset into this state's representation where the pattern + /// IDs end and the NFA state IDs begin. + fn pattern_offset_end(&self) -> usize { + let encoded = self.encoded_pattern_len(); + if encoded == 0 { + return 9; + } + // This arithmetic is OK since we were able to address this many bytes + // when writing to the state, thus, it must fit into a usize. + encoded.checked_mul(4).unwrap().checked_add(13).unwrap() + } + + /// Returns the total number of *encoded* pattern IDs in this state. + /// + /// This may return 0 even when this is a match state, since the pattern + /// ID `PatternID::ZERO` is not encoded when it's the only pattern ID in + /// the match state (the overwhelming common case). + fn encoded_pattern_len(&self) -> usize { + if !self.has_pattern_ids() { + return 0; + } + // This unwrap is OK since the total number of patterns is always + // guaranteed to fit into a usize. + usize::try_from(wire::read_u32(&self.0[9..13])).unwrap() + } +} + +impl<'a> core::fmt::Debug for Repr<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + let mut nfa_ids = alloc::vec![]; + self.iter_nfa_state_ids(|sid| nfa_ids.push(sid)); + f.debug_struct("Repr") + .field("is_match", &self.is_match()) + .field("is_from_word", &self.is_from_word()) + .field("is_half_crlf", &self.is_half_crlf()) + .field("look_have", &self.look_have()) + .field("look_need", &self.look_need()) + .field("match_pattern_ids", &self.match_pattern_ids()) + .field("nfa_state_ids", &nfa_ids) + .finish() + } +} + +/// ReprVec is a write-only view into the representation of a DFA state. +/// +/// See Repr for more details on the purpose of this type and also the format. +/// +/// Note that not all possible combinations of methods may be called. This is +/// precisely what the various StateBuilder types encapsulate: they only +/// permit valid combinations via Rust's linear typing. +struct ReprVec<'a>(&'a mut Vec<u8>); + +impl<'a> ReprVec<'a> { + /// Set this state as a match state. + /// + /// This should not be exposed explicitly outside of this module. It is + /// set automatically when a pattern ID is added. + fn set_is_match(&mut self) { + self.0[0] |= 1 << 0; + } + + /// Set that this state has pattern IDs explicitly written to it. + /// + /// This should not be exposed explicitly outside of this module. This is + /// used internally as a space saving optimization. 
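Stepping back to `iter_nfa_state_ids` above: NFA state IDs are stored as deltas from the previously written ID. A minimal sketch of that scheme follows (illustrative only, with made-up function names and the varint layer omitted; the varint routines appear further down in this file):

```rust
// Illustrative only: delta encoding of NFA state IDs, shown with plain i32s.
// The real representation additionally writes each delta as a zig-zag varint.
fn encode_deltas(ids: &[i32]) -> Vec<i32> {
    let mut prev = 0;
    ids.iter()
        .map(|&id| {
            let delta = id - prev;
            prev = id;
            delta
        })
        .collect()
}

fn decode_deltas(deltas: &[i32]) -> Vec<i32> {
    let mut prev = 0;
    deltas
        .iter()
        .map(|&delta| {
            prev += delta;
            prev
        })
        .collect()
}

fn main() {
    let ids = vec![5, 7, 8, 30];
    let deltas = encode_deltas(&ids);
    assert_eq!(deltas, vec![5, 2, 1, 22]);
    assert_eq!(decode_deltas(&deltas), ids);
}
```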
Namely, if the state + /// is a match state but does not have any pattern IDs written to it, + /// then it is automatically inferred to have a pattern ID of ZERO. + fn set_has_pattern_ids(&mut self) { + self.0[0] |= 1 << 1; + } + + /// Set this state as being built from a transition over a word byte. + /// + /// Setting this is only necessary when one needs to deal with word + /// boundary assertions. Therefore, if the underlying NFA has no word + /// boundary assertions, callers should not set this. + fn set_is_from_word(&mut self) { + self.0[0] |= 1 << 2; + } + + /// Set this state as having seen half of a CRLF terminator. + /// + /// In the forward direction, this should be set when a `\r` has been seen. + /// In the reverse direction, this should be set when a `\n` has been seen. + fn set_is_half_crlf(&mut self) { + self.0[0] |= 1 << 3; + } + + /// The set of look-behind assertions that were true in the transition that + /// created this state. + fn look_have(&self) -> LookSet { + self.repr().look_have() + } + + /// The set of look-around (both behind and ahead) assertions that appear + /// at least once in this state's set of NFA states. + fn look_need(&self) -> LookSet { + self.repr().look_need() + } + + /// Mutate the set of look-behind assertions that were true in the + /// transition that created this state. + fn set_look_have(&mut self, mut set: impl FnMut(LookSet) -> LookSet) { + set(self.look_have()).write_repr(&mut self.0[1..]); + } + + /// Mutate the set of look-around (both behind and ahead) assertions that + /// appear at least once in this state's set of NFA states. + fn set_look_need(&mut self, mut set: impl FnMut(LookSet) -> LookSet) { + set(self.look_need()).write_repr(&mut self.0[5..]); + } + + /// Add a pattern ID to this state. All match states must have at least + /// one pattern ID associated with it. + /// + /// Callers must never add duplicative pattern IDs. + /// + /// The order in which patterns are added must correspond to the order + /// in which patterns are reported as matches. + fn add_match_pattern_id(&mut self, pid: PatternID) { + // As a (somewhat small) space saving optimization, in the case where + // a matching state has exactly one pattern ID, PatternID::ZERO, we do + // not write either the pattern ID or the number of patterns encoded. + // Instead, all we do is set the 'is_match' bit on this state. Overall, + // this saves 8 bytes per match state for the overwhelming majority of + // match states. + // + // In order to know whether pattern IDs need to be explicitly read or + // not, we use another internal-only bit, 'has_pattern_ids', to + // indicate whether they have been explicitly written or not. + if !self.repr().has_pattern_ids() { + if pid == PatternID::ZERO { + self.set_is_match(); + return; + } + // Make room for 'close_match_pattern_ids' to write the total + // number of pattern IDs written. + self.0.extend(core::iter::repeat(0).take(PatternID::SIZE)); + self.set_has_pattern_ids(); + // If this was already a match state, then the only way that's + // possible when the state doesn't have pattern IDs is if + // PatternID::ZERO was added by the caller previously. In this + // case, we are now adding a non-ZERO pattern ID after it, in + // which case, we want to make sure to represent ZERO explicitly + // now. + if self.repr().is_match() { + write_u32(self.0, 0) + } else { + // Otherwise, just make sure the 'is_match' bit is set. 
+ self.set_is_match(); + } + } + write_u32(self.0, pid.as_u32()); + } + + /// Indicate that no more pattern IDs will be added to this state. + /// + /// Once this is called, callers must not call it or 'add_match_pattern_id' + /// again. + /// + /// This should not be exposed explicitly outside of this module. It + /// should be called only when converting a StateBuilderMatches into a + /// StateBuilderNFA. + fn close_match_pattern_ids(&mut self) { + // If we never wrote any pattern IDs, then there's nothing to do here. + if !self.repr().has_pattern_ids() { + return; + } + let patsize = PatternID::SIZE; + let pattern_bytes = self.0.len() - 13; + // Every pattern ID uses 4 bytes, so number of bytes should be + // divisible by 4. + assert_eq!(pattern_bytes % patsize, 0); + // This unwrap is OK since we are guaranteed that the maximum number + // of possible patterns fits into a u32. + let count32 = u32::try_from(pattern_bytes / patsize).unwrap(); + wire::NE::write_u32(count32, &mut self.0[9..13]); + } + + /// Add an NFA state ID to this state. The order in which NFA states are + /// added matters. It is the caller's responsibility to ensure that + /// duplicate NFA state IDs are not added. + fn add_nfa_state_id(&mut self, prev: &mut StateID, sid: StateID) { + let delta = sid.as_i32() - prev.as_i32(); + write_vari32(self.0, delta); + *prev = sid; + } + + /// Return a read-only view of this state's representation. + fn repr(&self) -> Repr<'_> { + Repr(self.0.as_slice()) + } +} + +/// Write a signed 32-bit integer using zig-zag encoding. +/// +/// https://developers.google.com/protocol-buffers/docs/encoding#varints +fn write_vari32(data: &mut Vec<u8>, n: i32) { + let mut un = n.to_bits() << 1; + if n < 0 { + un = !un; + } + write_varu32(data, un) +} + +/// Read a signed 32-bit integer using zig-zag encoding. Also, return the +/// number of bytes read. +/// +/// https://developers.google.com/protocol-buffers/docs/encoding#varints +fn read_vari32(data: &[u8]) -> (i32, usize) { + let (un, i) = read_varu32(data); + let mut n = i32::from_bits(un >> 1); + if un & 1 != 0 { + n = !n; + } + (n, i) +} + +/// Write an unsigned 32-bit integer as a varint. In essence, `n` is written +/// as a sequence of bytes where all bytes except for the last one have the +/// most significant bit set. The least significant 7 bits correspond to the +/// actual bits of `n`. So in the worst case, a varint uses 5 bytes, but in +/// very common cases, it uses fewer than 4. +/// +/// https://developers.google.com/protocol-buffers/docs/encoding#varints +fn write_varu32(data: &mut Vec<u8>, mut n: u32) { + while n >= 0b1000_0000 { + data.push(n.low_u8() | 0b1000_0000); + n >>= 7; + } + data.push(n.low_u8()); +} + +/// Read an unsigned 32-bit varint. Also, return the number of bytes read. +/// +/// https://developers.google.com/protocol-buffers/docs/encoding#varints +fn read_varu32(data: &[u8]) -> (u32, usize) { + // N.B. We can assume correctness here since we know that all varuints are + // written with write_varu32. Hence, the 'as' uses and unchecked arithmetic + // is all okay. + let mut n: u32 = 0; + let mut shift: u32 = 0; + for (i, &b) in data.iter().enumerate() { + if b < 0b1000_0000 { + return (n | (u32::from(b) << shift), i + 1); + } + n |= (u32::from(b) & 0b0111_1111) << shift; + shift += 7; + } + (0, 0) +} + +/// Push a native-endian encoded `n` on to `dst`. 
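Before the last helper (`write_u32`), here is a self-contained round-trip sketch of the zig-zag varint scheme defined just above (illustrative only; it uses plain casts instead of the crate's integer helper traits, but follows the same logic):

```rust
// Illustrative only: the zig-zag varint encoding used for NFA state ID
// deltas, written without the crate's low_u8/to_bits helpers.
fn write_varu32(data: &mut Vec<u8>, mut n: u32) {
    while n >= 0b1000_0000 {
        data.push((n as u8) | 0b1000_0000);
        n >>= 7;
    }
    data.push(n as u8);
}

fn write_vari32(data: &mut Vec<u8>, n: i32) {
    // Zig-zag: small negative and positive values map to small unsigned ones.
    let mut un = (n as u32) << 1;
    if n < 0 {
        un = !un;
    }
    write_varu32(data, un);
}

fn read_varu32(data: &[u8]) -> (u32, usize) {
    let (mut n, mut shift) = (0u32, 0u32);
    for (i, &b) in data.iter().enumerate() {
        if b < 0b1000_0000 {
            return (n | (u32::from(b) << shift), i + 1);
        }
        n |= (u32::from(b) & 0b0111_1111) << shift;
        shift += 7;
    }
    (0, 0)
}

fn read_vari32(data: &[u8]) -> (i32, usize) {
    let (un, i) = read_varu32(data);
    let mut n = (un >> 1) as i32;
    if un & 1 != 0 {
        n = !n;
    }
    (n, i)
}

fn main() {
    for &n in &[0i32, 1, -1, 63, -64, 300, -300, i32::MAX, i32::MIN] {
        let mut buf = vec![];
        write_vari32(&mut buf, n);
        let (got, nread) = read_vari32(&buf);
        assert_eq!((got, nread), (n, buf.len()));
    }
}
```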
+fn write_u32(dst: &mut Vec<u8>, n: u32) { + use crate::util::wire::NE; + + let start = dst.len(); + dst.extend(core::iter::repeat(0).take(mem::size_of::<u32>())); + NE::write_u32(n, &mut dst[start..]); +} + +#[cfg(test)] +mod tests { + use alloc::vec; + + use quickcheck::quickcheck; + + use super::*; + + #[cfg(not(miri))] + quickcheck! { + fn prop_state_read_write_nfa_state_ids(sids: Vec<StateID>) -> bool { + // Builders states do not permit duplicate IDs. + let sids = dedup_state_ids(sids); + + let mut b = StateBuilderEmpty::new().into_matches().into_nfa(); + for &sid in &sids { + b.add_nfa_state_id(sid); + } + let s = b.to_state(); + let mut got = vec![]; + s.iter_nfa_state_ids(|sid| got.push(sid)); + got == sids + } + + fn prop_state_read_write_pattern_ids(pids: Vec<PatternID>) -> bool { + // Builders states do not permit duplicate IDs. + let pids = dedup_pattern_ids(pids); + + let mut b = StateBuilderEmpty::new().into_matches(); + for &pid in &pids { + b.add_match_pattern_id(pid); + } + let s = b.into_nfa().to_state(); + let mut got = vec![]; + s.iter_match_pattern_ids(|pid| got.push(pid)); + got == pids + } + + fn prop_state_read_write_nfa_state_and_pattern_ids( + sids: Vec<StateID>, + pids: Vec<PatternID> + ) -> bool { + // Builders states do not permit duplicate IDs. + let sids = dedup_state_ids(sids); + let pids = dedup_pattern_ids(pids); + + let mut b = StateBuilderEmpty::new().into_matches(); + for &pid in &pids { + b.add_match_pattern_id(pid); + } + + let mut b = b.into_nfa(); + for &sid in &sids { + b.add_nfa_state_id(sid); + } + + let s = b.to_state(); + let mut got_pids = vec![]; + s.iter_match_pattern_ids(|pid| got_pids.push(pid)); + let mut got_sids = vec![]; + s.iter_nfa_state_ids(|sid| got_sids.push(sid)); + got_pids == pids && got_sids == sids + } + } + + quickcheck! { + fn prop_read_write_varu32(n: u32) -> bool { + let mut buf = vec![]; + write_varu32(&mut buf, n); + let (got, nread) = read_varu32(&buf); + nread == buf.len() && got == n + } + + fn prop_read_write_vari32(n: i32) -> bool { + let mut buf = vec![]; + write_vari32(&mut buf, n); + let (got, nread) = read_vari32(&buf); + nread == buf.len() && got == n + } + } + + #[cfg(not(miri))] + fn dedup_state_ids(sids: Vec<StateID>) -> Vec<StateID> { + let mut set = alloc::collections::BTreeSet::new(); + let mut deduped = vec![]; + for sid in sids { + if set.contains(&sid) { + continue; + } + set.insert(sid); + deduped.push(sid); + } + deduped + } + + #[cfg(not(miri))] + fn dedup_pattern_ids(pids: Vec<PatternID>) -> Vec<PatternID> { + let mut set = alloc::collections::BTreeSet::new(); + let mut deduped = vec![]; + for pid in pids { + if set.contains(&pid) { + continue; + } + set.insert(pid); + deduped.push(pid); + } + deduped + } +} diff --git a/vendor/regex-automata/src/util/empty.rs b/vendor/regex-automata/src/util/empty.rs new file mode 100644 index 0000000..e16af3b --- /dev/null +++ b/vendor/regex-automata/src/util/empty.rs @@ -0,0 +1,265 @@ +/*! +This module provides helper routines for dealing with zero-width matches. + +The main problem being solved here is this: + +1. The caller wants to search something that they know is valid UTF-8, such +as a Rust `&str`. +2. The regex used by the caller can match the empty string. For example, `a*`. +3. The caller should never get match offsets returned that occur within the +encoding of a UTF-8 codepoint. It is logically incorrect, and also means that, +e.g., slicing the `&str` at those offsets will lead to a panic. 
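To make point (3) concrete, here is a tiny standalone illustration (not part of the vendored file) of why an offset inside a codepoint is unusable for a caller holding a `&str`:

```rust
fn main() {
    let haystack = "☃"; // UTF-8: \xE2\x98\x83
    // Offsets 0 and 3 are codepoint boundaries; 1 and 2 are not.
    let boundaries: Vec<bool> =
        (0..=haystack.len()).map(|i| haystack.is_char_boundary(i)).collect();
    assert_eq!(boundaries, vec![true, false, false, true]);
    // A reported empty match at [1, 1] could not even be sliced:
    // `&haystack[1..1]` panics because 1 is not a char boundary.
}
```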
+ +So the question here is, how do we prevent the caller from getting match +offsets that split a codepoint? For example, strictly speaking, the regex `a*` +matches `☃` at the positions `[0, 0]`, `[1, 1]`, `[2, 2]` and `[3, 3]` since +the UTF-8 encoding of `☃` is `\xE2\x98\x83`. In particular, the `NFA` that +underlies all of the matching engines in this crate doesn't have anything in +its state graph that prevents matching between UTF-8 code units. Indeed, any +engine derived from the `NFA` will match at those positions by virtue of the +fact that the `NFA` is byte oriented. That is, its transitions are defined over +bytes and the matching engines work by proceeding one byte at a time. + +(An alternative architecture would be to define the transitions in an `NFA` +over codepoints, or `char`. And then make the matching engines proceed by +decoding one codepoint at a time. This is a viable strategy, but it doesn't +work for DFA matching engines because designing a fast and memory efficient +transition table for an alphabet as large as Unicode is quite difficult. More +to the point, the top-level `regex` crate supports matching on arbitrary bytes +when Unicode mode is disabled and one is searching a `&[u8]`. So in that case, +you can't just limit yourself to decoding codepoints and matching those. You +really do need to be able to follow byte oriented transitions on the `NFA`.) + +In an older version of the regex crate, we handled this case not in the regex +engine, but in the iterators over matches. Namely, since this case only arises +when the match is empty, we "just" incremented the next starting position +of the search by `N`, where `N` is the length of the codepoint encoded at +the current position. The alternative or more "natural" solution of just +incrementing by `1` would result in executing a search of `a*` on `☃` like +this: + +* Start search at `0`. +* Found match at `[0, 0]`. +* Next start position is `0`. +* To avoid an infinite loop, since it's an empty match, increment by `1`. +* Start search at `1`. +* Found match at `[1, 1]`. Oops. + +But if we instead incremented by `3` (the length in bytes of `☃`), then we get +the following: + +* Start search at `0`. +* Found match at `[0, 0]`. +* Next start position is `0`. +* To avoid an infinite loop, since it's an empty match, increment by `3`. +* Start search at `3`. +* Found match at `[3, 3]`. + +And we get the correct result. But does this technique work in all cases? +Crucially, it requires that a zero-width match that splits a codepoint never +occurs beyond the starting position of the search. Because if it did, merely +incrementing the start position by the number of bytes in the codepoint at +the current position wouldn't be enough. A zero-width match could just occur +anywhere. It turns out that it is _almost_ true. We can convince ourselves by +looking at all possible patterns that can match the empty string: + +* Patterns like `a*`, `a{0}`, `(?:)`, `a|` and `|a` all unconditionally match +the empty string. That is, assuming there isn't an `a` at the current position, +they will all match the empty string at the start of a search. There is no way +to move past it because any other match would not be "leftmost." +* `^` only matches at the beginning of the haystack, where the start position +is `0`. 
Since we know we're searching valid UTF-8 (if it isn't valid UTF-8, +then this entire problem goes away because it implies your string type supports +invalid UTF-8 and thus must deal with offsets that not only split a codepoint +but occur in entirely invalid UTF-8 somehow), it follows that `^` never matches +between the code units of a codepoint because the start of a valid UTF-8 string +is never within the encoding of a codepoint. +* `$` basically the same logic as `^`, but for the end of a string. A valid +UTF-8 string can't have an incomplete codepoint at the end of it. +* `(?m:^)` follows similarly to `^`, but it can match immediately following +a `\n`. However, since a `\n` is always a codepoint itself and can never +appear within a codepoint, it follows that the position immediately following +a `\n` in a string that is valid UTF-8 is guaranteed to not be between the +code units of another codepoint. (One caveat here is that the line terminator +for multi-line anchors can now be changed to any arbitrary byte, including +things like `\x98` which might occur within a codepoint. However, this wasn't +supported by the old regex crate. If it was, it pose the same problems as +`(?-u:\B)`, as we'll discuss below.) +* `(?m:$)` a similar argument as for `(?m:^)`. The only difference is that a +`(?m:$)` matches just before a `\n`. But the same argument applies. +* `(?Rm:^)` and `(?Rm:$)` weren't supported by the old regex crate, but the +CRLF aware line anchors follow a similar argument as for `(?m:^)` and `(?m:$)`. +Namely, since they only ever match at a boundary where one side is either a +`\r` or a `\n`, neither of which can occur within a codepoint. +* `\b` only matches at positions where both sides are valid codepoints, so +this cannot split a codepoint. +* `\B`, like `\b`, also only matches at positions where both sides are valid +codepoints. So this cannot split a codepoint either. +* `(?-u:\b)` matches only at positions where at least one side of it is an ASCII +word byte. Since ASCII bytes cannot appear as code units in non-ASCII codepoints +(one of the many amazing qualities of UTF-8), it follows that this too cannot +split a codepoint. +* `(?-u:\B)` finally represents a problem. It can matches between *any* two +bytes that are either both word bytes or non-word bytes. Since code units like +`\xE2` and `\x98` (from the UTF-8 encoding of `☃`) are both non-word bytes, +`(?-u:\B)` will match at the position between them. + +Thus, our approach of incrementing one codepoint at a time after seeing an +empty match is flawed because `(?-u:\B)` can result in an empty match that +splits a codepoint at a position past the starting point of a search. For +example, searching `(?-u:\B)` on `a☃` would produce the following matches: `[2, +2]`, `[3, 3]` and `[4, 4]`. The positions at `0` and `1` don't match because +they correspond to word boundaries since `a` is an ASCII word byte. + +So what did the old regex crate do to avoid this? It banned `(?-u:\B)` from +regexes that could match `&str`. That might sound extreme, but a lot of other +things were banned too. For example, all of `(?-u:.)`, `(?-u:[^a])` and +`(?-u:\W)` can match invalid UTF-8 too, including individual code units with a +codepoint. The key difference is that those expressions could never produce an +empty match. That ban happens when translating an `Ast` to an `Hir`, because +that process that reason about whether an `Hir` can produce *non-empty* matches +at invalid UTF-8 boundaries. 
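To make the `(?-u:\B)` case concrete, here is a small standalone check (illustrative only; the helper name is made up) of the claim about `a☃`:

```rust
// Illustrative only: why `(?-u:\B)` can match inside a codepoint. An ASCII
// word byte is one of `[0-9A-Za-z_]`, and both code units between offsets
// 1 and 3 of `a☃` are non-word bytes.
fn is_ascii_word_byte(b: u8) -> bool {
    b.is_ascii_alphanumeric() || b == b'_'
}

fn main() {
    let bytes = "a☃".as_bytes(); // [0x61, 0xE2, 0x98, 0x83]
    // Offset 2 sits between 0xE2 and 0x98.
    assert!(!is_ascii_word_byte(bytes[1]));
    assert!(!is_ascii_word_byte(bytes[2]));
    // Both sides are non-word bytes, so `(?-u:\B)` holds at offset 2, which
    // splits the encoding of `☃`.
}
```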
Bottom line though is that we side-stepped the +`(?-u:\B)` issue by banning it. + +If banning `(?-u:\B)` were the only issue with the old regex crate's approach, +then I probably would have kept it. `\B` is rarely used, so it's not such a big +deal to have to work-around it. However, the problem with the above approach +is that it doesn't compose. The logic for avoiding splitting a codepoint only +lived in the iterator, which means if anyone wants to implement their own +iterator over regex matches, they have to deal with this extremely subtle edge +case to get full correctness. + +Instead, in this crate, we take the approach of pushing this complexity down +to the lowest layers of each regex engine. The approach is pretty simple: + +* If this corner case doesn't apply, don't do anything. (For example, if UTF-8 +mode isn't enabled or if the regex cannot match the empty string.) +* If an empty match is reported, explicitly check if it splits a codepoint. +* If it doesn't, we're done, return the match. +* If it does, then ignore the match and re-run the search. +* Repeat the above process until the end of the haystack is reached or a match +is found that doesn't split a codepoint or isn't zero width. + +And that's pretty much what this module provides. Every regex engine uses these +methods in their lowest level public APIs, but just above the layer where +their internal engine is used. That way, all regex engines can be arbitrarily +composed without worrying about handling this case, and iterators don't need to +handle it explicitly. + +(It turns out that a new feature I added, support for changing the line +terminator in a regex to any arbitrary byte, also provokes the above problem. +Namely, the byte could be invalid UTF-8 or a UTF-8 continuation byte. So that +support would need to be limited or banned when UTF-8 mode is enabled, just +like we did for `(?-u:\B)`. But thankfully our more robust approach in this +crate handles that case just fine too.) +*/ + +use crate::util::search::{Input, MatchError}; + +#[cold] +#[inline(never)] +pub(crate) fn skip_splits_fwd<T, F>( + input: &Input<'_>, + init_value: T, + match_offset: usize, + find: F, +) -> Result<Option<T>, MatchError> +where + F: FnMut(&Input<'_>) -> Result<Option<(T, usize)>, MatchError>, +{ + skip_splits(true, input, init_value, match_offset, find) +} + +#[cold] +#[inline(never)] +pub(crate) fn skip_splits_rev<T, F>( + input: &Input<'_>, + init_value: T, + match_offset: usize, + find: F, +) -> Result<Option<T>, MatchError> +where + F: FnMut(&Input<'_>) -> Result<Option<(T, usize)>, MatchError>, +{ + skip_splits(false, input, init_value, match_offset, find) +} + +fn skip_splits<T, F>( + forward: bool, + input: &Input<'_>, + init_value: T, + mut match_offset: usize, + mut find: F, +) -> Result<Option<T>, MatchError> +where + F: FnMut(&Input<'_>) -> Result<Option<(T, usize)>, MatchError>, +{ + // If our config says to do an anchored search, then we're definitely + // done. We just need to determine whether we have a valid match or + // not. If we don't, then we're not allowed to continue, so we report + // no match. + // + // This is actually quite a subtle correctness thing. The key here is + // that if we got an empty match that splits a codepoint after doing an + // anchored search in UTF-8 mode, then that implies that we must have + // *started* the search at a location that splits a codepoint. 
This + // follows from the fact that if a match is reported from an anchored + // search, then the start offset of the match *must* match the start + // offset of the search. + // + // It also follows that no other non-empty match is possible. For + // example, you might write a regex like '(?:)|SOMETHING' and start its + // search in the middle of a codepoint. The first branch is an empty + // regex that will bubble up a match at the first position, and then + // get rejected here and report no match. But what if 'SOMETHING' could + // have matched? We reason that such a thing is impossible, because + // if it does, it must report a match that starts in the middle of a + // codepoint. This in turn implies that a match is reported whose span + // does not correspond to valid UTF-8, and this breaks the promise + // made when UTF-8 mode is enabled. (That promise *can* be broken, for + // example, by enabling UTF-8 mode but building an by hand NFA that + // produces non-empty matches that span invalid UTF-8. This is an unchecked + // but documented precondition violation of UTF-8 mode, and is documented + // to have unspecified behavior.) + // + // I believe this actually means that if an anchored search is run, and + // UTF-8 mode is enabled and the start position splits a codepoint, + // then it is correct to immediately report no match without even + // executing the regex engine. But it doesn't really seem worth writing + // out that case in every regex engine to save a tiny bit of work in an + // extremely pathological case, so we just handle it here. + if input.get_anchored().is_anchored() { + return Ok(if input.is_char_boundary(match_offset) { + Some(init_value) + } else { + None + }); + } + // Otherwise, we have an unanchored search, so just keep looking for + // matches until we have one that does not split a codepoint or we hit + // EOI. + let mut value = init_value; + let mut input = input.clone(); + while !input.is_char_boundary(match_offset) { + if forward { + // The unwrap is OK here because overflowing usize while + // iterating over a slice is impossible, at it would require + // a slice of length greater than isize::MAX, which is itself + // impossible. + input.set_start(input.start().checked_add(1).unwrap()); + } else { + input.set_end(match input.end().checked_sub(1) { + None => return Ok(None), + Some(end) => end, + }); + } + match find(&input)? { + None => return Ok(None), + Some((new_value, new_match_end)) => { + value = new_value; + match_offset = new_match_end; + } + } + } + Ok(Some(value)) +} diff --git a/vendor/regex-automata/src/util/escape.rs b/vendor/regex-automata/src/util/escape.rs new file mode 100644 index 0000000..7f6aa15 --- /dev/null +++ b/vendor/regex-automata/src/util/escape.rs @@ -0,0 +1,84 @@ +/*! +Provides convenience routines for escaping raw bytes. + +Since this crate tends to deal with `&[u8]` everywhere and the default +`Debug` implementation just shows decimal integers, it makes debugging those +representations quite difficult. This module provides types that show `&[u8]` +as if it were a string, with invalid UTF-8 escaped into its byte-by-byte hex +representation. +*/ + +use crate::util::utf8; + +/// Provides a convenient `Debug` implementation for a `u8`. +/// +/// The `Debug` impl treats the byte as an ASCII, and emits a human readable +/// representation of it. If the byte isn't ASCII, then it's emitted as a hex +/// escape sequence. 
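In rough terms, the rendering just described amounts to the following (a much-simplified sketch with a made-up function name, not the actual `Debug` impl that follows; the real code also escapes control characters and quotes the space byte):

```rust
// Illustrative only: print a byte as itself when it is printable ASCII and
// as a hex escape otherwise.
fn render_byte(b: u8) -> String {
    if b.is_ascii_graphic() || b == b' ' {
        (b as char).to_string()
    } else {
        format!("\\x{:02X}", b)
    }
}

fn main() {
    assert_eq!(render_byte(b'a'), "a");
    assert_eq!(render_byte(0xFF), "\\xFF");
}
```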
+#[derive(Clone, Copy)] +pub struct DebugByte(pub u8); + +impl core::fmt::Debug for DebugByte { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + // Special case ASCII space. It's too hard to read otherwise, so + // put quotes around it. I sometimes wonder whether just '\x20' would + // be better... + if self.0 == b' ' { + return write!(f, "' '"); + } + // 10 bytes is enough to cover any output from ascii::escape_default. + let mut bytes = [0u8; 10]; + let mut len = 0; + for (i, mut b) in core::ascii::escape_default(self.0).enumerate() { + // capitalize \xab to \xAB + if i >= 2 && b'a' <= b && b <= b'f' { + b -= 32; + } + bytes[len] = b; + len += 1; + } + write!(f, "{}", core::str::from_utf8(&bytes[..len]).unwrap()) + } +} + +/// Provides a convenient `Debug` implementation for `&[u8]`. +/// +/// This generally works best when the bytes are presumed to be mostly UTF-8, +/// but will work for anything. For any bytes that aren't UTF-8, they are +/// emitted as hex escape sequences. +pub struct DebugHaystack<'a>(pub &'a [u8]); + +impl<'a> core::fmt::Debug for DebugHaystack<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "\"")?; + // This is a sad re-implementation of a similar impl found in bstr. + let mut bytes = self.0; + while let Some(result) = utf8::decode(bytes) { + let ch = match result { + Ok(ch) => ch, + Err(byte) => { + write!(f, r"\x{:02x}", byte)?; + bytes = &bytes[1..]; + continue; + } + }; + bytes = &bytes[ch.len_utf8()..]; + match ch { + '\0' => write!(f, "\\0")?, + // ASCII control characters except \0, \n, \r, \t + '\x01'..='\x08' + | '\x0b' + | '\x0c' + | '\x0e'..='\x19' + | '\x7f' => { + write!(f, "\\x{:02x}", u32::from(ch))?; + } + '\n' | '\r' | '\t' | _ => { + write!(f, "{}", ch.escape_debug())?; + } + } + } + write!(f, "\"")?; + Ok(()) + } +} diff --git a/vendor/regex-automata/src/util/int.rs b/vendor/regex-automata/src/util/int.rs new file mode 100644 index 0000000..e6b13bf --- /dev/null +++ b/vendor/regex-automata/src/util/int.rs @@ -0,0 +1,252 @@ +/*! +This module provides several integer oriented traits for converting between +both fixed size integers and integers whose size varies based on the target +(like `usize`). + +The driving design principle of this module is to attempt to centralize as many +`as` casts as possible here. And in particular, we separate casts into two +buckets: + +* Casts that we use for their truncating behavior. In this case, we use more +descriptive names, like `low_u32` and `high_u32`. +* Casts that we use for converting back-and-forth between `usize`. These +conversions are generally necessary because we often store indices in different +formats to save on memory, which requires converting to and from `usize`. In +this case, we very specifically do not want to overflow, and so the methods +defined here will panic if the `as` cast would be lossy in debug mode. (A +normal `as` cast will never panic!) + +For `as` casts between raw pointers, we use `cast`, so `as` isn't needed there. + +For regex engines, floating point is just never used, so we don't have to worry +about `as` casts for those. + +Otherwise, this module pretty much covers all of our `as` needs except for one +thing: const contexts. There are a select few places in this crate where we +still need to use `as` because const functions on traits aren't stable yet. +If we wind up significantly expanding our const footprint in this crate, it +might be worth defining free functions to handle those cases. 
But at the time +of writing, that just seemed like too much ceremony. Instead, I comment each +such use of `as` in a const context with a "fixme" notice. + +NOTE: for simplicity, we don't take target pointer width into account here for +`usize` conversions. Since we currently only panic in debug mode, skipping the +check when it can be proven it isn't needed at compile time doesn't really +matter. Now, if we wind up wanting to do as many checks as possible in release +mode, then we would want to skip those when we know the conversions are always +non-lossy. + +NOTE: this module isn't an exhaustive API. For example, we still use things +like `u64::from` where possible, or even `usize::try_from()` for when we do +explicitly want to panic or when we want to return an error for overflow. +*/ + +pub(crate) trait U8 { + fn as_usize(self) -> usize; +} + +impl U8 for u8 { + fn as_usize(self) -> usize { + usize::from(self) + } +} + +pub(crate) trait U16 { + fn as_usize(self) -> usize; + fn low_u8(self) -> u8; + fn high_u8(self) -> u8; +} + +impl U16 for u16 { + fn as_usize(self) -> usize { + usize::from(self) + } + + fn low_u8(self) -> u8 { + self as u8 + } + + fn high_u8(self) -> u8 { + (self >> 8) as u8 + } +} + +pub(crate) trait U32 { + fn as_usize(self) -> usize; + fn low_u8(self) -> u8; + fn low_u16(self) -> u16; + fn high_u16(self) -> u16; +} + +impl U32 for u32 { + fn as_usize(self) -> usize { + #[cfg(debug_assertions)] + { + usize::try_from(self).expect("u32 overflowed usize") + } + #[cfg(not(debug_assertions))] + { + self as usize + } + } + + fn low_u8(self) -> u8 { + self as u8 + } + + fn low_u16(self) -> u16 { + self as u16 + } + + fn high_u16(self) -> u16 { + (self >> 16) as u16 + } +} + +pub(crate) trait U64 { + fn as_usize(self) -> usize; + fn low_u8(self) -> u8; + fn low_u16(self) -> u16; + fn low_u32(self) -> u32; + fn high_u32(self) -> u32; +} + +impl U64 for u64 { + fn as_usize(self) -> usize { + #[cfg(debug_assertions)] + { + usize::try_from(self).expect("u64 overflowed usize") + } + #[cfg(not(debug_assertions))] + { + self as usize + } + } + + fn low_u8(self) -> u8 { + self as u8 + } + + fn low_u16(self) -> u16 { + self as u16 + } + + fn low_u32(self) -> u32 { + self as u32 + } + + fn high_u32(self) -> u32 { + (self >> 32) as u32 + } +} + +pub(crate) trait I32 { + fn as_usize(self) -> usize; + fn to_bits(self) -> u32; + fn from_bits(n: u32) -> i32; +} + +impl I32 for i32 { + fn as_usize(self) -> usize { + #[cfg(debug_assertions)] + { + usize::try_from(self).expect("i32 overflowed usize") + } + #[cfg(not(debug_assertions))] + { + self as usize + } + } + + fn to_bits(self) -> u32 { + self as u32 + } + + fn from_bits(n: u32) -> i32 { + n as i32 + } +} + +pub(crate) trait Usize { + fn as_u8(self) -> u8; + fn as_u16(self) -> u16; + fn as_u32(self) -> u32; + fn as_u64(self) -> u64; +} + +impl Usize for usize { + fn as_u8(self) -> u8 { + #[cfg(debug_assertions)] + { + u8::try_from(self).expect("usize overflowed u8") + } + #[cfg(not(debug_assertions))] + { + self as u8 + } + } + + fn as_u16(self) -> u16 { + #[cfg(debug_assertions)] + { + u16::try_from(self).expect("usize overflowed u16") + } + #[cfg(not(debug_assertions))] + { + self as u16 + } + } + + fn as_u32(self) -> u32 { + #[cfg(debug_assertions)] + { + u32::try_from(self).expect("usize overflowed u32") + } + #[cfg(not(debug_assertions))] + { + self as u32 + } + } + + fn as_u64(self) -> u64 { + #[cfg(debug_assertions)] + { + u64::try_from(self).expect("usize overflowed u64") + } + #[cfg(not(debug_assertions))] + { + self as u64 + 
} + } +} + +// Pointers aren't integers, but we convert pointers to integers to perform +// offset arithmetic in some places. (And no, we don't convert the integers +// back to pointers.) So add 'as_usize' conversions here too for completeness. +// +// These 'as' casts are actually okay because they're always non-lossy. But the +// idea here is to just try and remove as much 'as' as possible, particularly +// in this crate where we are being really paranoid about offsets and making +// sure we don't panic on inputs that might be untrusted. This way, the 'as' +// casts become easier to audit if they're all in one place, even when some of +// them are actually okay 100% of the time. + +pub(crate) trait Pointer { + fn as_usize(self) -> usize; +} + +impl<T> Pointer for *const T { + fn as_usize(self) -> usize { + self as usize + } +} + +pub(crate) trait PointerMut { + fn as_usize(self) -> usize; +} + +impl<T> PointerMut for *mut T { + fn as_usize(self) -> usize { + self as usize + } +} diff --git a/vendor/regex-automata/src/util/interpolate.rs b/vendor/regex-automata/src/util/interpolate.rs new file mode 100644 index 0000000..f274629 --- /dev/null +++ b/vendor/regex-automata/src/util/interpolate.rs @@ -0,0 +1,579 @@ +/*! +Provides routines for interpolating capture group references. + +That is, if a replacement string contains references like `$foo` or `${foo1}`, +then they are replaced with the corresponding capture values for the groups +named `foo` and `foo1`, respectively. Similarly, syntax like `$1` and `${1}` +is supported as well, with `1` corresponding to a capture group index and not +a name. + +This module provides the free functions [`string`] and [`bytes`], which +interpolate Rust Unicode strings and byte strings, respectively. + +# Format + +These routines support two different kinds of capture references: unbraced and +braced. + +For the unbraced format, the format supported is `$ref` where `name` can be +any character in the class `[0-9A-Za-z_]`. `ref` is always the longest +possible parse. So for example, `$1a` corresponds to the capture group named +`1a` and not the capture group at index `1`. If `ref` matches `^[0-9]+$`, then +it is treated as a capture group index itself and not a name. + +For the braced format, the format supported is `${ref}` where `ref` can be any +sequence of bytes except for `}`. If no closing brace occurs, then it is not +considered a capture reference. As with the unbraced format, if `ref` matches +`^[0-9]+$`, then it is treated as a capture group index and not a name. + +The braced format is useful for exerting precise control over the name of the +capture reference. For example, `${1}a` corresponds to the capture group +reference `1` followed by the letter `a`, where as `$1a` (as mentioned above) +corresponds to the capture group reference `1a`. The braced format is also +useful for expressing capture group names that use characters not supported by +the unbraced format. For example, `${foo[bar].baz}` refers to the capture group +named `foo[bar].baz`. + +If a capture group reference is found and it does not refer to a valid capture +group, then it will be replaced with the empty string. + +To write a literal `$`, use `$$`. + +To be clear, and as exhibited via the type signatures in the routines in this +module, it is impossible for a replacement string to be invalid. A replacement +string may not have the intended semantics, but the interpolation procedure +itself can never fail. 
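To make the rules above concrete, here is a short example using the public `interpolate::string` routine (modeled on the doc examples that follow; the particular replacement string and group setup are illustrative):

```rust
use regex_automata::util::interpolate;

fn main() {
    let mut dst = String::new();
    interpolate::string(
        "${1}a vs $1a",
        // Pretend group 1 captured "X"; all other indices are invalid.
        |index, dst| {
            if index == 1 {
                dst.push_str("X");
            }
        },
        // No groups are named, so every name lookup fails.
        |_name| None,
        &mut dst,
    );
    // `${1}a` is group 1 followed by a literal `a`, while `$1a` refers to a
    // group *named* `1a`, which doesn't exist and interpolates to nothing.
    assert_eq!("Xa vs ", dst);
}
```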
+*/ + +use alloc::{string::String, vec::Vec}; + +use crate::util::memchr::memchr; + +/// Accepts a replacement string and interpolates capture references with their +/// corresponding values. +/// +/// `append` should be a function that appends the string value of a capture +/// group at a particular index to the string given. If the capture group +/// index is invalid, then nothing should be appended. +/// +/// `name_to_index` should be a function that maps a capture group name to a +/// capture group index. If the given name doesn't exist, then `None` should +/// be returned. +/// +/// Finally, `dst` is where the final interpolated contents should be written. +/// If `replacement` contains no capture group references, then `dst` will be +/// equivalent to `replacement`. +/// +/// See the [module documentation](self) for details about the format +/// supported. +/// +/// # Example +/// +/// ``` +/// use regex_automata::util::interpolate; +/// +/// let mut dst = String::new(); +/// interpolate::string( +/// "foo $bar baz", +/// |index, dst| { +/// if index == 0 { +/// dst.push_str("BAR"); +/// } +/// }, +/// |name| { +/// if name == "bar" { +/// Some(0) +/// } else { +/// None +/// } +/// }, +/// &mut dst, +/// ); +/// assert_eq!("foo BAR baz", dst); +/// ``` +pub fn string( + mut replacement: &str, + mut append: impl FnMut(usize, &mut String), + mut name_to_index: impl FnMut(&str) -> Option<usize>, + dst: &mut String, +) { + while !replacement.is_empty() { + match memchr(b'$', replacement.as_bytes()) { + None => break, + Some(i) => { + dst.push_str(&replacement[..i]); + replacement = &replacement[i..]; + } + } + // Handle escaping of '$'. + if replacement.as_bytes().get(1).map_or(false, |&b| b == b'$') { + dst.push_str("$"); + replacement = &replacement[2..]; + continue; + } + debug_assert!(!replacement.is_empty()); + let cap_ref = match find_cap_ref(replacement.as_bytes()) { + Some(cap_ref) => cap_ref, + None => { + dst.push_str("$"); + replacement = &replacement[1..]; + continue; + } + }; + replacement = &replacement[cap_ref.end..]; + match cap_ref.cap { + Ref::Number(i) => append(i, dst), + Ref::Named(name) => { + if let Some(i) = name_to_index(name) { + append(i, dst); + } + } + } + } + dst.push_str(replacement); +} + +/// Accepts a replacement byte string and interpolates capture references with +/// their corresponding values. +/// +/// `append` should be a function that appends the byte string value of a +/// capture group at a particular index to the byte string given. If the +/// capture group index is invalid, then nothing should be appended. +/// +/// `name_to_index` should be a function that maps a capture group name to a +/// capture group index. If the given name doesn't exist, then `None` should +/// be returned. +/// +/// Finally, `dst` is where the final interpolated contents should be written. +/// If `replacement` contains no capture group references, then `dst` will be +/// equivalent to `replacement`. +/// +/// See the [module documentation](self) for details about the format +/// supported. 
+/// +/// # Example +/// +/// ``` +/// use regex_automata::util::interpolate; +/// +/// let mut dst = vec![]; +/// interpolate::bytes( +/// b"foo $bar baz", +/// |index, dst| { +/// if index == 0 { +/// dst.extend_from_slice(b"BAR"); +/// } +/// }, +/// |name| { +/// if name == "bar" { +/// Some(0) +/// } else { +/// None +/// } +/// }, +/// &mut dst, +/// ); +/// assert_eq!(&b"foo BAR baz"[..], dst); +/// ``` +pub fn bytes( + mut replacement: &[u8], + mut append: impl FnMut(usize, &mut Vec<u8>), + mut name_to_index: impl FnMut(&str) -> Option<usize>, + dst: &mut Vec<u8>, +) { + while !replacement.is_empty() { + match memchr(b'$', replacement) { + None => break, + Some(i) => { + dst.extend_from_slice(&replacement[..i]); + replacement = &replacement[i..]; + } + } + // Handle escaping of '$'. + if replacement.get(1).map_or(false, |&b| b == b'$') { + dst.push(b'$'); + replacement = &replacement[2..]; + continue; + } + debug_assert!(!replacement.is_empty()); + let cap_ref = match find_cap_ref(replacement) { + Some(cap_ref) => cap_ref, + None => { + dst.push(b'$'); + replacement = &replacement[1..]; + continue; + } + }; + replacement = &replacement[cap_ref.end..]; + match cap_ref.cap { + Ref::Number(i) => append(i, dst), + Ref::Named(name) => { + if let Some(i) = name_to_index(name) { + append(i, dst); + } + } + } + } + dst.extend_from_slice(replacement); +} + +/// `CaptureRef` represents a reference to a capture group inside some text. +/// The reference is either a capture group name or a number. +/// +/// It is also tagged with the position in the text following the +/// capture reference. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +struct CaptureRef<'a> { + cap: Ref<'a>, + end: usize, +} + +/// A reference to a capture group in some text. +/// +/// e.g., `$2`, `$foo`, `${foo}`. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum Ref<'a> { + Named(&'a str), + Number(usize), +} + +impl<'a> From<&'a str> for Ref<'a> { + fn from(x: &'a str) -> Ref<'a> { + Ref::Named(x) + } +} + +impl From<usize> for Ref<'static> { + fn from(x: usize) -> Ref<'static> { + Ref::Number(x) + } +} + +/// Parses a possible reference to a capture group name in the given text, +/// starting at the beginning of `replacement`. +/// +/// If no such valid reference could be found, None is returned. +/// +/// Note that this returns a "possible" reference because this routine doesn't +/// know whether the reference is to a valid group or not. If it winds up not +/// being a valid reference, then it should be replaced with the empty string. +fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> { + let mut i = 0; + let rep: &[u8] = replacement; + if rep.len() <= 1 || rep[0] != b'$' { + return None; + } + i += 1; + if rep[i] == b'{' { + return find_cap_ref_braced(rep, i + 1); + } + let mut cap_end = i; + while rep.get(cap_end).copied().map_or(false, is_valid_cap_letter) { + cap_end += 1; + } + if cap_end == i { + return None; + } + // We just verified that the range 0..cap_end is valid ASCII, so it must + // therefore be valid UTF-8. If we really cared, we could avoid this UTF-8 + // check via an unchecked conversion or by parsing the number straight from + // &[u8]. + let cap = core::str::from_utf8(&rep[i..cap_end]) + .expect("valid UTF-8 capture name"); + Some(CaptureRef { + cap: match cap.parse::<usize>() { + Ok(i) => Ref::Number(i), + Err(_) => Ref::Named(cap), + }, + end: cap_end, + }) +} + +/// Looks for a braced reference, e.g., `${foo1}`. 
This assumes that an opening +/// brace has been found at `i-1` in `rep`. This then looks for a closing +/// brace and returns the capture reference within the brace. +fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option<CaptureRef<'_>> { + assert_eq!(b'{', rep[i.checked_sub(1).unwrap()]); + let start = i; + while rep.get(i).map_or(false, |&b| b != b'}') { + i += 1; + } + if !rep.get(i).map_or(false, |&b| b == b'}') { + return None; + } + // When looking at braced names, we don't put any restrictions on the name, + // so it's possible it could be invalid UTF-8. But a capture group name + // can never be invalid UTF-8, so if we have invalid UTF-8, then we can + // safely return None. + let cap = match core::str::from_utf8(&rep[start..i]) { + Err(_) => return None, + Ok(cap) => cap, + }; + Some(CaptureRef { + cap: match cap.parse::<usize>() { + Ok(i) => Ref::Number(i), + Err(_) => Ref::Named(cap), + }, + end: i + 1, + }) +} + +/// Returns true if and only if the given byte is allowed in a capture name +/// written in non-brace form. +fn is_valid_cap_letter(b: u8) -> bool { + match b { + b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' => true, + _ => false, + } +} + +#[cfg(test)] +mod tests { + use alloc::{string::String, vec, vec::Vec}; + + use super::{find_cap_ref, CaptureRef}; + + macro_rules! find { + ($name:ident, $text:expr) => { + #[test] + fn $name() { + assert_eq!(None, find_cap_ref($text.as_bytes())); + } + }; + ($name:ident, $text:expr, $capref:expr) => { + #[test] + fn $name() { + assert_eq!(Some($capref), find_cap_ref($text.as_bytes())); + } + }; + } + + macro_rules! c { + ($name_or_number:expr, $pos:expr) => { + CaptureRef { cap: $name_or_number.into(), end: $pos } + }; + } + + find!(find_cap_ref1, "$foo", c!("foo", 4)); + find!(find_cap_ref2, "${foo}", c!("foo", 6)); + find!(find_cap_ref3, "$0", c!(0, 2)); + find!(find_cap_ref4, "$5", c!(5, 2)); + find!(find_cap_ref5, "$10", c!(10, 3)); + // See https://github.com/rust-lang/regex/pull/585 + // for more on characters following numbers + find!(find_cap_ref6, "$42a", c!("42a", 4)); + find!(find_cap_ref7, "${42}a", c!(42, 5)); + find!(find_cap_ref8, "${42"); + find!(find_cap_ref9, "${42 "); + find!(find_cap_ref10, " $0 "); + find!(find_cap_ref11, "$"); + find!(find_cap_ref12, " "); + find!(find_cap_ref13, ""); + find!(find_cap_ref14, "$1-$2", c!(1, 2)); + find!(find_cap_ref15, "$1_$2", c!("1_", 3)); + find!(find_cap_ref16, "$x-$y", c!("x", 2)); + find!(find_cap_ref17, "$x_$y", c!("x_", 3)); + find!(find_cap_ref18, "${#}", c!("#", 4)); + find!(find_cap_ref19, "${Z[}", c!("Z[", 5)); + find!(find_cap_ref20, "${¾}", c!("¾", 5)); + find!(find_cap_ref21, "${¾a}", c!("¾a", 6)); + find!(find_cap_ref22, "${a¾}", c!("a¾", 6)); + find!(find_cap_ref23, "${☃}", c!("☃", 6)); + find!(find_cap_ref24, "${a☃}", c!("a☃", 7)); + find!(find_cap_ref25, "${☃a}", c!("☃a", 7)); + find!(find_cap_ref26, "${名字}", c!("名字", 9)); + + fn interpolate_string( + mut name_to_index: Vec<(&'static str, usize)>, + caps: Vec<&'static str>, + replacement: &str, + ) -> String { + name_to_index.sort_by_key(|x| x.0); + + let mut dst = String::new(); + super::string( + replacement, + |i, dst| { + if let Some(&s) = caps.get(i) { + dst.push_str(s); + } + }, + |name| -> Option<usize> { + name_to_index + .binary_search_by_key(&name, |x| x.0) + .ok() + .map(|i| name_to_index[i].1) + }, + &mut dst, + ); + dst + } + + fn interpolate_bytes( + mut name_to_index: Vec<(&'static str, usize)>, + caps: Vec<&'static str>, + replacement: &str, + ) -> String { + 
name_to_index.sort_by_key(|x| x.0); + + let mut dst = vec![]; + super::bytes( + replacement.as_bytes(), + |i, dst| { + if let Some(&s) = caps.get(i) { + dst.extend_from_slice(s.as_bytes()); + } + }, + |name| -> Option<usize> { + name_to_index + .binary_search_by_key(&name, |x| x.0) + .ok() + .map(|i| name_to_index[i].1) + }, + &mut dst, + ); + String::from_utf8(dst).unwrap() + } + + macro_rules! interp { + ($name:ident, $map:expr, $caps:expr, $hay:expr, $expected:expr $(,)*) => { + #[test] + fn $name() { + assert_eq!( + $expected, + interpolate_string($map, $caps, $hay), + "interpolate::string failed", + ); + assert_eq!( + $expected, + interpolate_bytes($map, $caps, $hay), + "interpolate::bytes failed", + ); + } + }; + } + + interp!( + interp1, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test $foo test", + "test xxx test", + ); + + interp!( + interp2, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test$footest", + "test", + ); + + interp!( + interp3, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test${foo}test", + "testxxxtest", + ); + + interp!( + interp4, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test$2test", + "test", + ); + + interp!( + interp5, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test${2}test", + "testxxxtest", + ); + + interp!( + interp6, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test $$foo test", + "test $foo test", + ); + + interp!( + interp7, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test $foo", + "test xxx", + ); + + interp!( + interp8, + vec![("foo", 2)], + vec!["", "", "xxx"], + "$foo test", + "xxx test", + ); + + interp!( + interp9, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test $bar$foo", + "test yyyxxx", + ); + + interp!( + interp10, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test $ test", + "test $ test", + ); + + interp!( + interp11, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test ${} test", + "test test", + ); + + interp!( + interp12, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test ${ } test", + "test test", + ); + + interp!( + interp13, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test ${a b} test", + "test test", + ); + + interp!( + interp14, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test ${a} test", + "test test", + ); + + // This is a funny case where a braced reference is never closed, but + // within the unclosed braced reference, there is an unbraced reference. + // In this case, the braced reference is just treated literally and the + // unbraced reference is found. + interp!( + interp15, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test ${wat $bar ok", + "test ${wat yyy ok", + ); +} diff --git a/vendor/regex-automata/src/util/iter.rs b/vendor/regex-automata/src/util/iter.rs new file mode 100644 index 0000000..a789fa0 --- /dev/null +++ b/vendor/regex-automata/src/util/iter.rs @@ -0,0 +1,1027 @@ +/*! +Generic helpers for iteration of matches from a regex engine in a haystack. + +The principle type in this module is a [`Searcher`]. A `Searcher` provides +its own lower level iterator-like API in addition to methods for constructing +types that implement `Iterator`. The documentation for `Searcher` explains a +bit more about why these different APIs exist. + +Currently, this module supports iteration over any regex engine that works +with the [`HalfMatch`], [`Match`] or [`Captures`] types. 
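As a preview of the `Searcher` API documented below, the following example (modeled on the doc examples later in this module; the pattern and haystack are illustrative) collects full `Match` values for a regex that can match the empty string, without falling into the infinite loop that naive "resume at the end of the last match" iteration would hit:

```rust
use regex_automata::{
    nfa::thompson::pikevm::PikeVM, util::iter::Searcher, Input, Match,
};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let re = PikeVM::new(r"a|")?;
    let mut cache = re.create_cache();
    let mut caps = re.create_captures();

    let mut matches = vec![];
    let mut searcher = Searcher::new(Input::new("abba"));
    while let Some(m) = searcher.advance(|input| {
        re.search(&mut cache, input, &mut caps);
        Ok(caps.get_match())
    }) {
        matches.push(m);
    }
    // Empty matches that overlap the end of a previous match are skipped,
    // so iteration terminates.
    assert_eq!(matches, vec![
        Match::must(0, 0..1),
        Match::must(0, 2..2),
        Match::must(0, 3..4),
    ]);
    Ok(())
}
```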
+*/ + +#[cfg(feature = "alloc")] +use crate::util::captures::Captures; +use crate::util::search::{HalfMatch, Input, Match, MatchError}; + +/// A searcher for creating iterators and performing lower level iteration. +/// +/// This searcher encapsulates the logic required for finding all successive +/// non-overlapping matches in a haystack. In theory, iteration would look +/// something like this: +/// +/// 1. Setting the start position to `0`. +/// 2. Execute a regex search. If no match, end iteration. +/// 3. Report the match and set the start position to the end of the match. +/// 4. Go back to (2). +/// +/// And if this were indeed the case, it's likely that `Searcher` wouldn't +/// exist. Unfortunately, because a regex may match the empty string, the above +/// logic won't work for all possible regexes. Namely, if an empty match is +/// found, then step (3) would set the start position of the search to the +/// position it was at. Thus, iteration would never end. +/// +/// Instead, a `Searcher` knows how to detect these cases and forcefully +/// advance iteration in the case of an empty match that overlaps with a +/// previous match. +/// +/// If you know that your regex cannot match any empty string, then the simple +/// algorithm described above will work correctly. +/// +/// When possible, prefer the iterators defined on the regex engine you're +/// using. This tries to abstract over the regex engine and is thus a bit more +/// unwieldy to use. +/// +/// In particular, a `Searcher` is not itself an iterator. Instead, it provides +/// `advance` routines that permit moving the search along explicitly. It also +/// provides various routines, like [`Searcher::into_matches_iter`], that +/// accept a closure (representing how a regex engine executes a search) and +/// returns a conventional iterator. +/// +/// The lifetime parameters come from the [`Input`] type passed to +/// [`Searcher::new`]: +/// +/// * `'h` is the lifetime of the underlying haystack. +/// +/// # Searcher vs Iterator +/// +/// Why does a search type with "advance" APIs exist at all when we also have +/// iterators? Unfortunately, the reasoning behind this split is a complex +/// combination of the following things: +/// +/// 1. While many of the regex engines expose their own iterators, it is also +/// nice to expose this lower level iteration helper because it permits callers +/// to provide their own `Input` configuration. Moreover, a `Searcher` can work +/// with _any_ regex engine instead of only the ones defined in this crate. +/// This way, everyone benefits from a shared iteration implementation. +/// 2. There are many different regex engines that, while they have the same +/// match semantics, they have slightly different APIs. Iteration is just +/// complex enough to want to share code, and so we need a way of abstracting +/// over those different regex engines. While we could define a new trait that +/// describes any regex engine search API, it would wind up looking very close +/// to a closure. While there may still be reasons for the more generic trait +/// to exist, for now and for the purposes of iteration, we use a closure. +/// Closures also provide a lot of easy flexibility at the call site, in that +/// they permit the caller to borrow any kind of state they want for use during +/// each search call. +/// 3. 
As a result of using closures, and because closures are anonymous types
+/// that cannot be named, it is difficult to encapsulate them without both
+/// costs to speed and added complexity to the public API. For example, in
+/// defining an iterator type like
+/// [`dfa::regex::FindMatches`](crate::dfa::regex::FindMatches),
+/// if we use a closure internally, it's not possible to name this type in the
+/// return type of the iterator constructor. Thus, the only way around it is
+/// to erase the type by boxing it and turning it into a `Box<dyn FnMut ...>`.
+/// This boxed closure is unlikely to be inlined _and_ it infects the public
+/// API in subtle ways. Namely, unless you declare the closure as implementing
+/// `Send` and `Sync`, then the resulting iterator type won't implement it
+/// either. But there are practical issues with requiring the closure to
+/// implement `Send` and `Sync` that result in other API complexities that
+/// are beyond the scope of this already long exposition.
+/// 4. Some regex engines expose more complex match information than just
+/// "which pattern matched" and "at what offsets." For example, the PikeVM
+/// exposes match spans for each capturing group that participated in the
+/// match. In such cases, it can be quite beneficial to reuse the capturing
+/// group allocation on subsequent searches. A proper iterator doesn't permit
+/// this API due to its interface, so it's useful to have something a bit lower
+/// level that permits callers to amortize allocations while also reusing a
+/// shared implementation of iteration. (See the documentation for
+/// [`Searcher::advance`] for an example of using the "advance" API with the
+/// PikeVM.)
+///
+/// What this boils down to is that there are "advance" APIs which require
+/// handing a closure to them for every call, and there are also APIs to create
+/// iterators from a closure. The former are useful for _implementing_
+/// iterators or when you need more flexibility, while the latter are useful
+/// for conveniently writing custom iterators on-the-fly.
+///
+/// # Example: iterating with captures
+///
+/// Several regex engines in this crate offer convenient iterator APIs over
+/// [`Captures`] values. To do so, this requires allocating a new `Captures`
+/// value for each iteration step. This can perhaps be more costly than you
+/// might want. Instead of implementing your own iterator to avoid that
+/// cost (which can be a little subtle if you want to handle empty matches
+/// correctly), you can use this `Searcher` to do it for you:
+///
+/// ```
+/// use regex_automata::{
+/// nfa::thompson::pikevm::PikeVM,
+/// util::iter::Searcher,
+/// Input, Span,
+/// };
+///
+/// let re = PikeVM::new("foo(?P<numbers>[0-9]+)")?;
+/// let haystack = "foo1 foo12 foo123";
+///
+/// let mut caps = re.create_captures();
+/// let mut cache = re.create_cache();
+/// let mut matches = vec![];
+/// let mut searcher = Searcher::new(Input::new(haystack));
+/// while let Some(_) = searcher.advance(|input| {
+/// re.search(&mut cache, input, &mut caps);
+/// Ok(caps.get_match())
+/// }) {
+/// // The unwrap is OK since 'numbers' matches if the pattern matches.
+/// matches.push(caps.get_group_by_name("numbers").unwrap());
+/// }
+/// assert_eq!(matches, vec![
+/// Span::from(3..4),
+/// Span::from(8..10),
+/// Span::from(14..17),
+/// ]);
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+#[derive(Clone, Debug)]
+pub struct Searcher<'h> {
+ /// The input parameters to give to each regex engine call.
+ /// + /// The start position of the search is mutated during iteration. + input: Input<'h>, + /// Records the end offset of the most recent match. This is necessary to + /// handle a corner case for preventing empty matches from overlapping with + /// the ending bounds of a prior match. + last_match_end: Option<usize>, +} + +impl<'h> Searcher<'h> { + /// Create a new fallible non-overlapping matches iterator. + /// + /// The given `input` provides the parameters (including the haystack), + /// while the `finder` represents a closure that calls the underlying regex + /// engine. The closure may borrow any additional state that is needed, + /// such as a prefilter scanner. + pub fn new(input: Input<'h>) -> Searcher<'h> { + Searcher { input, last_match_end: None } + } + + /// Returns the current `Input` used by this searcher. + /// + /// The `Input` returned is generally equivalent to the one given to + /// [`Searcher::new`], but its start position may be different to reflect + /// the start of the next search to be executed. + pub fn input<'s>(&'s self) -> &'s Input<'h> { + &self.input + } + + /// Return the next half match for an infallible search if one exists, and + /// advance to the next position. + /// + /// This is like `try_advance_half`, except errors are converted into + /// panics. + /// + /// # Panics + /// + /// If the given closure returns an error, then this panics. This is useful + /// when you know your underlying regex engine has been configured to not + /// return an error. + /// + /// # Example + /// + /// This example shows how to use a `Searcher` to iterate over all matches + /// when using a DFA, which only provides "half" matches. + /// + /// ``` + /// use regex_automata::{ + /// hybrid::dfa::DFA, + /// util::iter::Searcher, + /// HalfMatch, Input, + /// }; + /// + /// let re = DFA::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")?; + /// let mut cache = re.create_cache(); + /// + /// let input = Input::new("2010-03-14 2016-10-08 2020-10-22"); + /// let mut it = Searcher::new(input); + /// + /// let expected = Some(HalfMatch::must(0, 10)); + /// let got = it.advance_half(|input| re.try_search_fwd(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// let expected = Some(HalfMatch::must(0, 21)); + /// let got = it.advance_half(|input| re.try_search_fwd(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// let expected = Some(HalfMatch::must(0, 32)); + /// let got = it.advance_half(|input| re.try_search_fwd(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// let expected = None; + /// let got = it.advance_half(|input| re.try_search_fwd(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + /// + /// This correctly moves iteration forward even when an empty match occurs: + /// + /// ``` + /// use regex_automata::{ + /// hybrid::dfa::DFA, + /// util::iter::Searcher, + /// HalfMatch, Input, + /// }; + /// + /// let re = DFA::new(r"a|")?; + /// let mut cache = re.create_cache(); + /// + /// let input = Input::new("abba"); + /// let mut it = Searcher::new(input); + /// + /// let expected = Some(HalfMatch::must(0, 1)); + /// let got = it.advance_half(|input| re.try_search_fwd(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// let expected = Some(HalfMatch::must(0, 2)); + /// let got = it.advance_half(|input| re.try_search_fwd(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// let expected = Some(HalfMatch::must(0, 4)); + /// let got = 
it.advance_half(|input| re.try_search_fwd(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// let expected = None; + /// let got = it.advance_half(|input| re.try_search_fwd(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn advance_half<F>(&mut self, finder: F) -> Option<HalfMatch> + where + F: FnMut(&Input<'_>) -> Result<Option<HalfMatch>, MatchError>, + { + match self.try_advance_half(finder) { + Ok(m) => m, + Err(err) => panic!( + "unexpected regex half find error: {}\n\ + to handle find errors, use 'try' or 'search' methods", + err, + ), + } + } + + /// Return the next match for an infallible search if one exists, and + /// advance to the next position. + /// + /// The search is advanced even in the presence of empty matches by + /// forbidding empty matches from overlapping with any other match. + /// + /// This is like `try_advance`, except errors are converted into panics. + /// + /// # Panics + /// + /// If the given closure returns an error, then this panics. This is useful + /// when you know your underlying regex engine has been configured to not + /// return an error. + /// + /// # Example + /// + /// This example shows how to use a `Searcher` to iterate over all matches + /// when using a regex based on lazy DFAs: + /// + /// ``` + /// use regex_automata::{ + /// hybrid::regex::Regex, + /// util::iter::Searcher, + /// Match, Input, + /// }; + /// + /// let re = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")?; + /// let mut cache = re.create_cache(); + /// + /// let input = Input::new("2010-03-14 2016-10-08 2020-10-22"); + /// let mut it = Searcher::new(input); + /// + /// let expected = Some(Match::must(0, 0..10)); + /// let got = it.advance(|input| re.try_search(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// let expected = Some(Match::must(0, 11..21)); + /// let got = it.advance(|input| re.try_search(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// let expected = Some(Match::must(0, 22..32)); + /// let got = it.advance(|input| re.try_search(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// let expected = None; + /// let got = it.advance(|input| re.try_search(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + /// + /// This example shows the same as above, but with the PikeVM. This example + /// is useful because it shows how to use this API even when the regex + /// engine doesn't directly return a `Match`. + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// util::iter::Searcher, + /// Match, Input, + /// }; + /// + /// let re = PikeVM::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// let input = Input::new("2010-03-14 2016-10-08 2020-10-22"); + /// let mut it = Searcher::new(input); + /// + /// let expected = Some(Match::must(0, 0..10)); + /// let got = it.advance(|input| { + /// re.search(&mut cache, input, &mut caps); + /// Ok(caps.get_match()) + /// }); + /// // Note that if we wanted to extract capturing group spans, we could + /// // do that here with 'caps'. 
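+ /// // (For example, via 'caps.get_group' or 'caps.get_group_by_name',
+ /// // assuming the pattern defined some capture groups.)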
+ /// assert_eq!(expected, got); + /// + /// let expected = Some(Match::must(0, 11..21)); + /// let got = it.advance(|input| { + /// re.search(&mut cache, input, &mut caps); + /// Ok(caps.get_match()) + /// }); + /// assert_eq!(expected, got); + /// + /// let expected = Some(Match::must(0, 22..32)); + /// let got = it.advance(|input| { + /// re.search(&mut cache, input, &mut caps); + /// Ok(caps.get_match()) + /// }); + /// assert_eq!(expected, got); + /// + /// let expected = None; + /// let got = it.advance(|input| { + /// re.search(&mut cache, input, &mut caps); + /// Ok(caps.get_match()) + /// }); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn advance<F>(&mut self, finder: F) -> Option<Match> + where + F: FnMut(&Input<'_>) -> Result<Option<Match>, MatchError>, + { + match self.try_advance(finder) { + Ok(m) => m, + Err(err) => panic!( + "unexpected regex find error: {}\n\ + to handle find errors, use 'try' or 'search' methods", + err, + ), + } + } + + /// Return the next half match for a fallible search if one exists, and + /// advance to the next position. + /// + /// This is like `advance_half`, except it permits callers to handle errors + /// during iteration. + #[inline] + pub fn try_advance_half<F>( + &mut self, + mut finder: F, + ) -> Result<Option<HalfMatch>, MatchError> + where + F: FnMut(&Input<'_>) -> Result<Option<HalfMatch>, MatchError>, + { + let mut m = match finder(&self.input)? { + None => return Ok(None), + Some(m) => m, + }; + if Some(m.offset()) == self.last_match_end { + m = match self.handle_overlapping_empty_half_match(m, finder)? { + None => return Ok(None), + Some(m) => m, + }; + } + self.input.set_start(m.offset()); + self.last_match_end = Some(m.offset()); + Ok(Some(m)) + } + + /// Return the next match for a fallible search if one exists, and advance + /// to the next position. + /// + /// This is like `advance`, except it permits callers to handle errors + /// during iteration. + #[inline] + pub fn try_advance<F>( + &mut self, + mut finder: F, + ) -> Result<Option<Match>, MatchError> + where + F: FnMut(&Input<'_>) -> Result<Option<Match>, MatchError>, + { + let mut m = match finder(&self.input)? { + None => return Ok(None), + Some(m) => m, + }; + if m.is_empty() && Some(m.end()) == self.last_match_end { + m = match self.handle_overlapping_empty_match(m, finder)? { + None => return Ok(None), + Some(m) => m, + }; + } + self.input.set_start(m.end()); + self.last_match_end = Some(m.end()); + Ok(Some(m)) + } + + /// Given a closure that executes a single search, return an iterator over + /// all successive non-overlapping half matches. + /// + /// The iterator returned yields result values. If the underlying regex + /// engine is configured to never return an error, consider calling + /// [`TryHalfMatchesIter::infallible`] to convert errors into panics. + /// + /// # Example + /// + /// This example shows how to use a `Searcher` to create a proper + /// iterator over half matches. 
+ /// + /// ``` + /// use regex_automata::{ + /// hybrid::dfa::DFA, + /// util::iter::Searcher, + /// HalfMatch, Input, + /// }; + /// + /// let re = DFA::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")?; + /// let mut cache = re.create_cache(); + /// + /// let input = Input::new("2010-03-14 2016-10-08 2020-10-22"); + /// let mut it = Searcher::new(input).into_half_matches_iter(|input| { + /// re.try_search_fwd(&mut cache, input) + /// }); + /// + /// let expected = Some(Ok(HalfMatch::must(0, 10))); + /// assert_eq!(expected, it.next()); + /// + /// let expected = Some(Ok(HalfMatch::must(0, 21))); + /// assert_eq!(expected, it.next()); + /// + /// let expected = Some(Ok(HalfMatch::must(0, 32))); + /// assert_eq!(expected, it.next()); + /// + /// let expected = None; + /// assert_eq!(expected, it.next()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn into_half_matches_iter<F>( + self, + finder: F, + ) -> TryHalfMatchesIter<'h, F> + where + F: FnMut(&Input<'_>) -> Result<Option<HalfMatch>, MatchError>, + { + TryHalfMatchesIter { it: self, finder } + } + + /// Given a closure that executes a single search, return an iterator over + /// all successive non-overlapping matches. + /// + /// The iterator returned yields result values. If the underlying regex + /// engine is configured to never return an error, consider calling + /// [`TryMatchesIter::infallible`] to convert errors into panics. + /// + /// # Example + /// + /// This example shows how to use a `Searcher` to create a proper + /// iterator over matches. + /// + /// ``` + /// use regex_automata::{ + /// hybrid::regex::Regex, + /// util::iter::Searcher, + /// Match, Input, + /// }; + /// + /// let re = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")?; + /// let mut cache = re.create_cache(); + /// + /// let input = Input::new("2010-03-14 2016-10-08 2020-10-22"); + /// let mut it = Searcher::new(input).into_matches_iter(|input| { + /// re.try_search(&mut cache, input) + /// }); + /// + /// let expected = Some(Ok(Match::must(0, 0..10))); + /// assert_eq!(expected, it.next()); + /// + /// let expected = Some(Ok(Match::must(0, 11..21))); + /// assert_eq!(expected, it.next()); + /// + /// let expected = Some(Ok(Match::must(0, 22..32))); + /// assert_eq!(expected, it.next()); + /// + /// let expected = None; + /// assert_eq!(expected, it.next()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn into_matches_iter<F>(self, finder: F) -> TryMatchesIter<'h, F> + where + F: FnMut(&Input<'_>) -> Result<Option<Match>, MatchError>, + { + TryMatchesIter { it: self, finder } + } + + /// Given a closure that executes a single search, return an iterator over + /// all successive non-overlapping `Captures` values. + /// + /// The iterator returned yields result values. If the underlying regex + /// engine is configured to never return an error, consider calling + /// [`TryCapturesIter::infallible`] to convert errors into panics. + /// + /// Unlike the other iterator constructors, this accepts an initial + /// `Captures` value. This `Captures` value is reused for each search, and + /// the iterator implementation clones it before returning it. The caller + /// must provide this value because the iterator is purposely ignorant + /// of the underlying regex engine and thus doesn't know how to create + /// one itself. More to the point, a `Captures` value itself has a few + /// different constructors, which change which kind of information is + /// available to query in exchange for search performance. 
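+ ///
+ /// For instance, as a sketch (the constructors named here live on
+ /// `Captures` itself and are not specific to this routine), a caller could
+ /// trade capturing group spans for speed by building a "match spans only"
+ /// value:
+ ///
+ /// ```
+ /// use regex_automata::{
+ ///     nfa::thompson::pikevm::PikeVM,
+ ///     util::captures::Captures,
+ /// };
+ ///
+ /// let re = PikeVM::new(r"foo(?P<n>[0-9]+)")?;
+ /// // Only records which pattern matched and the overall match span,
+ /// // which is cheaper to fill in than tracking every capturing group.
+ /// let caps = Captures::matches(re.get_nfa().group_info().clone());
+ /// // A freshly created value reports no match until a search sets one.
+ /// assert!(caps.get_match().is_none());
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```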
+ /// + /// # Example + /// + /// This example shows how to use a `Searcher` to create a proper iterator + /// over `Captures` values, which provides access to all capturing group + /// spans for each match. + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// util::iter::Searcher, + /// Input, + /// }; + /// + /// let re = PikeVM::new( + /// r"(?P<y>[0-9]{4})-(?P<m>[0-9]{2})-(?P<d>[0-9]{2})", + /// )?; + /// let (mut cache, caps) = (re.create_cache(), re.create_captures()); + /// + /// let haystack = "2010-03-14 2016-10-08 2020-10-22"; + /// let input = Input::new(haystack); + /// let mut it = Searcher::new(input) + /// .into_captures_iter(caps, |input, caps| { + /// re.search(&mut cache, input, caps); + /// Ok(()) + /// }); + /// + /// let got = it.next().expect("first date")?; + /// let year = got.get_group_by_name("y").expect("must match"); + /// assert_eq!("2010", &haystack[year]); + /// + /// let got = it.next().expect("second date")?; + /// let month = got.get_group_by_name("m").expect("must match"); + /// assert_eq!("10", &haystack[month]); + /// + /// let got = it.next().expect("third date")?; + /// let day = got.get_group_by_name("d").expect("must match"); + /// assert_eq!("22", &haystack[day]); + /// + /// assert!(it.next().is_none()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[cfg(feature = "alloc")] + #[inline] + pub fn into_captures_iter<F>( + self, + caps: Captures, + finder: F, + ) -> TryCapturesIter<'h, F> + where + F: FnMut(&Input<'_>, &mut Captures) -> Result<(), MatchError>, + { + TryCapturesIter { it: self, caps, finder } + } + + /// Handles the special case of a match that begins where the previous + /// match ended. Without this special handling, it'd be possible to get + /// stuck where an empty match never results in forward progress. This + /// also makes it more consistent with how presiding general purpose regex + /// engines work. + #[cold] + #[inline(never)] + fn handle_overlapping_empty_half_match<F>( + &mut self, + _: HalfMatch, + mut finder: F, + ) -> Result<Option<HalfMatch>, MatchError> + where + F: FnMut(&Input<'_>) -> Result<Option<HalfMatch>, MatchError>, + { + // Since we are only here when 'm.offset()' matches the offset of the + // last match, it follows that this must have been an empty match. + // Since we both need to make progress *and* prevent overlapping + // matches, we discard this match and advance the search by 1. + // + // Note that this may start a search in the middle of a codepoint. The + // regex engines themselves are expected to deal with that and not + // report any matches within a codepoint if they are configured in + // UTF-8 mode. + self.input.set_start(self.input.start().checked_add(1).unwrap()); + finder(&self.input) + } + + /// Handles the special case of an empty match by ensuring that 1) the + /// iterator always advances and 2) empty matches never overlap with other + /// matches. + /// + /// (1) is necessary because we principally make progress by setting the + /// starting location of the next search to the ending location of the last + /// match. But if a match is empty, then this results in a search that does + /// not advance and thus does not terminate. + /// + /// (2) is not strictly necessary, but makes intuitive sense and matches + /// the presiding behavior of most general purpose regex engines. The + /// "intuitive sense" here is that we want to report NON-overlapping + /// matches. 
So for example, given the regex 'a|(?:)' against the haystack + /// 'a', without the special handling, you'd get the matches [0, 1) and [1, + /// 1), where the latter overlaps with the end bounds of the former. + /// + /// Note that we mark this cold and forcefully prevent inlining because + /// handling empty matches like this is extremely rare and does require + /// quite a bit of code, comparatively. Keeping this code out of the main + /// iterator function keeps it smaller and more amenable to inlining + /// itself. + #[cold] + #[inline(never)] + fn handle_overlapping_empty_match<F>( + &mut self, + m: Match, + mut finder: F, + ) -> Result<Option<Match>, MatchError> + where + F: FnMut(&Input<'_>) -> Result<Option<Match>, MatchError>, + { + assert!(m.is_empty()); + self.input.set_start(self.input.start().checked_add(1).unwrap()); + finder(&self.input) + } +} + +/// An iterator over all non-overlapping half matches for a fallible search. +/// +/// The iterator yields a `Result<HalfMatch, MatchError>` value until no more +/// matches could be found. +/// +/// The type parameters are as follows: +/// +/// * `F` represents the type of a closure that executes the search. +/// +/// The lifetime parameters come from the [`Input`] type: +/// +/// * `'h` is the lifetime of the underlying haystack. +/// +/// When possible, prefer the iterators defined on the regex engine you're +/// using. This tries to abstract over the regex engine and is thus a bit more +/// unwieldy to use. +/// +/// This iterator is created by [`Searcher::into_half_matches_iter`]. +pub struct TryHalfMatchesIter<'h, F> { + it: Searcher<'h>, + finder: F, +} + +impl<'h, F> TryHalfMatchesIter<'h, F> { + /// Return an infallible version of this iterator. + /// + /// Any item yielded that corresponds to an error results in a panic. This + /// is useful if your underlying regex engine is configured in a way that + /// it is guaranteed to never return an error. + pub fn infallible(self) -> HalfMatchesIter<'h, F> { + HalfMatchesIter(self) + } + + /// Returns the current `Input` used by this iterator. + /// + /// The `Input` returned is generally equivalent to the one used to + /// construct this iterator, but its start position may be different to + /// reflect the start of the next search to be executed. + pub fn input<'i>(&'i self) -> &'i Input<'h> { + self.it.input() + } +} + +impl<'h, F> Iterator for TryHalfMatchesIter<'h, F> +where + F: FnMut(&Input<'_>) -> Result<Option<HalfMatch>, MatchError>, +{ + type Item = Result<HalfMatch, MatchError>; + + #[inline] + fn next(&mut self) -> Option<Result<HalfMatch, MatchError>> { + self.it.try_advance_half(&mut self.finder).transpose() + } +} + +impl<'h, F> core::fmt::Debug for TryHalfMatchesIter<'h, F> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("TryHalfMatchesIter") + .field("it", &self.it) + .field("finder", &"<closure>") + .finish() + } +} + +/// An iterator over all non-overlapping half matches for an infallible search. +/// +/// The iterator yields a [`HalfMatch`] value until no more matches could be +/// found. +/// +/// The type parameters are as follows: +/// +/// * `F` represents the type of a closure that executes the search. +/// +/// The lifetime parameters come from the [`Input`] type: +/// +/// * `'h` is the lifetime of the underlying haystack. +/// +/// When possible, prefer the iterators defined on the regex engine you're +/// using. This tries to abstract over the regex engine and is thus a bit more +/// unwieldy to use. 
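+///
+/// For example, this sketch reuses the date regex from the fallible example
+/// on [`Searcher::into_half_matches_iter`], but converts the iterator so that
+/// it yields `HalfMatch` values directly (reasonable here since this simple
+/// search won't return an error):
+///
+/// ```
+/// use regex_automata::{hybrid::dfa::DFA, util::iter::Searcher, HalfMatch, Input};
+///
+/// let re = DFA::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")?;
+/// let mut cache = re.create_cache();
+///
+/// let input = Input::new("2010-03-14 2016-10-08 2020-10-22");
+/// let mut it = Searcher::new(input)
+///     .into_half_matches_iter(|input| re.try_search_fwd(&mut cache, input))
+///     .infallible();
+///
+/// assert_eq!(Some(HalfMatch::must(0, 10)), it.next());
+/// assert_eq!(Some(HalfMatch::must(0, 21)), it.next());
+/// assert_eq!(Some(HalfMatch::must(0, 32)), it.next());
+/// assert_eq!(None, it.next());
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```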
+/// +/// This iterator is created by [`Searcher::into_half_matches_iter`] and +/// then calling [`TryHalfMatchesIter::infallible`]. +#[derive(Debug)] +pub struct HalfMatchesIter<'h, F>(TryHalfMatchesIter<'h, F>); + +impl<'h, F> HalfMatchesIter<'h, F> { + /// Returns the current `Input` used by this iterator. + /// + /// The `Input` returned is generally equivalent to the one used to + /// construct this iterator, but its start position may be different to + /// reflect the start of the next search to be executed. + pub fn input<'i>(&'i self) -> &'i Input<'h> { + self.0.it.input() + } +} + +impl<'h, F> Iterator for HalfMatchesIter<'h, F> +where + F: FnMut(&Input<'_>) -> Result<Option<HalfMatch>, MatchError>, +{ + type Item = HalfMatch; + + #[inline] + fn next(&mut self) -> Option<HalfMatch> { + match self.0.next()? { + Ok(m) => Some(m), + Err(err) => panic!( + "unexpected regex half find error: {}\n\ + to handle find errors, use 'try' or 'search' methods", + err, + ), + } + } +} + +/// An iterator over all non-overlapping matches for a fallible search. +/// +/// The iterator yields a `Result<Match, MatchError>` value until no more +/// matches could be found. +/// +/// The type parameters are as follows: +/// +/// * `F` represents the type of a closure that executes the search. +/// +/// The lifetime parameters come from the [`Input`] type: +/// +/// * `'h` is the lifetime of the underlying haystack. +/// +/// When possible, prefer the iterators defined on the regex engine you're +/// using. This tries to abstract over the regex engine and is thus a bit more +/// unwieldy to use. +/// +/// This iterator is created by [`Searcher::into_matches_iter`]. +pub struct TryMatchesIter<'h, F> { + it: Searcher<'h>, + finder: F, +} + +impl<'h, F> TryMatchesIter<'h, F> { + /// Return an infallible version of this iterator. + /// + /// Any item yielded that corresponds to an error results in a panic. This + /// is useful if your underlying regex engine is configured in a way that + /// it is guaranteed to never return an error. + pub fn infallible(self) -> MatchesIter<'h, F> { + MatchesIter(self) + } + + /// Returns the current `Input` used by this iterator. + /// + /// The `Input` returned is generally equivalent to the one used to + /// construct this iterator, but its start position may be different to + /// reflect the start of the next search to be executed. + pub fn input<'i>(&'i self) -> &'i Input<'h> { + self.it.input() + } +} + +impl<'h, F> Iterator for TryMatchesIter<'h, F> +where + F: FnMut(&Input<'_>) -> Result<Option<Match>, MatchError>, +{ + type Item = Result<Match, MatchError>; + + #[inline] + fn next(&mut self) -> Option<Result<Match, MatchError>> { + self.it.try_advance(&mut self.finder).transpose() + } +} + +impl<'h, F> core::fmt::Debug for TryMatchesIter<'h, F> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("TryMatchesIter") + .field("it", &self.it) + .field("finder", &"<closure>") + .finish() + } +} + +/// An iterator over all non-overlapping matches for an infallible search. +/// +/// The iterator yields a [`Match`] value until no more matches could be found. +/// +/// The type parameters are as follows: +/// +/// * `F` represents the type of a closure that executes the search. +/// +/// The lifetime parameters come from the [`Input`] type: +/// +/// * `'h` is the lifetime of the underlying haystack. +/// +/// When possible, prefer the iterators defined on the regex engine you're +/// using. 
This tries to abstract over the regex engine and is thus a bit more +/// unwieldy to use. +/// +/// This iterator is created by [`Searcher::into_matches_iter`] and +/// then calling [`TryMatchesIter::infallible`]. +#[derive(Debug)] +pub struct MatchesIter<'h, F>(TryMatchesIter<'h, F>); + +impl<'h, F> MatchesIter<'h, F> { + /// Returns the current `Input` used by this iterator. + /// + /// The `Input` returned is generally equivalent to the one used to + /// construct this iterator, but its start position may be different to + /// reflect the start of the next search to be executed. + pub fn input<'i>(&'i self) -> &'i Input<'h> { + self.0.it.input() + } +} + +impl<'h, F> Iterator for MatchesIter<'h, F> +where + F: FnMut(&Input<'_>) -> Result<Option<Match>, MatchError>, +{ + type Item = Match; + + #[inline] + fn next(&mut self) -> Option<Match> { + match self.0.next()? { + Ok(m) => Some(m), + Err(err) => panic!( + "unexpected regex find error: {}\n\ + to handle find errors, use 'try' or 'search' methods", + err, + ), + } + } +} + +/// An iterator over all non-overlapping captures for a fallible search. +/// +/// The iterator yields a `Result<Captures, MatchError>` value until no more +/// matches could be found. +/// +/// The type parameters are as follows: +/// +/// * `F` represents the type of a closure that executes the search. +/// +/// The lifetime parameters come from the [`Input`] type: +/// +/// * `'h` is the lifetime of the underlying haystack. +/// +/// When possible, prefer the iterators defined on the regex engine you're +/// using. This tries to abstract over the regex engine and is thus a bit more +/// unwieldy to use. +/// +/// This iterator is created by [`Searcher::into_captures_iter`]. +#[cfg(feature = "alloc")] +pub struct TryCapturesIter<'h, F> { + it: Searcher<'h>, + caps: Captures, + finder: F, +} + +#[cfg(feature = "alloc")] +impl<'h, F> TryCapturesIter<'h, F> { + /// Return an infallible version of this iterator. + /// + /// Any item yielded that corresponds to an error results in a panic. This + /// is useful if your underlying regex engine is configured in a way that + /// it is guaranteed to never return an error. + pub fn infallible(self) -> CapturesIter<'h, F> { + CapturesIter(self) + } +} + +#[cfg(feature = "alloc")] +impl<'h, F> Iterator for TryCapturesIter<'h, F> +where + F: FnMut(&Input<'_>, &mut Captures) -> Result<(), MatchError>, +{ + type Item = Result<Captures, MatchError>; + + #[inline] + fn next(&mut self) -> Option<Result<Captures, MatchError>> { + let TryCapturesIter { ref mut it, ref mut caps, ref mut finder } = + *self; + let result = it + .try_advance(|input| { + (finder)(input, caps)?; + Ok(caps.get_match()) + }) + .transpose()?; + match result { + Ok(_) => Some(Ok(caps.clone())), + Err(err) => Some(Err(err)), + } + } +} + +#[cfg(feature = "alloc")] +impl<'h, F> core::fmt::Debug for TryCapturesIter<'h, F> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("TryCapturesIter") + .field("it", &self.it) + .field("caps", &self.caps) + .field("finder", &"<closure>") + .finish() + } +} + +/// An iterator over all non-overlapping captures for an infallible search. +/// +/// The iterator yields a [`Captures`] value until no more matches could be +/// found. +/// +/// The type parameters are as follows: +/// +/// * `F` represents the type of a closure that executes the search. +/// +/// The lifetime parameters come from the [`Input`] type: +/// +/// * `'h` is the lifetime of the underlying haystack. 
+/// +/// When possible, prefer the iterators defined on the regex engine you're +/// using. This tries to abstract over the regex engine and is thus a bit more +/// unwieldy to use. +/// +/// This iterator is created by [`Searcher::into_captures_iter`] and then +/// calling [`TryCapturesIter::infallible`]. +#[cfg(feature = "alloc")] +#[derive(Debug)] +pub struct CapturesIter<'h, F>(TryCapturesIter<'h, F>); + +#[cfg(feature = "alloc")] +impl<'h, F> Iterator for CapturesIter<'h, F> +where + F: FnMut(&Input<'_>, &mut Captures) -> Result<(), MatchError>, +{ + type Item = Captures; + + #[inline] + fn next(&mut self) -> Option<Captures> { + match self.0.next()? { + Ok(m) => Some(m), + Err(err) => panic!( + "unexpected regex captures error: {}\n\ + to handle find errors, use 'try' or 'search' methods", + err, + ), + } + } +} diff --git a/vendor/regex-automata/src/util/lazy.rs b/vendor/regex-automata/src/util/lazy.rs new file mode 100644 index 0000000..0d0b4fb --- /dev/null +++ b/vendor/regex-automata/src/util/lazy.rs @@ -0,0 +1,461 @@ +/*! +A lazily initialized value for safe sharing between threads. + +The principal type in this module is `Lazy`, which makes it easy to construct +values that are shared safely across multiple threads simultaneously. +*/ + +use core::fmt; + +/// A lazily initialized value that implements `Deref` for `T`. +/// +/// A `Lazy` takes an initialization function and permits callers from any +/// thread to access the result of that initialization function in a safe +/// manner. In effect, this permits one-time initialization of global resources +/// in a (possibly) multi-threaded program. +/// +/// This type and its functionality are available even when neither the `alloc` +/// nor the `std` features are enabled. In exchange, a `Lazy` does **not** +/// guarantee that the given `create` function is called at most once. It +/// might be called multiple times. Moreover, a call to `Lazy::get` (either +/// explicitly or implicitly via `Lazy`'s `Deref` impl) may block until a `T` +/// is available. +/// +/// This is very similar to `lazy_static` or `once_cell`, except it doesn't +/// guarantee that the initialization function will be run once and it works +/// in no-alloc no-std environments. With that said, if you need stronger +/// guarantees or a more flexible API, then it is recommended to use either +/// `lazy_static` or `once_cell`. +/// +/// # Warning: may use a spin lock +/// +/// When this crate is compiled _without_ the `alloc` feature, then this type +/// may used a spin lock internally. This can have subtle effects that may +/// be undesirable. See [Spinlocks Considered Harmful][spinharm] for a more +/// thorough treatment of this topic. +/// +/// [spinharm]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html +/// +/// # Example +/// +/// This type is useful for creating regexes once, and then using them from +/// multiple threads simultaneously without worrying about synchronization. +/// +/// ``` +/// use regex_automata::{dfa::regex::Regex, util::lazy::Lazy, Match}; +/// +/// static RE: Lazy<Regex> = Lazy::new(|| Regex::new("foo[0-9]+bar").unwrap()); +/// +/// let expected = Some(Match::must(0, 3..14)); +/// assert_eq!(expected, RE.find(b"zzzfoo12345barzzz")); +/// ``` +pub struct Lazy<T, F = fn() -> T>(lazy::Lazy<T, F>); + +impl<T, F> Lazy<T, F> { + /// Create a new `Lazy` value that is initialized via the given function. + /// + /// The `T` type is automatically inferred from the return type of the + /// `create` function given. 
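+ ///
+ /// # Example
+ ///
+ /// A small sketch:
+ ///
+ /// ```
+ /// use regex_automata::util::lazy::Lazy;
+ ///
+ /// static NAMES: Lazy<Vec<&'static str>> = Lazy::new(|| vec!["foo", "bar"]);
+ /// assert_eq!(2, NAMES.len());
+ /// ```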
+ pub const fn new(create: F) -> Lazy<T, F> { + Lazy(lazy::Lazy::new(create)) + } +} + +impl<T, F: Fn() -> T> Lazy<T, F> { + /// Return a reference to the lazily initialized value. + /// + /// This routine may block if another thread is initializing a `T`. + /// + /// Note that given a `x` which has type `Lazy`, this must be called via + /// `Lazy::get(x)` and not `x.get()`. This routine is defined this way + /// because `Lazy` impls `Deref` with a target of `T`. + /// + /// # Panics + /// + /// This panics if the `create` function inside this lazy value panics. + /// If the panic occurred in another thread, then this routine _may_ also + /// panic (but is not guaranteed to do so). + pub fn get(this: &Lazy<T, F>) -> &T { + this.0.get() + } +} + +impl<T, F: Fn() -> T> core::ops::Deref for Lazy<T, F> { + type Target = T; + + fn deref(&self) -> &T { + Lazy::get(self) + } +} + +impl<T: fmt::Debug, F: Fn() -> T> fmt::Debug for Lazy<T, F> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.0.fmt(f) + } +} + +#[cfg(feature = "alloc")] +mod lazy { + use core::{ + fmt, + marker::PhantomData, + sync::atomic::{AtomicPtr, Ordering}, + }; + + use alloc::boxed::Box; + + /// A non-std lazy initialized value. + /// + /// This might run the initialization function more than once, but will + /// never block. + /// + /// I wish I could get these semantics into the non-alloc non-std Lazy + /// type below, but I'm not sure how to do it. If you can do an alloc, + /// then the implementation becomes very simple if you don't care about + /// redundant work precisely because a pointer can be atomically swapped. + /// + /// Perhaps making this approach work in the non-alloc non-std case + /// requires asking the caller for a pointer? It would make the API less + /// convenient I think. + pub(super) struct Lazy<T, F> { + data: AtomicPtr<T>, + create: F, + // This indicates to the compiler that this type can drop T. It's not + // totally clear how the absence of this marker could lead to trouble, + // but putting here doesn't have any downsides so we hedge until somone + // can from the Unsafe Working Group can tell us definitively that we + // don't need it. + // + // See: https://github.com/BurntSushi/regex-automata/issues/30 + owned: PhantomData<Box<T>>, + } + + // SAFETY: So long as T and &T (and F and &F) can themselves be safely + // shared among threads, so to can a Lazy<T, _>. Namely, the Lazy API only + // permits accessing a &T and initialization is free of data races. So if T + // is thread safe, then so to is Lazy<T, _>. + // + // We specifically require that T: Send in order for Lazy<T> to be Sync. + // Without that requirement, it's possible to send a T from one thread to + // another via Lazy's destructor. + // + // It's not clear whether we need F: Send+Sync for Lazy to be Sync. But + // we're conservative for now and keep both. + unsafe impl<T: Send + Sync, F: Send + Sync> Sync for Lazy<T, F> {} + + impl<T, F> Lazy<T, F> { + /// Create a new alloc but non-std lazy value that is racily + /// initialized. That is, the 'create' function may be called more than + /// once. + pub(super) const fn new(create: F) -> Lazy<T, F> { + Lazy { + data: AtomicPtr::new(core::ptr::null_mut()), + create, + owned: PhantomData, + } + } + } + + impl<T, F: Fn() -> T> Lazy<T, F> { + /// Get the underlying lazy value. 
If it hasn't been initialized + /// yet, then always attempt to initialize it (even if some other + /// thread is initializing it) and atomically attach it to this lazy + /// value before returning it. + pub(super) fn get(&self) -> &T { + if let Some(data) = self.poll() { + return data; + } + let data = (self.create)(); + let mut ptr = Box::into_raw(Box::new(data)); + // We attempt to stuff our initialized value into our atomic + // pointer. Upon success, we don't need to do anything. But if + // someone else beat us to the punch, then we need to make sure + // our newly created value is dropped. + let result = self.data.compare_exchange( + core::ptr::null_mut(), + ptr, + Ordering::AcqRel, + Ordering::Acquire, + ); + if let Err(old) = result { + // SAFETY: We created 'ptr' via Box::into_raw above, so turning + // it back into a Box via from_raw is safe. + drop(unsafe { Box::from_raw(ptr) }); + ptr = old; + } + // SAFETY: We just set the pointer above to a non-null value, even + // in the error case, and set it to a fully initialized value + // returned by 'create'. + unsafe { &*ptr } + } + + /// If this lazy value has been initialized successfully, then return + /// that value. Otherwise return None immediately. This never attempts + /// to run initialization itself. + fn poll(&self) -> Option<&T> { + let ptr = self.data.load(Ordering::Acquire); + if ptr.is_null() { + return None; + } + // SAFETY: We just checked that the pointer is not null. Since it's + // not null, it must have been fully initialized by 'get' at some + // point. + Some(unsafe { &*ptr }) + } + } + + impl<T: fmt::Debug, F: Fn() -> T> fmt::Debug for Lazy<T, F> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("Lazy").field("data", &self.poll()).finish() + } + } + + impl<T, F> Drop for Lazy<T, F> { + fn drop(&mut self) { + let ptr = *self.data.get_mut(); + if !ptr.is_null() { + // SAFETY: We just checked that 'ptr' is not null. And since + // we have exclusive access, there are no races to worry about. + drop(unsafe { Box::from_raw(ptr) }); + } + } + } +} + +#[cfg(not(feature = "alloc"))] +mod lazy { + use core::{ + cell::Cell, + fmt, + mem::MaybeUninit, + panic::{RefUnwindSafe, UnwindSafe}, + sync::atomic::{AtomicU8, Ordering}, + }; + + /// Our 'Lazy' value can be in one of three states: + /// + /// * INIT is where it starts, and also ends up back here if the + /// 'create' routine panics. + /// * BUSY is where it sits while initialization is running in exactly + /// one thread. + /// * DONE is where it sits after 'create' has completed and 'data' has + /// been fully initialized. + const LAZY_STATE_INIT: u8 = 0; + const LAZY_STATE_BUSY: u8 = 1; + const LAZY_STATE_DONE: u8 = 2; + + /// A non-alloc non-std lazy initialized value. + /// + /// This guarantees initialization only happens once, but uses a spinlock + /// to block in the case of simultaneous access. Blocking occurs so that + /// one thread waits while another thread initializes the value. + /// + /// I would much rather have the semantics of the 'alloc' Lazy type above. + /// Namely, that we might run the initialization function more than once, + /// but we never otherwise block. However, I don't know how to do that in + /// a non-alloc non-std context. + pub(super) struct Lazy<T, F> { + state: AtomicU8, + create: Cell<Option<F>>, + data: Cell<MaybeUninit<T>>, + } + + // SAFETY: So long as T and &T (and F and &F) can themselves be safely + // shared among threads, so to can a Lazy<T, _>. 
Namely, the Lazy API only + // permits accessing a &T and initialization is free of data races. So if T + // is thread safe, then so to is Lazy<T, _>. + unsafe impl<T: Send + Sync, F: Send + Sync> Sync for Lazy<T, F> {} + // A reference to a Lazy is unwind safe because we specifically take + // precautions to poison all accesses to a Lazy if the caller-provided + // 'create' function panics. + impl<T: UnwindSafe, F: UnwindSafe + RefUnwindSafe> RefUnwindSafe + for Lazy<T, F> + { + } + + impl<T, F> Lazy<T, F> { + /// Create a new non-alloc non-std lazy value that is initialized + /// exactly once on first use using the given function. + pub(super) const fn new(create: F) -> Lazy<T, F> { + Lazy { + state: AtomicU8::new(LAZY_STATE_INIT), + create: Cell::new(Some(create)), + data: Cell::new(MaybeUninit::uninit()), + } + } + } + + impl<T, F: FnOnce() -> T> Lazy<T, F> { + /// Get the underlying lazy value. If it isn't been initialized + /// yet, then either initialize it or block until some other thread + /// initializes it. If the 'create' function given to Lazy::new panics + /// (even in another thread), then this panics too. + pub(super) fn get(&self) -> &T { + // This is effectively a spinlock. We loop until we enter a DONE + // state, and if possible, initialize it ourselves. The only way + // we exit the loop is if 'create' panics, we initialize 'data' or + // some other thread initializes 'data'. + // + // Yes, I have read spinlocks considered harmful[1]. And that + // article is why this spinlock is only active when 'alloc' isn't + // enabled. I did this because I don't think there is really + // another choice without 'alloc', other than not providing this at + // all. But I think that's a big bummer. + // + // [1]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html + while self.state.load(Ordering::Acquire) != LAZY_STATE_DONE { + // Check if we're the first ones to get here. If so, we'll be + // the ones who initialize. + let result = self.state.compare_exchange( + LAZY_STATE_INIT, + LAZY_STATE_BUSY, + Ordering::AcqRel, + Ordering::Acquire, + ); + // This means we saw the INIT state and nobody else can. So we + // must take responsibility for initializing. And by virtue of + // observing INIT, we have also told anyone else trying to + // get here that we are BUSY. If someone else sees BUSY, then + // they will spin until we finish initialization. + if let Ok(_) = result { + // Since we are guaranteed to be the only ones here, we + // know that 'create' is there... Unless someone else got + // here before us and 'create' panicked. In which case, + // 'self.create' is now 'None' and we forward the panic + // to the caller. (i.e., We implement poisoning.) + // + // SAFETY: Our use of 'self.state' guarantees that we are + // the only thread executing this line, and thus there are + // no races. + let create = unsafe { + (*self.create.as_ptr()).take().expect( + "Lazy's create function panicked, \ + preventing initialization, + poisoning current thread", + ) + }; + let guard = Guard { state: &self.state }; + // SAFETY: Our use of 'self.state' guarantees that we are + // the only thread executing this line, and thus there are + // no races. + unsafe { + (*self.data.as_ptr()).as_mut_ptr().write(create()); + } + // All is well. 'self.create' ran successfully, so we + // forget the guard. + core::mem::forget(guard); + // Everything is initialized, so we can declare success. 
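+ // (This 'Release' store pairs with the 'Acquire' loads in the
+ // spin loop above and in 'poll' below, which is what publishes
+ // the write to 'self.data' to other threads.)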
+ self.state.store(LAZY_STATE_DONE, Ordering::Release); + break; + } + core::hint::spin_loop(); + } + // We only get here if data is fully initialized, and thus poll + // will always return something. + self.poll().unwrap() + } + + /// If this lazy value has been initialized successfully, then return + /// that value. Otherwise return None immediately. This never blocks. + fn poll(&self) -> Option<&T> { + if self.state.load(Ordering::Acquire) == LAZY_STATE_DONE { + // SAFETY: The DONE state only occurs when data has been fully + // initialized. + Some(unsafe { &*(*self.data.as_ptr()).as_ptr() }) + } else { + None + } + } + } + + impl<T: fmt::Debug, F: FnMut() -> T> fmt::Debug for Lazy<T, F> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("Lazy") + .field("state", &self.state.load(Ordering::Acquire)) + .field("create", &"<closure>") + .field("data", &self.poll()) + .finish() + } + } + + impl<T, F> Drop for Lazy<T, F> { + fn drop(&mut self) { + if *self.state.get_mut() == LAZY_STATE_DONE { + // SAFETY: state is DONE if and only if data has been fully + // initialized. At which point, it is safe to drop. + unsafe { + self.data.get_mut().assume_init_drop(); + } + } + } + } + + /// A guard that will reset a Lazy's state back to INIT when dropped. The + /// idea here is to 'forget' this guard on success. On failure (when a + /// panic occurs), the Drop impl runs and causes all in-progress and future + /// 'get' calls to panic. Without this guard, all in-progress and future + /// 'get' calls would spin forever. Crashing is much better than getting + /// stuck in an infinite loop. + struct Guard<'a> { + state: &'a AtomicU8, + } + + impl<'a> Drop for Guard<'a> { + fn drop(&mut self) { + // We force ourselves back into an INIT state. This will in turn + // cause any future 'get' calls to attempt calling 'self.create' + // again which will in turn panic because 'self.create' will now + // be 'None'. + self.state.store(LAZY_STATE_INIT, Ordering::Release); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn assert_send<T: Send>() {} + fn assert_sync<T: Sync>() {} + fn assert_unwind<T: core::panic::UnwindSafe>() {} + fn assert_refunwind<T: core::panic::RefUnwindSafe>() {} + + #[test] + fn oibits() { + assert_send::<Lazy<u64>>(); + assert_sync::<Lazy<u64>>(); + assert_unwind::<Lazy<u64>>(); + assert_refunwind::<Lazy<u64>>(); + } + + // This is a regression test because we used to rely on the inferred Sync + // impl for the Lazy type defined above (for 'alloc' mode). In the + // inferred impl, it only requires that T: Sync for Lazy<T>: Sync. But + // if we have that, we can actually make use of the fact that Lazy<T> drops + // T to create a value on one thread and drop it on another. This *should* + // require T: Send, but our missing bounds before let it sneak by. + // + // Basically, this test should not compile, so we... comment it out. We + // don't have a great way of testing compile-fail tests right now. + // + // See: https://github.com/BurntSushi/regex-automata/issues/30 + /* + #[test] + fn sync_not_send() { + #[allow(dead_code)] + fn inner<T: Sync + Default>() { + let lazy = Lazy::new(move || T::default()); + std::thread::scope(|scope| { + scope.spawn(|| { + Lazy::get(&lazy); // We create T in this thread + }); + }); + // And drop in this thread. + drop(lazy); + // So we have send a !Send type over threads. 
(with some more + // legwork, its possible to even sneak the value out of drop + // through thread local) + } + } + */ +} diff --git a/vendor/regex-automata/src/util/look.rs b/vendor/regex-automata/src/util/look.rs new file mode 100644 index 0000000..73e51c0 --- /dev/null +++ b/vendor/regex-automata/src/util/look.rs @@ -0,0 +1,2547 @@ +/*! +Types and routines for working with look-around assertions. + +This module principally defines two types: + +* [`Look`] enumerates all of the assertions supported by this crate. +* [`LookSet`] provides a way to efficiently store a set of [`Look`] values. +* [`LookMatcher`] provides routines for checking whether a `Look` or a +`LookSet` matches at a particular position in a haystack. +*/ + +// LAMENTATION: Sadly, a lot of the API of `Look` and `LookSet` were basically +// copied verbatim from the regex-syntax crate. I would have no problems using +// the regex-syntax types and defining the matching routines (only found +// in this crate) as free functions, except the `Look` and `LookSet` types +// are used in lots of places. Including in places we expect to work when +// regex-syntax is *not* enabled, such as in the definition of the NFA itself. +// +// Thankfully the code we copy is pretty simple and there isn't much of it. +// Otherwise, the rest of this module deals with *matching* the assertions, +// which is not something that regex-syntax handles. + +use crate::util::{escape::DebugByte, utf8}; + +/// A look-around assertion. +/// +/// An assertion matches at a position between characters in a haystack. +/// Namely, it does not actually "consume" any input as most parts of a regular +/// expression do. Assertions are a way of stating that some property must be +/// true at a particular point during matching. +/// +/// For example, `(?m)^[a-z]+$` is a pattern that: +/// +/// * Scans the haystack for a position at which `(?m:^)` is satisfied. That +/// occurs at either the beginning of the haystack, or immediately following +/// a `\n` character. +/// * Looks for one or more occurrences of `[a-z]`. +/// * Once `[a-z]+` has matched as much as it can, an overall match is only +/// reported when `[a-z]+` stops just before a `\n`. +/// +/// So in this case, `abc` and `\nabc\n` match, but `\nabc1\n` does not. +/// +/// Assertions are also called "look-around," "look-behind" and "look-ahead." +/// Specifically, some assertions are look-behind (like `^`), other assertions +/// are look-ahead (like `$`) and yet other assertions are both look-ahead and +/// look-behind (like `\b`). +/// +/// # Assertions in an NFA +/// +/// An assertion in a [`thompson::NFA`](crate::nfa::thompson::NFA) can be +/// thought of as a conditional epsilon transition. That is, a matching engine +/// like the [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM) only permits +/// moving through conditional epsilon transitions when their condition +/// is satisfied at whatever position the `PikeVM` is currently at in the +/// haystack. +/// +/// How assertions are handled in a `DFA` is trickier, since a DFA does not +/// have epsilon transitions at all. In this case, they are compiled into the +/// automaton itself, at the expense of more states than what would be required +/// without an assertion. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum Look { + /// Match the beginning of text. Specifically, this matches at the starting + /// position of the input. + Start = 1 << 0, + /// Match the end of text. Specifically, this matches at the ending + /// position of the input. 
+ End = 1 << 1, + /// Match the beginning of a line or the beginning of text. Specifically, + /// this matches at the starting position of the input, or at the position + /// immediately following a `\n` character. + StartLF = 1 << 2, + /// Match the end of a line or the end of text. Specifically, this matches + /// at the end position of the input, or at the position immediately + /// preceding a `\n` character. + EndLF = 1 << 3, + /// Match the beginning of a line or the beginning of text. Specifically, + /// this matches at the starting position of the input, or at the position + /// immediately following either a `\r` or `\n` character, but never after + /// a `\r` when a `\n` follows. + StartCRLF = 1 << 4, + /// Match the end of a line or the end of text. Specifically, this matches + /// at the end position of the input, or at the position immediately + /// preceding a `\r` or `\n` character, but never before a `\n` when a `\r` + /// precedes it. + EndCRLF = 1 << 5, + /// Match an ASCII-only word boundary. That is, this matches a position + /// where the left adjacent character and right adjacent character + /// correspond to a word and non-word or a non-word and word character. + WordAscii = 1 << 6, + /// Match an ASCII-only negation of a word boundary. + WordAsciiNegate = 1 << 7, + /// Match a Unicode-aware word boundary. That is, this matches a position + /// where the left adjacent character and right adjacent character + /// correspond to a word and non-word or a non-word and word character. + WordUnicode = 1 << 8, + /// Match a Unicode-aware negation of a word boundary. + WordUnicodeNegate = 1 << 9, + /// Match the start of an ASCII-only word boundary. That is, this matches a + /// position at either the beginning of the haystack or where the previous + /// character is not a word character and the following character is a word + /// character. + WordStartAscii = 1 << 10, + /// Match the end of an ASCII-only word boundary. That is, this matches + /// a position at either the end of the haystack or where the previous + /// character is a word character and the following character is not a word + /// character. + WordEndAscii = 1 << 11, + /// Match the start of a Unicode word boundary. That is, this matches a + /// position at either the beginning of the haystack or where the previous + /// character is not a word character and the following character is a word + /// character. + WordStartUnicode = 1 << 12, + /// Match the end of a Unicode word boundary. That is, this matches a + /// position at either the end of the haystack or where the previous + /// character is a word character and the following character is not a word + /// character. + WordEndUnicode = 1 << 13, + /// Match the start half of an ASCII-only word boundary. That is, this + /// matches a position at either the beginning of the haystack or where the + /// previous character is not a word character. + WordStartHalfAscii = 1 << 14, + /// Match the end half of an ASCII-only word boundary. That is, this + /// matches a position at either the end of the haystack or where the + /// following character is not a word character. + WordEndHalfAscii = 1 << 15, + /// Match the start half of a Unicode word boundary. That is, this matches + /// a position at either the beginning of the haystack or where the + /// previous character is not a word character. + WordStartHalfUnicode = 1 << 16, + /// Match the end half of a Unicode word boundary. 
That is, this matches + /// a position at either the end of the haystack or where the following + /// character is not a word character. + WordEndHalfUnicode = 1 << 17, +} + +impl Look { + /// Flip the look-around assertion to its equivalent for reverse searches. + /// For example, `StartLF` gets translated to `EndLF`. + /// + /// Some assertions, such as `WordUnicode`, remain the same since they + /// match the same positions regardless of the direction of the search. + #[inline] + pub const fn reversed(self) -> Look { + match self { + Look::Start => Look::End, + Look::End => Look::Start, + Look::StartLF => Look::EndLF, + Look::EndLF => Look::StartLF, + Look::StartCRLF => Look::EndCRLF, + Look::EndCRLF => Look::StartCRLF, + Look::WordAscii => Look::WordAscii, + Look::WordAsciiNegate => Look::WordAsciiNegate, + Look::WordUnicode => Look::WordUnicode, + Look::WordUnicodeNegate => Look::WordUnicodeNegate, + Look::WordStartAscii => Look::WordEndAscii, + Look::WordEndAscii => Look::WordStartAscii, + Look::WordStartUnicode => Look::WordEndUnicode, + Look::WordEndUnicode => Look::WordStartUnicode, + Look::WordStartHalfAscii => Look::WordEndHalfAscii, + Look::WordEndHalfAscii => Look::WordStartHalfAscii, + Look::WordStartHalfUnicode => Look::WordEndHalfUnicode, + Look::WordEndHalfUnicode => Look::WordStartHalfUnicode, + } + } + + /// Return the underlying representation of this look-around enumeration + /// as an integer. Giving the return value to the [`Look::from_repr`] + /// constructor is guaranteed to return the same look-around variant that + /// one started with within a semver compatible release of this crate. + #[inline] + pub const fn as_repr(self) -> u32 { + // AFAIK, 'as' is the only way to zero-cost convert an int enum to an + // actual int. + self as u32 + } + + /// Given the underlying representation of a `Look` value, return the + /// corresponding `Look` value if the representation is valid. Otherwise + /// `None` is returned. + #[inline] + pub const fn from_repr(repr: u32) -> Option<Look> { + match repr { + 0b00_0000_0000_0000_0001 => Some(Look::Start), + 0b00_0000_0000_0000_0010 => Some(Look::End), + 0b00_0000_0000_0000_0100 => Some(Look::StartLF), + 0b00_0000_0000_0000_1000 => Some(Look::EndLF), + 0b00_0000_0000_0001_0000 => Some(Look::StartCRLF), + 0b00_0000_0000_0010_0000 => Some(Look::EndCRLF), + 0b00_0000_0000_0100_0000 => Some(Look::WordAscii), + 0b00_0000_0000_1000_0000 => Some(Look::WordAsciiNegate), + 0b00_0000_0001_0000_0000 => Some(Look::WordUnicode), + 0b00_0000_0010_0000_0000 => Some(Look::WordUnicodeNegate), + 0b00_0000_0100_0000_0000 => Some(Look::WordStartAscii), + 0b00_0000_1000_0000_0000 => Some(Look::WordEndAscii), + 0b00_0001_0000_0000_0000 => Some(Look::WordStartUnicode), + 0b00_0010_0000_0000_0000 => Some(Look::WordEndUnicode), + 0b00_0100_0000_0000_0000 => Some(Look::WordStartHalfAscii), + 0b00_1000_0000_0000_0000 => Some(Look::WordEndHalfAscii), + 0b01_0000_0000_0000_0000 => Some(Look::WordStartHalfUnicode), + 0b10_0000_0000_0000_0000 => Some(Look::WordEndHalfUnicode), + _ => None, + } + } + + /// Returns a convenient single codepoint representation of this + /// look-around assertion. Each assertion is guaranteed to be represented + /// by a distinct character. + /// + /// This is useful for succinctly representing a look-around assertion in + /// human friendly but succinct output intended for a programmer working on + /// regex internals. 
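+ ///
+ /// For example (a small sketch):
+ ///
+ /// ```
+ /// use regex_automata::util::look::Look;
+ ///
+ /// assert_eq!('b', Look::WordAscii.as_char());
+ /// assert_eq!('B', Look::WordAsciiNegate.as_char());
+ /// ```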
+ #[inline] + pub const fn as_char(self) -> char { + match self { + Look::Start => 'A', + Look::End => 'z', + Look::StartLF => '^', + Look::EndLF => '$', + Look::StartCRLF => 'r', + Look::EndCRLF => 'R', + Look::WordAscii => 'b', + Look::WordAsciiNegate => 'B', + Look::WordUnicode => '𝛃', + Look::WordUnicodeNegate => '𝚩', + Look::WordStartAscii => '<', + Look::WordEndAscii => '>', + Look::WordStartUnicode => '〈', + Look::WordEndUnicode => '〉', + Look::WordStartHalfAscii => '◁', + Look::WordEndHalfAscii => '▷', + Look::WordStartHalfUnicode => '◀', + Look::WordEndHalfUnicode => '▶', + } + } +} + +/// LookSet is a memory-efficient set of look-around assertions. +/// +/// This is useful for efficiently tracking look-around assertions. For +/// example, a [`thompson::NFA`](crate::nfa::thompson::NFA) provides properties +/// that return `LookSet`s. +#[derive(Clone, Copy, Default, Eq, PartialEq)] +pub struct LookSet { + /// The underlying representation this set is exposed to make it possible + /// to store it somewhere efficiently. The representation is that + /// of a bitset, where each assertion occupies bit `i` where + /// `i = Look::as_repr()`. + /// + /// Note that users of this internal representation must permit the full + /// range of `u16` values to be represented. For example, even if the + /// current implementation only makes use of the 10 least significant bits, + /// it may use more bits in a future semver compatible release. + pub bits: u32, +} + +impl LookSet { + /// Create an empty set of look-around assertions. + #[inline] + pub fn empty() -> LookSet { + LookSet { bits: 0 } + } + + /// Create a full set of look-around assertions. + /// + /// This set contains all possible look-around assertions. + #[inline] + pub fn full() -> LookSet { + LookSet { bits: !0 } + } + + /// Create a look-around set containing the look-around assertion given. + /// + /// This is a convenience routine for creating an empty set and inserting + /// one look-around assertions. + #[inline] + pub fn singleton(look: Look) -> LookSet { + LookSet::empty().insert(look) + } + + /// Returns the total number of look-around assertions in this set. + #[inline] + pub fn len(self) -> usize { + // OK because max value always fits in a u8, which in turn always + // fits in a usize, regardless of target. + usize::try_from(self.bits.count_ones()).unwrap() + } + + /// Returns true if and only if this set is empty. + #[inline] + pub fn is_empty(self) -> bool { + self.len() == 0 + } + + /// Returns true if and only if the given look-around assertion is in this + /// set. + #[inline] + pub fn contains(self, look: Look) -> bool { + self.bits & look.as_repr() != 0 + } + + /// Returns true if and only if this set contains any anchor assertions. + /// This includes both "start/end of haystack" and "start/end of line." + #[inline] + pub fn contains_anchor(&self) -> bool { + self.contains_anchor_haystack() || self.contains_anchor_line() + } + + /// Returns true if and only if this set contains any "start/end of + /// haystack" anchors. This doesn't include "start/end of line" anchors. + #[inline] + pub fn contains_anchor_haystack(&self) -> bool { + self.contains(Look::Start) || self.contains(Look::End) + } + + /// Returns true if and only if this set contains any "start/end of line" + /// anchors. This doesn't include "start/end of haystack" anchors. This + /// includes both `\n` line anchors and CRLF (`\r\n`) aware line anchors. 
+ #[inline] + pub fn contains_anchor_line(&self) -> bool { + self.contains(Look::StartLF) + || self.contains(Look::EndLF) + || self.contains(Look::StartCRLF) + || self.contains(Look::EndCRLF) + } + + /// Returns true if and only if this set contains any "start/end of line" + /// anchors that only treat `\n` as line terminators. This does not include + /// haystack anchors or CRLF aware line anchors. + #[inline] + pub fn contains_anchor_lf(&self) -> bool { + self.contains(Look::StartLF) || self.contains(Look::EndLF) + } + + /// Returns true if and only if this set contains any "start/end of line" + /// anchors that are CRLF-aware. This doesn't include "start/end of + /// haystack" or "start/end of line-feed" anchors. + #[inline] + pub fn contains_anchor_crlf(&self) -> bool { + self.contains(Look::StartCRLF) || self.contains(Look::EndCRLF) + } + + /// Returns true if and only if this set contains any word boundary or + /// negated word boundary assertions. This include both Unicode and ASCII + /// word boundaries. + #[inline] + pub fn contains_word(self) -> bool { + self.contains_word_unicode() || self.contains_word_ascii() + } + + /// Returns true if and only if this set contains any Unicode word boundary + /// or negated Unicode word boundary assertions. + #[inline] + pub fn contains_word_unicode(self) -> bool { + self.contains(Look::WordUnicode) + || self.contains(Look::WordUnicodeNegate) + || self.contains(Look::WordStartUnicode) + || self.contains(Look::WordEndUnicode) + || self.contains(Look::WordStartHalfUnicode) + || self.contains(Look::WordEndHalfUnicode) + } + + /// Returns true if and only if this set contains any ASCII word boundary + /// or negated ASCII word boundary assertions. + #[inline] + pub fn contains_word_ascii(self) -> bool { + self.contains(Look::WordAscii) + || self.contains(Look::WordAsciiNegate) + || self.contains(Look::WordStartAscii) + || self.contains(Look::WordEndAscii) + || self.contains(Look::WordStartHalfAscii) + || self.contains(Look::WordEndHalfAscii) + } + + /// Returns an iterator over all of the look-around assertions in this set. + #[inline] + pub fn iter(self) -> LookSetIter { + LookSetIter { set: self } + } + + /// Return a new set that is equivalent to the original, but with the given + /// assertion added to it. If the assertion is already in the set, then the + /// returned set is equivalent to the original. + #[inline] + pub fn insert(self, look: Look) -> LookSet { + LookSet { bits: self.bits | look.as_repr() } + } + + /// Updates this set in place with the result of inserting the given + /// assertion into this set. + #[inline] + pub fn set_insert(&mut self, look: Look) { + *self = self.insert(look); + } + + /// Return a new set that is equivalent to the original, but with the given + /// assertion removed from it. If the assertion is not in the set, then the + /// returned set is equivalent to the original. + #[inline] + pub fn remove(self, look: Look) -> LookSet { + LookSet { bits: self.bits & !look.as_repr() } + } + + /// Updates this set in place with the result of removing the given + /// assertion from this set. + #[inline] + pub fn set_remove(&mut self, look: Look) { + *self = self.remove(look); + } + + /// Returns a new set that is the result of subtracting the given set from + /// this set. + #[inline] + pub fn subtract(self, other: LookSet) -> LookSet { + LookSet { bits: self.bits & !other.bits } + } + + /// Updates this set in place with the result of subtracting the given set + /// from this set. 
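// A doctest-style sketch of the bitset combinators above: build a set,
// query it, and derive new sets without mutating the original.
use regex_automata::util::look::{Look, LookSet};

let set = LookSet::empty().insert(Look::StartLF).insert(Look::WordAscii);
assert_eq!(2, set.len());
assert!(set.contains(Look::WordAscii));
assert!(set.contains_anchor_line());
assert!(!set.contains_word_unicode());
// Iteration yields each assertion once, lowest bit first.
assert_eq!(2, set.iter().count());
// `remove` and `subtract` return new sets; `set` itself is unchanged.
assert!(!set.remove(Look::WordAscii).contains(Look::WordAscii));
assert!(set.subtract(LookSet::singleton(Look::StartLF)).contains(Look::WordAscii));
assert!(set.contains(Look::StartLF));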
+ #[inline] + pub fn set_subtract(&mut self, other: LookSet) { + *self = self.subtract(other); + } + + /// Returns a new set that is the union of this and the one given. + #[inline] + pub fn union(self, other: LookSet) -> LookSet { + LookSet { bits: self.bits | other.bits } + } + + /// Updates this set in place with the result of unioning it with the one + /// given. + #[inline] + pub fn set_union(&mut self, other: LookSet) { + *self = self.union(other); + } + + /// Returns a new set that is the intersection of this and the one given. + #[inline] + pub fn intersect(self, other: LookSet) -> LookSet { + LookSet { bits: self.bits & other.bits } + } + + /// Updates this set in place with the result of intersecting it with the + /// one given. + #[inline] + pub fn set_intersect(&mut self, other: LookSet) { + *self = self.intersect(other); + } + + /// Return a `LookSet` from the slice given as a native endian 32-bit + /// integer. + /// + /// # Panics + /// + /// This panics if `slice.len() < 4`. + #[inline] + pub fn read_repr(slice: &[u8]) -> LookSet { + let bits = u32::from_ne_bytes(slice[..4].try_into().unwrap()); + LookSet { bits } + } + + /// Write a `LookSet` as a native endian 32-bit integer to the beginning + /// of the slice given. + /// + /// # Panics + /// + /// This panics if `slice.len() < 4`. + #[inline] + pub fn write_repr(self, slice: &mut [u8]) { + let raw = self.bits.to_ne_bytes(); + slice[0] = raw[0]; + slice[1] = raw[1]; + slice[2] = raw[2]; + slice[3] = raw[3]; + } + + /// Checks that all assertions in this set can be matched. + /// + /// Some assertions, such as Unicode word boundaries, require optional (but + /// enabled by default) tables that may not be available. If there are + /// assertions in this set that require tables that are not available, then + /// this will return an error. + /// + /// Specifically, this returns an error when the the + /// `unicode-word-boundary` feature is _not_ enabled _and_ this set + /// contains a Unicode word boundary assertion. + /// + /// It can be useful to use this on the result of + /// [`NFA::look_set_any`](crate::nfa::thompson::NFA::look_set_any) + /// when building a matcher engine to ensure methods like + /// [`LookMatcher::matches_set`] do not panic at search time. + pub fn available(self) -> Result<(), UnicodeWordBoundaryError> { + if self.contains_word_unicode() { + UnicodeWordBoundaryError::check()?; + } + Ok(()) + } +} + +impl core::fmt::Debug for LookSet { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + if self.is_empty() { + return write!(f, "∅"); + } + for look in self.iter() { + write!(f, "{}", look.as_char())?; + } + Ok(()) + } +} + +/// An iterator over all look-around assertions in a [`LookSet`]. +/// +/// This iterator is created by [`LookSet::iter`]. +#[derive(Clone, Debug)] +pub struct LookSetIter { + set: LookSet, +} + +impl Iterator for LookSetIter { + type Item = Look; + + #[inline] + fn next(&mut self) -> Option<Look> { + if self.set.is_empty() { + return None; + } + // We'll never have more than u8::MAX distinct look-around assertions, + // so 'bit' will always fit into a u16. + let bit = u16::try_from(self.set.bits.trailing_zeros()).unwrap(); + let look = Look::from_repr(1 << bit)?; + self.set = self.set.remove(look); + Some(look) + } +} + +/// A matcher for look-around assertions. +/// +/// This matcher permits configuring aspects of how look-around assertions are +/// matched. 
+/// +/// # Example +/// +/// A `LookMatcher` can change the line terminator used for matching multi-line +/// anchors such as `(?m:^)` and `(?m:$)`. +/// +/// ``` +/// use regex_automata::{ +/// nfa::thompson::{self, pikevm::PikeVM}, +/// util::look::LookMatcher, +/// Match, Input, +/// }; +/// +/// let mut lookm = LookMatcher::new(); +/// lookm.set_line_terminator(b'\x00'); +/// +/// let re = PikeVM::builder() +/// .thompson(thompson::Config::new().look_matcher(lookm)) +/// .build(r"(?m)^[a-z]+$")?; +/// let mut cache = re.create_cache(); +/// +/// // Multi-line assertions now use NUL as a terminator. +/// assert_eq!( +/// Some(Match::must(0, 1..4)), +/// re.find(&mut cache, b"\x00abc\x00"), +/// ); +/// // ... and \n is no longer recognized as a terminator. +/// assert_eq!( +/// None, +/// re.find(&mut cache, b"\nabc\n"), +/// ); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct LookMatcher { + lineterm: DebugByte, +} + +impl LookMatcher { + /// Creates a new default matcher for look-around assertions. + pub fn new() -> LookMatcher { + LookMatcher { lineterm: DebugByte(b'\n') } + } + + /// Sets the line terminator for use with `(?m:^)` and `(?m:$)`. + /// + /// Namely, instead of `^` matching after `\n` and `$` matching immediately + /// before a `\n`, this will cause it to match after and before the byte + /// given. + /// + /// It can occasionally be useful to use this to configure the line + /// terminator to the NUL byte when searching binary data. + /// + /// Note that this does not apply to CRLF-aware line anchors such as + /// `(?Rm:^)` and `(?Rm:$)`. CRLF-aware line anchors are hard-coded to + /// use `\r` and `\n`. + pub fn set_line_terminator(&mut self, byte: u8) -> &mut LookMatcher { + self.lineterm.0 = byte; + self + } + + /// Returns the line terminator that was configured for this matcher. + /// + /// If no line terminator was configured, then this returns `\n`. + /// + /// Note that the line terminator should only be used for matching `(?m:^)` + /// and `(?m:$)` assertions. It specifically should _not_ be used for + /// matching the CRLF aware assertions `(?Rm:^)` and `(?Rm:$)`. + pub fn get_line_terminator(&self) -> u8 { + self.lineterm.0 + } + + /// Returns true when the position `at` in `haystack` satisfies the given + /// look-around assertion. + /// + /// # Panics + /// + /// This panics when testing any Unicode word boundary assertion in this + /// set and when the Unicode word data is not available. Specifically, this + /// only occurs when the `unicode-word-boundary` feature is not enabled. + /// + /// Since it's generally expected that this routine is called inside of + /// a matching engine, callers should check the error condition when + /// building the matching engine. If there is a Unicode word boundary + /// in the matcher and the data isn't available, then the matcher should + /// fail to build. + /// + /// Callers can check the error condition with [`LookSet::available`]. + /// + /// This also may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn matches(&self, look: Look, haystack: &[u8], at: usize) -> bool { + self.matches_inline(look, haystack, at) + } + + /// Like `matches`, but forcefully inlined. + /// + /// # Panics + /// + /// This panics when testing any Unicode word boundary assertion in this + /// set and when the Unicode word data is not available. 
Specifically, this + /// only occurs when the `unicode-word-boundary` feature is not enabled. + /// + /// Since it's generally expected that this routine is called inside of + /// a matching engine, callers should check the error condition when + /// building the matching engine. If there is a Unicode word boundary + /// in the matcher and the data isn't available, then the matcher should + /// fail to build. + /// + /// Callers can check the error condition with [`LookSet::available`]. + /// + /// This also may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn matches_inline( + &self, + look: Look, + haystack: &[u8], + at: usize, + ) -> bool { + match look { + Look::Start => self.is_start(haystack, at), + Look::End => self.is_end(haystack, at), + Look::StartLF => self.is_start_lf(haystack, at), + Look::EndLF => self.is_end_lf(haystack, at), + Look::StartCRLF => self.is_start_crlf(haystack, at), + Look::EndCRLF => self.is_end_crlf(haystack, at), + Look::WordAscii => self.is_word_ascii(haystack, at), + Look::WordAsciiNegate => self.is_word_ascii_negate(haystack, at), + Look::WordUnicode => self.is_word_unicode(haystack, at).unwrap(), + Look::WordUnicodeNegate => { + self.is_word_unicode_negate(haystack, at).unwrap() + } + Look::WordStartAscii => self.is_word_start_ascii(haystack, at), + Look::WordEndAscii => self.is_word_end_ascii(haystack, at), + Look::WordStartUnicode => { + self.is_word_start_unicode(haystack, at).unwrap() + } + Look::WordEndUnicode => { + self.is_word_end_unicode(haystack, at).unwrap() + } + Look::WordStartHalfAscii => { + self.is_word_start_half_ascii(haystack, at) + } + Look::WordEndHalfAscii => { + self.is_word_end_half_ascii(haystack, at) + } + Look::WordStartHalfUnicode => { + self.is_word_start_half_unicode(haystack, at).unwrap() + } + Look::WordEndHalfUnicode => { + self.is_word_end_half_unicode(haystack, at).unwrap() + } + } + } + + /// Returns true when _all_ of the assertions in the given set match at the + /// given position in the haystack. + /// + /// # Panics + /// + /// This panics when testing any Unicode word boundary assertion in this + /// set and when the Unicode word data is not available. Specifically, this + /// only occurs when the `unicode-word-boundary` feature is not enabled. + /// + /// Since it's generally expected that this routine is called inside of + /// a matching engine, callers should check the error condition when + /// building the matching engine. If there is a Unicode word boundary + /// in the matcher and the data isn't available, then the matcher should + /// fail to build. + /// + /// Callers can check the error condition with [`LookSet::available`]. + /// + /// This also may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn matches_set( + &self, + set: LookSet, + haystack: &[u8], + at: usize, + ) -> bool { + self.matches_set_inline(set, haystack, at) + } + + /// Like `LookSet::matches`, but forcefully inlined for perf. + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn matches_set_inline( + &self, + set: LookSet, + haystack: &[u8], + at: usize, + ) -> bool { + // This used to luse LookSet::iter with Look::matches on each element, + // but that proved to be quite diastrous for perf. The manual "if + // the set has this assertion, check it" turns out to be quite a bit + // faster. 
+ if set.contains(Look::Start) { + if !self.is_start(haystack, at) { + return false; + } + } + if set.contains(Look::End) { + if !self.is_end(haystack, at) { + return false; + } + } + if set.contains(Look::StartLF) { + if !self.is_start_lf(haystack, at) { + return false; + } + } + if set.contains(Look::EndLF) { + if !self.is_end_lf(haystack, at) { + return false; + } + } + if set.contains(Look::StartCRLF) { + if !self.is_start_crlf(haystack, at) { + return false; + } + } + if set.contains(Look::EndCRLF) { + if !self.is_end_crlf(haystack, at) { + return false; + } + } + if set.contains(Look::WordAscii) { + if !self.is_word_ascii(haystack, at) { + return false; + } + } + if set.contains(Look::WordAsciiNegate) { + if !self.is_word_ascii_negate(haystack, at) { + return false; + } + } + if set.contains(Look::WordUnicode) { + if !self.is_word_unicode(haystack, at).unwrap() { + return false; + } + } + if set.contains(Look::WordUnicodeNegate) { + if !self.is_word_unicode_negate(haystack, at).unwrap() { + return false; + } + } + if set.contains(Look::WordStartAscii) { + if !self.is_word_start_ascii(haystack, at) { + return false; + } + } + if set.contains(Look::WordEndAscii) { + if !self.is_word_end_ascii(haystack, at) { + return false; + } + } + if set.contains(Look::WordStartUnicode) { + if !self.is_word_start_unicode(haystack, at).unwrap() { + return false; + } + } + if set.contains(Look::WordEndUnicode) { + if !self.is_word_end_unicode(haystack, at).unwrap() { + return false; + } + } + if set.contains(Look::WordStartHalfAscii) { + if !self.is_word_start_half_ascii(haystack, at) { + return false; + } + } + if set.contains(Look::WordEndHalfAscii) { + if !self.is_word_end_half_ascii(haystack, at) { + return false; + } + } + if set.contains(Look::WordStartHalfUnicode) { + if !self.is_word_start_half_unicode(haystack, at).unwrap() { + return false; + } + } + if set.contains(Look::WordEndHalfUnicode) { + if !self.is_word_end_half_unicode(haystack, at).unwrap() { + return false; + } + } + true + } + + /// Split up the given byte classes into equivalence classes in a way that + /// is consistent with this look-around assertion. + #[cfg(feature = "alloc")] + pub(crate) fn add_to_byteset( + &self, + look: Look, + set: &mut crate::util::alphabet::ByteClassSet, + ) { + match look { + Look::Start | Look::End => {} + Look::StartLF | Look::EndLF => { + set.set_range(self.lineterm.0, self.lineterm.0); + } + Look::StartCRLF | Look::EndCRLF => { + set.set_range(b'\r', b'\r'); + set.set_range(b'\n', b'\n'); + } + Look::WordAscii + | Look::WordAsciiNegate + | Look::WordUnicode + | Look::WordUnicodeNegate + | Look::WordStartAscii + | Look::WordEndAscii + | Look::WordStartUnicode + | Look::WordEndUnicode + | Look::WordStartHalfAscii + | Look::WordEndHalfAscii + | Look::WordStartHalfUnicode + | Look::WordEndHalfUnicode => { + // We need to mark all ranges of bytes whose pairs result in + // evaluating \b differently. This isn't technically correct + // for Unicode word boundaries, but DFAs can't handle those + // anyway, and thus, the byte classes don't need to either + // since they are themselves only used in DFAs. + // + // FIXME: It seems like the calls to 'set_range' here are + // completely invariant, which means we could just hard-code + // them here without needing to write a loop. And we only need + // to do this dance at most once per regex. + // + // FIXME: Is this correct for \B? 
+ let iswb = utf8::is_word_byte; + // This unwrap is OK because we guard every use of 'asu8' with + // a check that the input is <= 255. + let asu8 = |b: u16| u8::try_from(b).unwrap(); + let mut b1: u16 = 0; + let mut b2: u16; + while b1 <= 255 { + b2 = b1 + 1; + while b2 <= 255 && iswb(asu8(b1)) == iswb(asu8(b2)) { + b2 += 1; + } + // The guards above guarantee that b2 can never get any + // bigger. + assert!(b2 <= 256); + // Subtracting 1 from b2 is always OK because it is always + // at least 1 greater than b1, and the assert above + // guarantees that the asu8 conversion will succeed. + set.set_range(asu8(b1), asu8(b2.checked_sub(1).unwrap())); + b1 = b2; + } + } + } + } + + /// Returns true when [`Look::Start`] is satisfied `at` the given position + /// in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_start(&self, _haystack: &[u8], at: usize) -> bool { + at == 0 + } + + /// Returns true when [`Look::End`] is satisfied `at` the given position in + /// `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_end(&self, haystack: &[u8], at: usize) -> bool { + at == haystack.len() + } + + /// Returns true when [`Look::StartLF`] is satisfied `at` the given + /// position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_start_lf(&self, haystack: &[u8], at: usize) -> bool { + self.is_start(haystack, at) || haystack[at - 1] == self.lineterm.0 + } + + /// Returns true when [`Look::EndLF`] is satisfied `at` the given position + /// in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_end_lf(&self, haystack: &[u8], at: usize) -> bool { + self.is_end(haystack, at) || haystack[at] == self.lineterm.0 + } + + /// Returns true when [`Look::StartCRLF`] is satisfied `at` the given + /// position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_start_crlf(&self, haystack: &[u8], at: usize) -> bool { + self.is_start(haystack, at) + || haystack[at - 1] == b'\n' + || (haystack[at - 1] == b'\r' + && (at >= haystack.len() || haystack[at] != b'\n')) + } + + /// Returns true when [`Look::EndCRLF`] is satisfied `at` the given + /// position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_end_crlf(&self, haystack: &[u8], at: usize) -> bool { + self.is_end(haystack, at) + || haystack[at] == b'\r' + || (haystack[at] == b'\n' + && (at == 0 || haystack[at - 1] != b'\r')) + } + + /// Returns true when [`Look::WordAscii`] is satisfied `at` the given + /// position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. 
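// A doctest-style sketch of the `matches` and `matches_set` routines
// documented above, using only assertions that need no Unicode tables.
use regex_automata::util::look::{Look, LookMatcher, LookSet};

let lookm = LookMatcher::new();
let hay = b"foo\nbar";
assert!(lookm.matches(Look::Start, hay, 0));
assert!(lookm.matches(Look::StartLF, hay, 4)); // just after the '\n'
assert!(lookm.matches(Look::EndLF, hay, 3));   // just before the '\n'
// With a set, every assertion must hold at the given position.
let set = LookSet::singleton(Look::StartLF).insert(Look::WordStartAscii);
assert!(lookm.matches_set(set, hay, 4));
assert!(!lookm.matches_set(set, hay, 3));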
+ #[inline] + pub fn is_word_ascii(&self, haystack: &[u8], at: usize) -> bool { + let word_before = at > 0 && utf8::is_word_byte(haystack[at - 1]); + let word_after = + at < haystack.len() && utf8::is_word_byte(haystack[at]); + word_before != word_after + } + + /// Returns true when [`Look::WordAsciiNegate`] is satisfied `at` the given + /// position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_word_ascii_negate(&self, haystack: &[u8], at: usize) -> bool { + !self.is_word_ascii(haystack, at) + } + + /// Returns true when [`Look::WordUnicode`] is satisfied `at` the given + /// position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + /// + /// # Errors + /// + /// This returns an error when Unicode word boundary tables + /// are not available. Specifically, this only occurs when the + /// `unicode-word-boundary` feature is not enabled. + #[inline] + pub fn is_word_unicode( + &self, + haystack: &[u8], + at: usize, + ) -> Result<bool, UnicodeWordBoundaryError> { + let word_before = is_word_char::rev(haystack, at)?; + let word_after = is_word_char::fwd(haystack, at)?; + Ok(word_before != word_after) + } + + /// Returns true when [`Look::WordUnicodeNegate`] is satisfied `at` the + /// given position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + /// + /// # Errors + /// + /// This returns an error when Unicode word boundary tables + /// are not available. Specifically, this only occurs when the + /// `unicode-word-boundary` feature is not enabled. + #[inline] + pub fn is_word_unicode_negate( + &self, + haystack: &[u8], + at: usize, + ) -> Result<bool, UnicodeWordBoundaryError> { + // This is pretty subtle. Why do we need to do UTF-8 decoding here? + // Well... at time of writing, the is_word_char_{fwd,rev} routines will + // only return true if there is a valid UTF-8 encoding of a "word" + // codepoint, and false in every other case (including invalid UTF-8). + // This means that in regions of invalid UTF-8 (which might be a + // subset of valid UTF-8!), it would result in \B matching. While this + // would be questionable in the context of truly invalid UTF-8, it is + // *certainly* wrong to report match boundaries that split the encoding + // of a codepoint. So to work around this, we ensure that we can decode + // a codepoint on either side of `at`. If either direction fails, then + // we don't permit \B to match at all. + // + // Now, this isn't exactly optimal from a perf perspective. We could + // try and detect this in is_word_char::{fwd,rev}, but it's not clear + // if it's worth it. \B is, after all, rarely used. Even worse, + // is_word_char::{fwd,rev} could do its own UTF-8 decoding, and so this + // will wind up doing UTF-8 decoding twice. Owch. We could fix this + // with more code complexity, but it just doesn't feel worth it for \B. + // + // And in particular, we do *not* have to do this with \b, because \b + // *requires* that at least one side of `at` be a "word" codepoint, + // which in turn implies one side of `at` must be valid UTF-8. This in + // turn implies that \b can never split a valid UTF-8 encoding of a + // codepoint. 
In the case where one side of `at` is truly invalid UTF-8 + // and the other side IS a word codepoint, then we want \b to match + // since it represents a valid UTF-8 boundary. It also makes sense. For + // example, you'd want \b\w+\b to match 'abc' in '\xFFabc\xFF'. + // + // Note also that this is not just '!is_word_unicode(..)' like it is + // for the ASCII case. For example, neither \b nor \B is satisfied + // within invalid UTF-8 sequences. + let word_before = at > 0 + && match utf8::decode_last(&haystack[..at]) { + None | Some(Err(_)) => return Ok(false), + Some(Ok(_)) => is_word_char::rev(haystack, at)?, + }; + let word_after = at < haystack.len() + && match utf8::decode(&haystack[at..]) { + None | Some(Err(_)) => return Ok(false), + Some(Ok(_)) => is_word_char::fwd(haystack, at)?, + }; + Ok(word_before == word_after) + } + + /// Returns true when [`Look::WordStartAscii`] is satisfied `at` the given + /// position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_word_start_ascii(&self, haystack: &[u8], at: usize) -> bool { + let word_before = at > 0 && utf8::is_word_byte(haystack[at - 1]); + let word_after = + at < haystack.len() && utf8::is_word_byte(haystack[at]); + !word_before && word_after + } + + /// Returns true when [`Look::WordEndAscii`] is satisfied `at` the given + /// position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_word_end_ascii(&self, haystack: &[u8], at: usize) -> bool { + let word_before = at > 0 && utf8::is_word_byte(haystack[at - 1]); + let word_after = + at < haystack.len() && utf8::is_word_byte(haystack[at]); + word_before && !word_after + } + + /// Returns true when [`Look::WordStartUnicode`] is satisfied `at` the + /// given position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + /// + /// # Errors + /// + /// This returns an error when Unicode word boundary tables + /// are not available. Specifically, this only occurs when the + /// `unicode-word-boundary` feature is not enabled. + #[inline] + pub fn is_word_start_unicode( + &self, + haystack: &[u8], + at: usize, + ) -> Result<bool, UnicodeWordBoundaryError> { + let word_before = is_word_char::rev(haystack, at)?; + let word_after = is_word_char::fwd(haystack, at)?; + Ok(!word_before && word_after) + } + + /// Returns true when [`Look::WordEndUnicode`] is satisfied `at` the + /// given position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + /// + /// # Errors + /// + /// This returns an error when Unicode word boundary tables + /// are not available. Specifically, this only occurs when the + /// `unicode-word-boundary` feature is not enabled. + #[inline] + pub fn is_word_end_unicode( + &self, + haystack: &[u8], + at: usize, + ) -> Result<bool, UnicodeWordBoundaryError> { + let word_before = is_word_char::rev(haystack, at)?; + let word_after = is_word_char::fwd(haystack, at)?; + Ok(word_before && !word_after) + } + + /// Returns true when [`Look::WordStartHalfAscii`] is satisfied `at` the + /// given position in `haystack`. 
+ /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_word_start_half_ascii( + &self, + haystack: &[u8], + at: usize, + ) -> bool { + let word_before = at > 0 && utf8::is_word_byte(haystack[at - 1]); + !word_before + } + + /// Returns true when [`Look::WordEndHalfAscii`] is satisfied `at` the + /// given position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_word_end_half_ascii(&self, haystack: &[u8], at: usize) -> bool { + let word_after = + at < haystack.len() && utf8::is_word_byte(haystack[at]); + !word_after + } + + /// Returns true when [`Look::WordStartHalfUnicode`] is satisfied `at` the + /// given position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + /// + /// # Errors + /// + /// This returns an error when Unicode word boundary tables + /// are not available. Specifically, this only occurs when the + /// `unicode-word-boundary` feature is not enabled. + #[inline] + pub fn is_word_start_half_unicode( + &self, + haystack: &[u8], + at: usize, + ) -> Result<bool, UnicodeWordBoundaryError> { + // See `is_word_unicode_negate` for why we need to do this. We don't + // need to do it for `is_word_start_unicode` because that guarantees + // that the position matched falls on a valid UTF-8 boundary given + // that the right side must be in \w. + let word_before = at > 0 + && match utf8::decode_last(&haystack[..at]) { + None | Some(Err(_)) => return Ok(false), + Some(Ok(_)) => is_word_char::rev(haystack, at)?, + }; + Ok(!word_before) + } + + /// Returns true when [`Look::WordEndHalfUnicode`] is satisfied `at` the + /// given position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + /// + /// # Errors + /// + /// This returns an error when Unicode word boundary tables + /// are not available. Specifically, this only occurs when the + /// `unicode-word-boundary` feature is not enabled. + #[inline] + pub fn is_word_end_half_unicode( + &self, + haystack: &[u8], + at: usize, + ) -> Result<bool, UnicodeWordBoundaryError> { + // See `is_word_unicode_negate` for why we need to do this. We don't + // need to do it for `is_word_end_unicode` because that guarantees + // that the position matched falls on a valid UTF-8 boundary given + // that the left side must be in \w. + let word_after = at < haystack.len() + && match utf8::decode(&haystack[at..]) { + None | Some(Err(_)) => return Ok(false), + Some(Ok(_)) => is_word_char::fwd(haystack, at)?, + }; + Ok(!word_after) + } +} + +impl Default for LookMatcher { + fn default() -> LookMatcher { + LookMatcher::new() + } +} + +/// An error that occurs when the Unicode-aware `\w` class is unavailable. +/// +/// This error can occur when the data tables necessary for the Unicode aware +/// Perl character class `\w` are unavailable. The `\w` class is used to +/// determine whether a codepoint is considered a word character or not when +/// determining whether a Unicode aware `\b` (or `\B`) matches at a particular +/// position. +/// +/// This error can only occur when the `unicode-word-boundary` feature is +/// disabled. 
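// A sketch of the ASCII vs. Unicode word predicates and of checking
// availability up front. This assumes the default `unicode-word-boundary`
// feature; without it, the Unicode calls below return this error instead.
use regex_automata::util::look::{Look, LookMatcher, LookSet};

let lookm = LookMatcher::new();
let hay = "𝛃".as_bytes(); // 𝛃 is in \w, but is not ASCII
// The ASCII-only check sees no word byte on either side of position 0...
assert!(!lookm.is_word_ascii(hay, 0));
// ...while the Unicode-aware check reports a boundary there.
assert_eq!(Some(true), lookm.is_word_unicode(hay, 0).ok());
// Engines can verify once, at build time, that the required tables exist.
assert!(LookSet::singleton(Look::WordUnicode).available().is_ok());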
+#[derive(Clone, Debug)] +pub struct UnicodeWordBoundaryError(()); + +impl UnicodeWordBoundaryError { + #[cfg(not(feature = "unicode-word-boundary"))] + pub(crate) fn new() -> UnicodeWordBoundaryError { + UnicodeWordBoundaryError(()) + } + + /// Returns an error if and only if Unicode word boundary data is + /// unavailable. + pub fn check() -> Result<(), UnicodeWordBoundaryError> { + is_word_char::check() + } +} + +#[cfg(feature = "std")] +impl std::error::Error for UnicodeWordBoundaryError {} + +impl core::fmt::Display for UnicodeWordBoundaryError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!( + f, + "Unicode-aware \\b and \\B are unavailable because the \ + requisite data tables are missing, please enable the \ + unicode-word-boundary feature" + ) + } +} + +// Below are FOUR different ways for checking whether whether a "word" +// codepoint exists at a particular position in the haystack. The four +// different approaches are, in order of preference: +// +// 1. Parse '\w', convert to an NFA, convert to a fully compiled DFA on the +// first call, and then use that DFA for all subsequent calls. +// 2. Do UTF-8 decoding and use regex_syntax::is_word_character if available. +// 3. Do UTF-8 decoding and use our own 'perl_word' table. +// 4. Return an error. +// +// The reason for all of these approaches is a combination of perf and +// permitting one to build regex-automata without the Unicode data necessary +// for handling Unicode-aware word boundaries. (In which case, '(?-u:\b)' would +// still work.) +// +// The DFA approach is the fastest, but it requires the regex parser, the +// NFA compiler, the DFA builder and the DFA search runtime. That's a lot to +// bring in, but if it's available, it's (probably) the best we can do. +// +// Approaches (2) and (3) are effectively equivalent, but (2) reuses the +// data in regex-syntax and avoids duplicating it in regex-automata. +// +// Finally, (4) unconditionally returns an error since the requisite data isn't +// available anywhere. +// +// There are actually more approaches possible that we didn't implement. For +// example, if the DFA builder is available but the syntax parser is not, we +// could technically hand construct our own NFA from the 'perl_word' data +// table. But to avoid some pretty hairy code duplication, we would in turn +// need to pull the UTF-8 compiler out of the NFA compiler. Yikes. +// +// A possibly more sensible alternative is to use a lazy DFA when the full +// DFA builder isn't available... +// +// Yet another choice would be to build the full DFA and then embed it into the +// source. Then we'd only need to bring in the DFA search runtime, which is +// considerably smaller than the DFA builder code. The problem here is that the +// Debian people have spooked me[1] into avoiding cyclic dependencies. Namely, +// we'd need to build regex-cli, which depends on regex-automata in order to +// build some part of regex-automata. But to be honest, something like this has +// to be allowed somehow? I just don't know what the right process is. +// +// There are perhaps other choices as well. Why did I stop at these 4? Because +// I wanted to preserve my sanity. I suspect I'll wind up adding the lazy DFA +// approach eventually, as the benefits of the DFA approach are somewhat +// compelling. The 'boundary-words-holmes' benchmark tests this. (Note that +// the commands below no longer work. If necessary, we should re-capitulate +// the benchmark from whole cloth in rebar.) 
+// +// $ regex-cli bench measure -f boundary-words-holmes -e pikevm > dfa.csv +// +// Then I changed the code below so that the util/unicode_data/perl_word table +// was used and re-ran the benchmark: +// +// $ regex-cli bench measure -f boundary-words-holmes -e pikevm > table.csv +// +// And compared them: +// +// $ regex-cli bench diff dfa.csv table.csv +// benchmark engine dfa table +// --------- ------ --- ----- +// internal/count/boundary-words-holmes regex/automata/pikevm 18.6 MB/s 12.9 MB/s +// +// Which is a nice improvement. +// +// UPDATE: It turns out that it takes approximately 22ms to build the reverse +// DFA for \w. (And about 3ms for the forward DFA.) It's probably not much in +// the grand scheme things, but that is a significant latency cost. So I'm not +// sure that's a good idea. I then tried using a lazy DFA instead, and that +// eliminated the overhead, but since the lazy DFA requires mutable working +// memory, that requires introducing a 'Cache' for every simultaneous call. +// +// I ended up deciding for now to just keep the "UTF-8 decode and check the +// table." The DFA and lazy DFA approaches are still below, but commented out. +// +// [1]: https://github.com/BurntSushi/ucd-generate/issues/11 + +/* +/// A module that looks for word codepoints using lazy DFAs. +#[cfg(all( + feature = "unicode-word-boundary", + feature = "syntax", + feature = "unicode-perl", + feature = "hybrid" +))] +mod is_word_char { + use alloc::vec::Vec; + + use crate::{ + hybrid::dfa::{Cache, DFA}, + nfa::thompson::NFA, + util::{lazy::Lazy, pool::Pool, primitives::StateID}, + Anchored, Input, + }; + + pub(super) fn check() -> Result<(), super::UnicodeWordBoundaryError> { + Ok(()) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(super) fn fwd( + haystack: &[u8], + mut at: usize, + ) -> Result<bool, super::UnicodeWordBoundaryError> { + static WORD: Lazy<DFA> = Lazy::new(|| DFA::new(r"\w").unwrap()); + static CACHE: Lazy<Pool<Cache>> = + Lazy::new(|| Pool::new(|| WORD.create_cache())); + let dfa = Lazy::get(&WORD); + let mut cache = Lazy::get(&CACHE).get(); + let mut sid = dfa + .start_state_forward( + &mut cache, + &Input::new("").anchored(Anchored::Yes), + ) + .unwrap(); + while at < haystack.len() { + let byte = haystack[at]; + sid = dfa.next_state(&mut cache, sid, byte).unwrap(); + at += 1; + if sid.is_tagged() { + if sid.is_match() { + return Ok(true); + } else if sid.is_dead() { + return Ok(false); + } + } + } + Ok(dfa.next_eoi_state(&mut cache, sid).unwrap().is_match()) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(super) fn rev( + haystack: &[u8], + mut at: usize, + ) -> Result<bool, super::UnicodeWordBoundaryError> { + static WORD: Lazy<DFA> = Lazy::new(|| { + DFA::builder() + .thompson(NFA::config().reverse(true)) + .build(r"\w") + .unwrap() + }); + static CACHE: Lazy<Pool<Cache>> = + Lazy::new(|| Pool::new(|| WORD.create_cache())); + let dfa = Lazy::get(&WORD); + let mut cache = Lazy::get(&CACHE).get(); + let mut sid = dfa + .start_state_reverse( + &mut cache, + &Input::new("").anchored(Anchored::Yes), + ) + .unwrap(); + while at > 0 { + at -= 1; + let byte = haystack[at]; + sid = dfa.next_state(&mut cache, sid, byte).unwrap(); + if sid.is_tagged() { + if sid.is_match() { + return Ok(true); + } else if sid.is_dead() { + return Ok(false); + } + } + } + Ok(dfa.next_eoi_state(&mut cache, sid).unwrap().is_match()) + } +} +*/ + +/* +/// A module that looks for word codepoints using fully compiled DFAs. 
+#[cfg(all( + feature = "unicode-word-boundary", + feature = "syntax", + feature = "unicode-perl", + feature = "dfa-build" +))] +mod is_word_char { + use alloc::vec::Vec; + + use crate::{ + dfa::{dense::DFA, Automaton, StartKind}, + nfa::thompson::NFA, + util::{lazy::Lazy, primitives::StateID}, + Anchored, Input, + }; + + pub(super) fn check() -> Result<(), super::UnicodeWordBoundaryError> { + Ok(()) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(super) fn fwd( + haystack: &[u8], + mut at: usize, + ) -> Result<bool, super::UnicodeWordBoundaryError> { + static WORD: Lazy<(DFA<Vec<u32>>, StateID)> = Lazy::new(|| { + let dfa = DFA::builder() + .configure(DFA::config().start_kind(StartKind::Anchored)) + .build(r"\w") + .unwrap(); + // OK because our regex has no look-around. + let start_id = dfa.universal_start_state(Anchored::Yes).unwrap(); + (dfa, start_id) + }); + let &(ref dfa, mut sid) = Lazy::get(&WORD); + while at < haystack.len() { + let byte = haystack[at]; + sid = dfa.next_state(sid, byte); + at += 1; + if dfa.is_special_state(sid) { + if dfa.is_match_state(sid) { + return Ok(true); + } else if dfa.is_dead_state(sid) { + return Ok(false); + } + } + } + Ok(dfa.is_match_state(dfa.next_eoi_state(sid))) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(super) fn rev( + haystack: &[u8], + mut at: usize, + ) -> Result<bool, super::UnicodeWordBoundaryError> { + static WORD: Lazy<(DFA<Vec<u32>>, StateID)> = Lazy::new(|| { + let dfa = DFA::builder() + .configure(DFA::config().start_kind(StartKind::Anchored)) + // From ad hoc measurements, it looks like setting + // shrink==false is slightly faster than shrink==true. I kind + // of feel like this indicates that shrinking is probably a + // failure, although it can help in some cases. Sigh. + .thompson(NFA::config().reverse(true).shrink(false)) + .build(r"\w") + .unwrap(); + // OK because our regex has no look-around. + let start_id = dfa.universal_start_state(Anchored::Yes).unwrap(); + (dfa, start_id) + }); + let &(ref dfa, mut sid) = Lazy::get(&WORD); + while at > 0 { + at -= 1; + let byte = haystack[at]; + sid = dfa.next_state(sid, byte); + if dfa.is_special_state(sid) { + if dfa.is_match_state(sid) { + return Ok(true); + } else if dfa.is_dead_state(sid) { + return Ok(false); + } + } + } + Ok(dfa.is_match_state(dfa.next_eoi_state(sid))) + } +} +*/ + +/// A module that looks for word codepoints using regex-syntax's data tables. 
+#[cfg(all( + feature = "unicode-word-boundary", + feature = "syntax", + feature = "unicode-perl", +))] +mod is_word_char { + use regex_syntax::try_is_word_character; + + use crate::util::utf8; + + pub(super) fn check() -> Result<(), super::UnicodeWordBoundaryError> { + Ok(()) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(super) fn fwd( + haystack: &[u8], + at: usize, + ) -> Result<bool, super::UnicodeWordBoundaryError> { + Ok(match utf8::decode(&haystack[at..]) { + None | Some(Err(_)) => false, + Some(Ok(ch)) => try_is_word_character(ch).expect( + "since unicode-word-boundary, syntax and unicode-perl \ + are all enabled, it is expected that \ + try_is_word_character succeeds", + ), + }) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(super) fn rev( + haystack: &[u8], + at: usize, + ) -> Result<bool, super::UnicodeWordBoundaryError> { + Ok(match utf8::decode_last(&haystack[..at]) { + None | Some(Err(_)) => false, + Some(Ok(ch)) => try_is_word_character(ch).expect( + "since unicode-word-boundary, syntax and unicode-perl \ + are all enabled, it is expected that \ + try_is_word_character succeeds", + ), + }) + } +} + +/// A module that looks for word codepoints using regex-automata's data tables +/// (which are only compiled when regex-syntax's tables aren't available). +/// +/// Note that the cfg should match the one in src/util/unicode_data/mod.rs for +/// perl_word. +#[cfg(all( + feature = "unicode-word-boundary", + not(all(feature = "syntax", feature = "unicode-perl")), +))] +mod is_word_char { + use crate::util::utf8; + + pub(super) fn check() -> Result<(), super::UnicodeWordBoundaryError> { + Ok(()) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(super) fn fwd( + haystack: &[u8], + at: usize, + ) -> Result<bool, super::UnicodeWordBoundaryError> { + Ok(match utf8::decode(&haystack[at..]) { + None | Some(Err(_)) => false, + Some(Ok(ch)) => is_word_character(ch), + }) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(super) fn rev( + haystack: &[u8], + at: usize, + ) -> Result<bool, super::UnicodeWordBoundaryError> { + Ok(match utf8::decode_last(&haystack[..at]) { + None | Some(Err(_)) => false, + Some(Ok(ch)) => is_word_character(ch), + }) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_word_character(c: char) -> bool { + use crate::util::{unicode_data::perl_word::PERL_WORD, utf8}; + + if u8::try_from(c).map_or(false, utf8::is_word_byte) { + return true; + } + PERL_WORD + .binary_search_by(|&(start, end)| { + use core::cmp::Ordering; + + if start <= c && c <= end { + Ordering::Equal + } else if start > c { + Ordering::Greater + } else { + Ordering::Less + } + }) + .is_ok() + } +} + +/// A module that always returns an error if Unicode word boundaries are +/// disabled. When this feature is disabled, then regex-automata will not +/// include its own data tables even if regex-syntax is disabled. 
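// A standalone sketch of the range lookup that the table-based fallback
// above performs: a binary search over sorted, inclusive (start, end)
// codepoint ranges. The tiny RANGES table here is hypothetical; the real
// data lives in util/unicode_data/perl_word.
use core::cmp::Ordering;

fn in_ranges(ranges: &[(char, char)], c: char) -> bool {
    ranges
        .binary_search_by(|&(start, end)| {
            if start <= c && c <= end {
                Ordering::Equal
            } else if start > c {
                Ordering::Greater
            } else {
                Ordering::Less
            }
        })
        .is_ok()
}

// Hypothetical, deliberately small table: ASCII letters plus Greek letters.
const RANGES: &[(char, char)] = &[('A', 'Z'), ('a', 'z'), ('Α', 'ω')];
assert!(in_ranges(RANGES, 'q'));
assert!(in_ranges(RANGES, 'β'));
assert!(!in_ranges(RANGES, ' '));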
+#[cfg(not(feature = "unicode-word-boundary"))] +mod is_word_char { + pub(super) fn check() -> Result<(), super::UnicodeWordBoundaryError> { + Err(super::UnicodeWordBoundaryError::new()) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(super) fn fwd( + _bytes: &[u8], + _at: usize, + ) -> Result<bool, super::UnicodeWordBoundaryError> { + Err(super::UnicodeWordBoundaryError::new()) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(super) fn rev( + _bytes: &[u8], + _at: usize, + ) -> Result<bool, super::UnicodeWordBoundaryError> { + Err(super::UnicodeWordBoundaryError::new()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + macro_rules! testlook { + ($look:expr, $haystack:expr, $at:expr) => { + LookMatcher::default().matches($look, $haystack.as_bytes(), $at) + }; + } + + #[test] + fn look_matches_start_line() { + let look = Look::StartLF; + + assert!(testlook!(look, "", 0)); + assert!(testlook!(look, "\n", 0)); + assert!(testlook!(look, "\n", 1)); + assert!(testlook!(look, "a", 0)); + assert!(testlook!(look, "\na", 1)); + + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a\na", 1)); + } + + #[test] + fn look_matches_end_line() { + let look = Look::EndLF; + + assert!(testlook!(look, "", 0)); + assert!(testlook!(look, "\n", 1)); + assert!(testlook!(look, "\na", 0)); + assert!(testlook!(look, "\na", 2)); + assert!(testlook!(look, "a\na", 1)); + + assert!(!testlook!(look, "a", 0)); + assert!(!testlook!(look, "\na", 1)); + assert!(!testlook!(look, "a\na", 0)); + assert!(!testlook!(look, "a\na", 2)); + } + + #[test] + fn look_matches_start_text() { + let look = Look::Start; + + assert!(testlook!(look, "", 0)); + assert!(testlook!(look, "\n", 0)); + assert!(testlook!(look, "a", 0)); + + assert!(!testlook!(look, "\n", 1)); + assert!(!testlook!(look, "\na", 1)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a\na", 1)); + } + + #[test] + fn look_matches_end_text() { + let look = Look::End; + + assert!(testlook!(look, "", 0)); + assert!(testlook!(look, "\n", 1)); + assert!(testlook!(look, "\na", 2)); + + assert!(!testlook!(look, "\na", 0)); + assert!(!testlook!(look, "a\na", 1)); + assert!(!testlook!(look, "a", 0)); + assert!(!testlook!(look, "\na", 1)); + assert!(!testlook!(look, "a\na", 0)); + assert!(!testlook!(look, "a\na", 2)); + } + + #[test] + #[cfg(all(not(miri), feature = "unicode-word-boundary"))] + fn look_matches_word_unicode() { + let look = Look::WordUnicode; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(testlook!(look, "a", 0)); + assert!(testlook!(look, "a", 1)); + assert!(testlook!(look, "a ", 1)); + assert!(testlook!(look, " a ", 1)); + assert!(testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. + assert!(testlook!(look, "𝛃", 0)); + assert!(testlook!(look, "𝛃", 4)); + assert!(testlook!(look, "𝛃 ", 4)); + assert!(testlook!(look, " 𝛃 ", 1)); + assert!(testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(testlook!(look, "𝛃𐆀", 0)); + assert!(testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(!testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(!testlook!(look, "a ", 2)); + assert!(!testlook!(look, " a ", 0)); + assert!(!testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. 
+ assert!(!testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "𝛃 ", 5)); + assert!(!testlook!(look, " 𝛃 ", 0)); + assert!(!testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(!testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_matches_word_ascii() { + let look = Look::WordAscii; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(testlook!(look, "a", 0)); + assert!(testlook!(look, "a", 1)); + assert!(testlook!(look, "a ", 1)); + assert!(testlook!(look, " a ", 1)); + assert!(testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. Since this is + // an ASCII word boundary, none of these match. + assert!(!testlook!(look, "𝛃", 0)); + assert!(!testlook!(look, "𝛃", 4)); + assert!(!testlook!(look, "𝛃 ", 4)); + assert!(!testlook!(look, " 𝛃 ", 1)); + assert!(!testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. Again, since + // this is an ASCII word boundary, none of these match. + assert!(!testlook!(look, "𝛃𐆀", 0)); + assert!(!testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(!testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(!testlook!(look, "a ", 2)); + assert!(!testlook!(look, " a ", 0)); + assert!(!testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "𝛃 ", 5)); + assert!(!testlook!(look, " 𝛃 ", 0)); + assert!(!testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(!testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + #[cfg(all(not(miri), feature = "unicode-word-boundary"))] + fn look_matches_word_unicode_negate() { + let look = Look::WordUnicodeNegate; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!testlook!(look, "a", 0)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a ", 1)); + assert!(!testlook!(look, " a ", 1)); + assert!(!testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃", 0)); + assert!(!testlook!(look, "𝛃", 4)); + assert!(!testlook!(look, "𝛃 ", 4)); + assert!(!testlook!(look, " 𝛃 ", 1)); + assert!(!testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 0)); + assert!(!testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(testlook!(look, "", 0)); + assert!(testlook!(look, "ab", 1)); + assert!(testlook!(look, "a ", 2)); + assert!(testlook!(look, " a ", 0)); + assert!(testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. 
+ assert!(testlook!(look, "𝛃b", 4)); + assert!(testlook!(look, "𝛃 ", 5)); + assert!(testlook!(look, " 𝛃 ", 0)); + assert!(testlook!(look, " 𝛃 ", 6)); + // These don't match because they could otherwise return an offset that + // splits the UTF-8 encoding of a codepoint. + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. These also don't + // match because they could otherwise return an offset that splits the + // UTF-8 encoding of a codepoint. + assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + // But this one does, since 𐆀 isn't a word codepoint, and 8 is the end + // of the haystack. So the "end" of the haystack isn't a word and 𐆀 + // isn't a word, thus, \B matches. + assert!(testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_matches_word_ascii_negate() { + let look = Look::WordAsciiNegate; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!testlook!(look, "a", 0)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a ", 1)); + assert!(!testlook!(look, " a ", 1)); + assert!(!testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. Since this is + // an ASCII word boundary, none of these match. + assert!(testlook!(look, "𝛃", 0)); + assert!(testlook!(look, "𝛃", 4)); + assert!(testlook!(look, "𝛃 ", 4)); + assert!(testlook!(look, " 𝛃 ", 1)); + assert!(testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. Again, since + // this is an ASCII word boundary, none of these match. + assert!(testlook!(look, "𝛃𐆀", 0)); + assert!(testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(testlook!(look, "", 0)); + assert!(testlook!(look, "ab", 1)); + assert!(testlook!(look, "a ", 2)); + assert!(testlook!(look, " a ", 0)); + assert!(testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(testlook!(look, "𝛃 ", 5)); + assert!(testlook!(look, " 𝛃 ", 0)); + assert!(testlook!(look, " 𝛃 ", 6)); + assert!(testlook!(look, "𝛃", 1)); + assert!(testlook!(look, "𝛃", 2)); + assert!(testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(testlook!(look, "𝛃𐆀", 1)); + assert!(testlook!(look, "𝛃𐆀", 2)); + assert!(testlook!(look, "𝛃𐆀", 3)); + assert!(testlook!(look, "𝛃𐆀", 5)); + assert!(testlook!(look, "𝛃𐆀", 6)); + assert!(testlook!(look, "𝛃𐆀", 7)); + assert!(testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_matches_word_start_ascii() { + let look = Look::WordStartAscii; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(testlook!(look, "a", 0)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a ", 1)); + assert!(testlook!(look, " a ", 1)); + assert!(!testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. Since this is + // an ASCII word boundary, none of these match. + assert!(!testlook!(look, "𝛃", 0)); + assert!(!testlook!(look, "𝛃", 4)); + assert!(!testlook!(look, "𝛃 ", 4)); + assert!(!testlook!(look, " 𝛃 ", 1)); + assert!(!testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. 
Again, since + // this is an ASCII word boundary, none of these match. + assert!(!testlook!(look, "𝛃𐆀", 0)); + assert!(!testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(!testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(!testlook!(look, "a ", 2)); + assert!(!testlook!(look, " a ", 0)); + assert!(!testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(!testlook!(look, "𝛃 ", 5)); + assert!(!testlook!(look, " 𝛃 ", 0)); + assert!(!testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(!testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_matches_word_end_ascii() { + let look = Look::WordEndAscii; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!testlook!(look, "a", 0)); + assert!(testlook!(look, "a", 1)); + assert!(testlook!(look, "a ", 1)); + assert!(!testlook!(look, " a ", 1)); + assert!(testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. Since this is + // an ASCII word boundary, none of these match. + assert!(!testlook!(look, "𝛃", 0)); + assert!(!testlook!(look, "𝛃", 4)); + assert!(!testlook!(look, "𝛃 ", 4)); + assert!(!testlook!(look, " 𝛃 ", 1)); + assert!(!testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. Again, since + // this is an ASCII word boundary, none of these match. + assert!(!testlook!(look, "𝛃𐆀", 0)); + assert!(!testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(!testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(!testlook!(look, "a ", 2)); + assert!(!testlook!(look, " a ", 0)); + assert!(!testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(testlook!(look, "b𝛃", 1)); + assert!(!testlook!(look, "𝛃 ", 5)); + assert!(!testlook!(look, " 𝛃 ", 0)); + assert!(!testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(!testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + #[cfg(all(not(miri), feature = "unicode-word-boundary"))] + fn look_matches_word_start_unicode() { + let look = Look::WordStartUnicode; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(testlook!(look, "a", 0)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a ", 1)); + assert!(testlook!(look, " a ", 1)); + assert!(!testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. 
+ assert!(testlook!(look, "𝛃", 0)); + assert!(!testlook!(look, "𝛃", 4)); + assert!(!testlook!(look, "𝛃 ", 4)); + assert!(testlook!(look, " 𝛃 ", 1)); + assert!(!testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(testlook!(look, "𝛃𐆀", 0)); + assert!(!testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(!testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(!testlook!(look, "a ", 2)); + assert!(!testlook!(look, " a ", 0)); + assert!(!testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(!testlook!(look, "𝛃 ", 5)); + assert!(!testlook!(look, " 𝛃 ", 0)); + assert!(!testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(!testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + #[cfg(all(not(miri), feature = "unicode-word-boundary"))] + fn look_matches_word_end_unicode() { + let look = Look::WordEndUnicode; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!testlook!(look, "a", 0)); + assert!(testlook!(look, "a", 1)); + assert!(testlook!(look, "a ", 1)); + assert!(!testlook!(look, " a ", 1)); + assert!(testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃", 0)); + assert!(testlook!(look, "𝛃", 4)); + assert!(testlook!(look, "𝛃 ", 4)); + assert!(!testlook!(look, " 𝛃 ", 1)); + assert!(testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 0)); + assert!(testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(!testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(!testlook!(look, "a ", 2)); + assert!(!testlook!(look, " a ", 0)); + assert!(!testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(!testlook!(look, "𝛃 ", 5)); + assert!(!testlook!(look, " 𝛃 ", 0)); + assert!(!testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(!testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_matches_word_start_half_ascii() { + let look = Look::WordStartHalfAscii; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(testlook!(look, "a", 0)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a ", 1)); + assert!(testlook!(look, " a ", 1)); + assert!(!testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. Since this is + // an ASCII word boundary, none of these match. 
+ assert!(testlook!(look, "𝛃", 0)); + assert!(testlook!(look, "𝛃", 4)); + assert!(testlook!(look, "𝛃 ", 4)); + assert!(testlook!(look, " 𝛃 ", 1)); + assert!(testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. Again, since + // this is an ASCII word boundary, none of these match. + assert!(testlook!(look, "𝛃𐆀", 0)); + assert!(testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(testlook!(look, "a ", 2)); + assert!(testlook!(look, " a ", 0)); + assert!(testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(testlook!(look, "𝛃 ", 5)); + assert!(testlook!(look, " 𝛃 ", 0)); + assert!(testlook!(look, " 𝛃 ", 6)); + assert!(testlook!(look, "𝛃", 1)); + assert!(testlook!(look, "𝛃", 2)); + assert!(testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(testlook!(look, "𝛃𐆀", 1)); + assert!(testlook!(look, "𝛃𐆀", 2)); + assert!(testlook!(look, "𝛃𐆀", 3)); + assert!(testlook!(look, "𝛃𐆀", 5)); + assert!(testlook!(look, "𝛃𐆀", 6)); + assert!(testlook!(look, "𝛃𐆀", 7)); + assert!(testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_matches_word_end_half_ascii() { + let look = Look::WordEndHalfAscii; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!testlook!(look, "a", 0)); + assert!(testlook!(look, "a", 1)); + assert!(testlook!(look, "a ", 1)); + assert!(!testlook!(look, " a ", 1)); + assert!(testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. Since this is + // an ASCII word boundary, none of these match. + assert!(testlook!(look, "𝛃", 0)); + assert!(testlook!(look, "𝛃", 4)); + assert!(testlook!(look, "𝛃 ", 4)); + assert!(testlook!(look, " 𝛃 ", 1)); + assert!(testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. Again, since + // this is an ASCII word boundary, none of these match. + assert!(testlook!(look, "𝛃𐆀", 0)); + assert!(testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(testlook!(look, "a ", 2)); + assert!(testlook!(look, " a ", 0)); + assert!(testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(testlook!(look, "b𝛃", 1)); + assert!(testlook!(look, "𝛃 ", 5)); + assert!(testlook!(look, " 𝛃 ", 0)); + assert!(testlook!(look, " 𝛃 ", 6)); + assert!(testlook!(look, "𝛃", 1)); + assert!(testlook!(look, "𝛃", 2)); + assert!(testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(testlook!(look, "𝛃𐆀", 1)); + assert!(testlook!(look, "𝛃𐆀", 2)); + assert!(testlook!(look, "𝛃𐆀", 3)); + assert!(testlook!(look, "𝛃𐆀", 5)); + assert!(testlook!(look, "𝛃𐆀", 6)); + assert!(testlook!(look, "𝛃𐆀", 7)); + assert!(testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + #[cfg(all(not(miri), feature = "unicode-word-boundary"))] + fn look_matches_word_start_half_unicode() { + let look = Look::WordStartHalfUnicode; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. 
+ assert!(testlook!(look, "a", 0)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a ", 1)); + assert!(testlook!(look, " a ", 1)); + assert!(!testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. + assert!(testlook!(look, "𝛃", 0)); + assert!(!testlook!(look, "𝛃", 4)); + assert!(!testlook!(look, "𝛃 ", 4)); + assert!(testlook!(look, " 𝛃 ", 1)); + assert!(!testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(testlook!(look, "𝛃𐆀", 0)); + assert!(!testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(testlook!(look, "a ", 2)); + assert!(testlook!(look, " a ", 0)); + assert!(testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(testlook!(look, "𝛃 ", 5)); + assert!(testlook!(look, " 𝛃 ", 0)); + assert!(testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + #[cfg(all(not(miri), feature = "unicode-word-boundary"))] + fn look_matches_word_end_half_unicode() { + let look = Look::WordEndHalfUnicode; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!testlook!(look, "a", 0)); + assert!(testlook!(look, "a", 1)); + assert!(testlook!(look, "a ", 1)); + assert!(!testlook!(look, " a ", 1)); + assert!(testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃", 0)); + assert!(testlook!(look, "𝛃", 4)); + assert!(testlook!(look, "𝛃 ", 4)); + assert!(!testlook!(look, " 𝛃 ", 1)); + assert!(testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 0)); + assert!(testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(testlook!(look, "a ", 2)); + assert!(testlook!(look, " a ", 0)); + assert!(testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(testlook!(look, "𝛃 ", 5)); + assert!(testlook!(look, " 𝛃 ", 0)); + assert!(testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. 
+ assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_set() { + let mut f = LookSet::default(); + assert!(!f.contains(Look::Start)); + assert!(!f.contains(Look::End)); + assert!(!f.contains(Look::StartLF)); + assert!(!f.contains(Look::EndLF)); + assert!(!f.contains(Look::WordUnicode)); + assert!(!f.contains(Look::WordUnicodeNegate)); + assert!(!f.contains(Look::WordAscii)); + assert!(!f.contains(Look::WordAsciiNegate)); + + f = f.insert(Look::Start); + assert!(f.contains(Look::Start)); + f = f.remove(Look::Start); + assert!(!f.contains(Look::Start)); + + f = f.insert(Look::End); + assert!(f.contains(Look::End)); + f = f.remove(Look::End); + assert!(!f.contains(Look::End)); + + f = f.insert(Look::StartLF); + assert!(f.contains(Look::StartLF)); + f = f.remove(Look::StartLF); + assert!(!f.contains(Look::StartLF)); + + f = f.insert(Look::EndLF); + assert!(f.contains(Look::EndLF)); + f = f.remove(Look::EndLF); + assert!(!f.contains(Look::EndLF)); + + f = f.insert(Look::StartCRLF); + assert!(f.contains(Look::StartCRLF)); + f = f.remove(Look::StartCRLF); + assert!(!f.contains(Look::StartCRLF)); + + f = f.insert(Look::EndCRLF); + assert!(f.contains(Look::EndCRLF)); + f = f.remove(Look::EndCRLF); + assert!(!f.contains(Look::EndCRLF)); + + f = f.insert(Look::WordUnicode); + assert!(f.contains(Look::WordUnicode)); + f = f.remove(Look::WordUnicode); + assert!(!f.contains(Look::WordUnicode)); + + f = f.insert(Look::WordUnicodeNegate); + assert!(f.contains(Look::WordUnicodeNegate)); + f = f.remove(Look::WordUnicodeNegate); + assert!(!f.contains(Look::WordUnicodeNegate)); + + f = f.insert(Look::WordAscii); + assert!(f.contains(Look::WordAscii)); + f = f.remove(Look::WordAscii); + assert!(!f.contains(Look::WordAscii)); + + f = f.insert(Look::WordAsciiNegate); + assert!(f.contains(Look::WordAsciiNegate)); + f = f.remove(Look::WordAsciiNegate); + assert!(!f.contains(Look::WordAsciiNegate)); + + f = f.insert(Look::WordStartAscii); + assert!(f.contains(Look::WordStartAscii)); + f = f.remove(Look::WordStartAscii); + assert!(!f.contains(Look::WordStartAscii)); + + f = f.insert(Look::WordEndAscii); + assert!(f.contains(Look::WordEndAscii)); + f = f.remove(Look::WordEndAscii); + assert!(!f.contains(Look::WordEndAscii)); + + f = f.insert(Look::WordStartUnicode); + assert!(f.contains(Look::WordStartUnicode)); + f = f.remove(Look::WordStartUnicode); + assert!(!f.contains(Look::WordStartUnicode)); + + f = f.insert(Look::WordEndUnicode); + assert!(f.contains(Look::WordEndUnicode)); + f = f.remove(Look::WordEndUnicode); + assert!(!f.contains(Look::WordEndUnicode)); + + f = f.insert(Look::WordStartHalfAscii); + assert!(f.contains(Look::WordStartHalfAscii)); + f = f.remove(Look::WordStartHalfAscii); + assert!(!f.contains(Look::WordStartHalfAscii)); + + f = f.insert(Look::WordEndHalfAscii); + assert!(f.contains(Look::WordEndHalfAscii)); + f = f.remove(Look::WordEndHalfAscii); + assert!(!f.contains(Look::WordEndHalfAscii)); + + f = f.insert(Look::WordStartHalfUnicode); + assert!(f.contains(Look::WordStartHalfUnicode)); + f = f.remove(Look::WordStartHalfUnicode); + assert!(!f.contains(Look::WordStartHalfUnicode)); + + f = f.insert(Look::WordEndHalfUnicode); + assert!(f.contains(Look::WordEndHalfUnicode)); + f = f.remove(Look::WordEndHalfUnicode); + 
assert!(!f.contains(Look::WordEndHalfUnicode)); + } + + #[test] + fn look_set_iter() { + let set = LookSet::empty(); + assert_eq!(0, set.iter().count()); + + let set = LookSet::full(); + assert_eq!(18, set.iter().count()); + + let set = + LookSet::empty().insert(Look::StartLF).insert(Look::WordUnicode); + assert_eq!(2, set.iter().count()); + + let set = LookSet::empty().insert(Look::StartLF); + assert_eq!(1, set.iter().count()); + + let set = LookSet::empty().insert(Look::WordAsciiNegate); + assert_eq!(1, set.iter().count()); + + let set = LookSet::empty().insert(Look::WordEndHalfUnicode); + assert_eq!(1, set.iter().count()); + } + + #[test] + #[cfg(feature = "alloc")] + fn look_set_debug() { + let res = alloc::format!("{:?}", LookSet::empty()); + assert_eq!("∅", res); + let res = alloc::format!("{:?}", LookSet::full()); + assert_eq!("Az^$rRbB𝛃𝚩<>〈〉◁▷◀▶", res); + } +} diff --git a/vendor/regex-automata/src/util/memchr.rs b/vendor/regex-automata/src/util/memchr.rs new file mode 100644 index 0000000..a2cbb07 --- /dev/null +++ b/vendor/regex-automata/src/util/memchr.rs @@ -0,0 +1,93 @@ +/*! +This module defines simple wrapper routines for the memchr functions from the +`memchr` crate. Basically, when the `memchr` crate is available, we use it, +otherwise we use a naive implementation which is still pretty fast. +*/ + +pub(crate) use self::inner::*; + +#[cfg(feature = "perf-literal-substring")] +pub(super) mod inner { + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> { + memchr::memchr(n1, haystack) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> { + memchr::memchr2(n1, n2, haystack) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn memchr3( + n1: u8, + n2: u8, + n3: u8, + haystack: &[u8], + ) -> Option<usize> { + memchr::memchr3(n1, n2, n3, haystack) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> { + memchr::memrchr(n1, haystack) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> { + memchr::memrchr2(n1, n2, haystack) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn memrchr3( + n1: u8, + n2: u8, + n3: u8, + haystack: &[u8], + ) -> Option<usize> { + memchr::memrchr3(n1, n2, n3, haystack) + } +} + +#[cfg(not(feature = "perf-literal-substring"))] +pub(super) mod inner { + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> { + haystack.iter().position(|&b| b == n1) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> { + haystack.iter().position(|&b| b == n1 || b == n2) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn memchr3( + n1: u8, + n2: u8, + n3: u8, + haystack: &[u8], + ) -> Option<usize> { + haystack.iter().position(|&b| b == n1 || b == n2 || b == n3) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> { + haystack.iter().rposition(|&b| b == n1) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> { + haystack.iter().rposition(|&b| b == n1 || b == n2) + } + + #[cfg_attr(feature = 
"perf-inline", inline(always))] + pub(crate) fn memrchr3( + n1: u8, + n2: u8, + n3: u8, + haystack: &[u8], + ) -> Option<usize> { + haystack.iter().rposition(|&b| b == n1 || b == n2 || b == n3) + } +} diff --git a/vendor/regex-automata/src/util/mod.rs b/vendor/regex-automata/src/util/mod.rs new file mode 100644 index 0000000..b3eef64 --- /dev/null +++ b/vendor/regex-automata/src/util/mod.rs @@ -0,0 +1,57 @@ +/*! +A collection of modules that provide APIs that are useful across many regex +engines. + +While one should explore the sub-modules directly to get a sense of what's +there, here are some highlights that tie the sub-modules to higher level +use cases: + +* `alphabet` contains APIs that are useful if you're doing low level things +with the DFAs in this crate. For example, implementing determinization or +walking its state graph directly. +* `captures` contains APIs for dealing with capture group matches and their +mapping to "slots" used inside an NFA graph. This is also where you can find +iterators over capture group names. +* `escape` contains types for pretty-printing raw byte slices as strings. +* `iter` contains API helpers for writing regex iterators. +* `lazy` contains a no-std and no-alloc variant of `lazy_static!` and +`once_cell`. +* `look` contains APIs for matching and configuring look-around assertions. +* `pool` provides a way to reuse mutable memory allocated in a thread safe +manner. +* `prefilter` provides APIs for building prefilters and using them in searches. +* `primitives` are what you might use if you're doing lower level work on +automata, such as walking an NFA state graph. +* `syntax` provides some higher level convenience functions for interacting +with the `regex-syntax` crate. +* `wire` is useful if you're working with DFA serialization. +*/ + +pub mod alphabet; +#[cfg(feature = "alloc")] +pub mod captures; +pub mod escape; +#[cfg(feature = "alloc")] +pub mod interpolate; +pub mod iter; +pub mod lazy; +pub mod look; +#[cfg(feature = "alloc")] +pub mod pool; +pub mod prefilter; +pub mod primitives; +pub mod start; +#[cfg(feature = "syntax")] +pub mod syntax; +pub mod wire; + +#[cfg(any(feature = "dfa-build", feature = "hybrid"))] +pub(crate) mod determinize; +pub(crate) mod empty; +pub(crate) mod int; +pub(crate) mod memchr; +pub(crate) mod search; +#[cfg(feature = "alloc")] +pub(crate) mod sparse_set; +pub(crate) mod unicode_data; +pub(crate) mod utf8; diff --git a/vendor/regex-automata/src/util/pool.rs b/vendor/regex-automata/src/util/pool.rs new file mode 100644 index 0000000..d90d4ec --- /dev/null +++ b/vendor/regex-automata/src/util/pool.rs @@ -0,0 +1,1199 @@ +// This module provides a relatively simple thread-safe pool of reusable +// objects. For the most part, it's implemented by a stack represented by a +// Mutex<Vec<T>>. It has one small trick: because unlocking a mutex is somewhat +// costly, in the case where a pool is accessed by the first thread that tried +// to get a value, we bypass the mutex. Here are some benchmarks showing the +// difference. +// +// 2022-10-15: These benchmarks are from the old regex crate and they aren't +// easy to reproduce because some rely on older implementations of Pool that +// are no longer around. I've left the results here for posterity, but any +// enterprising individual should feel encouraged to re-litigate the way Pool +// works. I am not at all certain it is the best approach. 
+//
+// 1) misc::anchored_literal_long_non_match 21 (18571 MB/s)
+// 2) misc::anchored_literal_long_non_match 107 (3644 MB/s)
+// 3) misc::anchored_literal_long_non_match 45 (8666 MB/s)
+// 4) misc::anchored_literal_long_non_match 19 (20526 MB/s)
+//
+// (1) represents our baseline: the master branch at the time of writing when
+// using the 'thread_local' crate to implement the pool below.
+//
+// (2) represents a naive pool implemented completely via Mutex<Vec<T>>. There
+// is no special trick for bypassing the mutex.
+//
+// (3) is the same as (2), except it uses Mutex<Vec<Box<T>>>. It is twice as
+// fast because a Box<T> is much smaller than the T we use with a Pool in this
+// crate. So pushing and popping a Box<T> from a Vec is quite a bit faster
+// than for T.
+//
+// (4) is the same as (3), but with the trick for bypassing the mutex in the
+// case of the first-to-get thread.
+//
+// Why move off of thread_local? Even though (4) is a hair faster than (1)
+// above, this was not the main goal. The main goal was to move off of
+// thread_local and find a way to *simply* re-capture some of its speed for
+// regex's specific case. So again, why move off of it? The *primary* reason is
+// because of memory leaks. See https://github.com/rust-lang/regex/issues/362
+// for example. (Why do I want it to be simple? Well, I suppose what I mean is,
+// "use as much safe code as possible to minimize risk and be as sure as I can
+// be that it is correct.")
+//
+// My guess is that the thread_local design is probably not appropriate for
+// regex since its memory usage scales to the number of active threads that
+// have used a regex, whereas the pool below scales to the number of threads
+// that simultaneously use a regex. While neither case permits contraction,
+// since we own the pool data structure below, we can add contraction if a
+// clear use case pops up in the wild. More pressingly though, it seems that
+// there are at least some use case patterns where one might have many threads
+// sitting around that might have used a regex at one point. While thread_local
+// does try to reuse space previously used by a thread that has since stopped,
+// its maximal memory usage still scales with the total number of active
+// threads. In contrast, the pool below scales with the total number of threads
+// *simultaneously* using the pool. The hope is that this uses less memory
+// overall. And if it doesn't, we can hopefully tune it somehow.
+//
+// It seems that these sorts of conditions happen frequently
+// in FFI inside of other more "managed" languages. This was
+// mentioned in the issue linked above, and also mentioned here:
+// https://github.com/BurntSushi/rure-go/issues/3. And in particular, users
+// confirm that disabling the use of thread_local resolves the leak.
+//
+// There were other weaker reasons for moving off of thread_local as well.
+// Namely, at the time, I was looking to reduce dependencies. And for something
+// like regex, maintenance can be simpler when we own the full dependency tree.
+//
+// Note that I am not entirely happy with this pool. It has some subtle
+// implementation details and is overall still observable (even with the
+// thread owner optimization) in benchmarks. If someone wants to take a crack
+// at building something better, please file an issue. Even if it means a
+// different API. The API exposed by this pool is not the minimal thing that
+// something like a 'Regex' actually needs. It could adapt to, for example,
+// an API more like what is found in the 'thread_local' crate. However, we do
+// really need to support the no-std alloc-only context, or else the regex
+// crate wouldn't be able to support no-std alloc-only. That said, I'm generally
+// okay with making the alloc-only context slower (as it is here), although I
+// do find it unfortunate.
+
+/*!
+A thread safe memory pool.
+
+The principal type in this module is a [`Pool`]. Its main use case is for
+holding a thread safe collection of mutable scratch spaces (usually called
+`Cache` in this crate) that regex engines need to execute a search. This then
+permits sharing the same read-only regex object across multiple threads while
+having a quick way of reusing scratch space in a thread safe way. This avoids
+needing to re-create the scratch space for every search, which could wind up
+being quite expensive.
+*/
+
+/// A thread safe pool that works in an `alloc`-only context.
+///
+/// Getting a value out comes with a guard. When that guard is dropped, the
+/// value is automatically put back in the pool. The guard provides both a
+/// `Deref` and a `DerefMut` implementation for easy access to an underlying
+/// `T`.
+///
+/// A `Pool` impls `Sync` when `T` is `Send` (even if `T` is not `Sync`). This
+/// is possible because a pool is guaranteed to provide a value to exactly one
+/// thread at any time.
+///
+/// Currently, a pool never contracts in size. Its size is proportional to the
+/// maximum number of simultaneous uses. This may change in the future.
+///
+/// A `Pool` is a particularly useful data structure for this crate because
+/// many of the regex engines require a mutable "cache" in order to execute
+/// a search. Since regexes themselves tend to be global, the problem is then:
+/// how do you get a mutable cache to execute a search? You could:
+///
+/// 1. Use a `thread_local!`, which requires the standard library and requires
+/// that the regex pattern be statically known.
+/// 2. Use a `Pool`.
+/// 3. Make the cache an explicit dependency in your code and pass it around.
+/// 4. Put the cache state in a `Mutex`, but this means only one search can
+/// execute at a time.
+/// 5. Create a new cache for every search.
+///
+/// A `thread_local!` is perhaps the best choice if it works for your use case.
+/// Putting the cache in a mutex or creating a new cache for every search are
+/// perhaps the worst choices. Of the remaining two choices, whether you use
+/// this `Pool` or thread through a cache explicitly in your code is a matter
+/// of taste and depends on your code architecture.
+///
+/// # Warning: may use a spin lock
+///
+/// When this crate is compiled _without_ the `std` feature, then this type
+/// may use a spin lock internally. This can have subtle effects that may
+/// be undesirable. See [Spinlocks Considered Harmful][spinharm] for a more
+/// thorough treatment of this topic.
+///
+/// [spinharm]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html
+///
+/// # Example
+///
+/// This example shows how to share a single hybrid regex among multiple
+/// threads, while also safely getting exclusive access to a hybrid's
+/// [`Cache`](crate::hybrid::regex::Cache) without preventing other searches
+/// from running while your thread uses the `Cache`.
+/// +/// ``` +/// use regex_automata::{ +/// hybrid::regex::{Cache, Regex}, +/// util::{lazy::Lazy, pool::Pool}, +/// Match, +/// }; +/// +/// static RE: Lazy<Regex> = +/// Lazy::new(|| Regex::new("foo[0-9]+bar").unwrap()); +/// static CACHE: Lazy<Pool<Cache>> = +/// Lazy::new(|| Pool::new(|| RE.create_cache())); +/// +/// let expected = Some(Match::must(0, 3..14)); +/// assert_eq!(expected, RE.find(&mut CACHE.get(), b"zzzfoo12345barzzz")); +/// ``` +pub struct Pool<T, F = fn() -> T>(alloc::boxed::Box<inner::Pool<T, F>>); + +impl<T, F> Pool<T, F> { + /// Create a new pool. The given closure is used to create values in + /// the pool when necessary. + pub fn new(create: F) -> Pool<T, F> { + Pool(alloc::boxed::Box::new(inner::Pool::new(create))) + } +} + +impl<T: Send, F: Fn() -> T> Pool<T, F> { + /// Get a value from the pool. The caller is guaranteed to have + /// exclusive access to the given value. Namely, it is guaranteed that + /// this will never return a value that was returned by another call to + /// `get` but was not put back into the pool. + /// + /// When the guard goes out of scope and its destructor is called, then + /// it will automatically be put back into the pool. Alternatively, + /// [`PoolGuard::put`] may be used to explicitly put it back in the pool + /// without relying on its destructor. + /// + /// Note that there is no guarantee provided about which value in the + /// pool is returned. That is, calling get, dropping the guard (causing + /// the value to go back into the pool) and then calling get again is + /// *not* guaranteed to return the same value received in the first `get` + /// call. + #[inline] + pub fn get(&self) -> PoolGuard<'_, T, F> { + PoolGuard(self.0.get()) + } +} + +impl<T: core::fmt::Debug, F> core::fmt::Debug for Pool<T, F> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_tuple("Pool").field(&self.0).finish() + } +} + +/// A guard that is returned when a caller requests a value from the pool. +/// +/// The purpose of the guard is to use RAII to automatically put the value +/// back in the pool once it's dropped. +pub struct PoolGuard<'a, T: Send, F: Fn() -> T>(inner::PoolGuard<'a, T, F>); + +impl<'a, T: Send, F: Fn() -> T> PoolGuard<'a, T, F> { + /// Consumes this guard and puts it back into the pool. + /// + /// This circumvents the guard's `Drop` implementation. This can be useful + /// in circumstances where the automatic `Drop` results in poorer codegen, + /// such as calling non-inlined functions. + #[inline] + pub fn put(this: PoolGuard<'_, T, F>) { + inner::PoolGuard::put(this.0); + } +} + +impl<'a, T: Send, F: Fn() -> T> core::ops::Deref for PoolGuard<'a, T, F> { + type Target = T; + + #[inline] + fn deref(&self) -> &T { + self.0.value() + } +} + +impl<'a, T: Send, F: Fn() -> T> core::ops::DerefMut for PoolGuard<'a, T, F> { + #[inline] + fn deref_mut(&mut self) -> &mut T { + self.0.value_mut() + } +} + +impl<'a, T: Send + core::fmt::Debug, F: Fn() -> T> core::fmt::Debug + for PoolGuard<'a, T, F> +{ + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_tuple("PoolGuard").field(&self.0).finish() + } +} + +#[cfg(feature = "std")] +mod inner { + use core::{ + cell::UnsafeCell, + panic::{RefUnwindSafe, UnwindSafe}, + sync::atomic::{AtomicUsize, Ordering}, + }; + + use alloc::{boxed::Box, vec, vec::Vec}; + + use std::{sync::Mutex, thread_local}; + + /// An atomic counter used to allocate thread IDs. 
+ ///
+ /// We specifically start our counter at 3 so that we can use the values
+ /// less than it as sentinels.
+ static COUNTER: AtomicUsize = AtomicUsize::new(3);
+
+ /// A thread ID indicating that there is no owner. This is the initial
+ /// state of a pool. Once a pool has an owner, there is no way to change
+ /// it.
+ static THREAD_ID_UNOWNED: usize = 0;
+
+ /// A thread ID indicating that the special owner value is in use and not
+ /// available. This state is useful for avoiding a case where the owner
+ /// of a pool calls `get` before putting the result of a previous `get`
+ /// call back into the pool.
+ static THREAD_ID_INUSE: usize = 1;
+
+ /// This sentinel is used to indicate that a guard has already been dropped
+ /// and should not be re-dropped. We use this because our drop code can be
+ /// called outside of Drop and thus there could be a bug in the internal
+ /// implementation that results in trying to put the same guard back into
+ /// the same pool multiple times, and *that* could result in UB if we
+ /// didn't mark the guard as already having been put back in the pool.
+ ///
+ /// So this isn't strictly necessary, but this lets us define some
+ /// routines as safe (like PoolGuard::put_imp) that we couldn't otherwise
+ /// do.
+ static THREAD_ID_DROPPED: usize = 2;
+
+ /// The number of stacks we use inside of the pool. These are only used for
+ /// non-owners. That is, these represent the "slow" path.
+ ///
+ /// In the original implementation of this pool, we only used a single
+ /// stack. While this might be okay for a couple threads, the prevalence of
+ /// 32, 64 and even 128 core CPUs has made it untenable. The contention
+ /// such an environment introduces when threads are doing a lot of searches
+ /// on short haystacks (a not uncommon use case) is palpable and leads to
+ /// huge slowdowns.
+ ///
+ /// This constant reflects a change from using one stack to the number of
+ /// stacks that this constant is set to. The stack for a particular thread
+ /// is simply chosen by `thread_id % MAX_POOL_STACKS`. The idea behind
+ /// this setup is that there should be a good chance that accesses to the
+ /// pool will be distributed over several stacks instead of all of them
+ /// converging to one.
+ ///
+ /// This is not a particularly smart or dynamic strategy. Fixing this to a
+ /// specific number has at least two downsides. First is that it will help,
+ /// say, an 8 core CPU more than it will a 128 core CPU. (But, crucially,
+ /// it will still help the 128 core case.) Second is that this may wind
+ /// up being a little wasteful with respect to memory usage. Namely, if a
+ /// regex is used on one thread and then moved to another thread, then it
+ /// could result in creating a new copy of the data in the pool even though
+ /// only one is actually needed.
+ ///
+ /// And that memory usage bit is why this is set to 8 and not, say, 64.
+ /// Keeping it at 8 limits, to an extent, how much unnecessary memory can
+ /// be allocated.
+ ///
+ /// In an ideal world, we'd be able to have something like this:
+ ///
+ /// * Grow the number of stacks as the number of concurrent callers
+ /// increases. I spent a little time trying this, but even just adding an
+ /// atomic addition/subtraction for each pop/push for tracking concurrent
+ /// callers led to a big perf hit. Since even more work would seemingly be
+ /// required than just an addition/subtraction, I abandoned this approach.
+ /// * The maximum amount of memory used should scale with respect to the
+ /// number of concurrent callers and *not* the total number of existing
+ /// threads. This is primarily why the `thread_local` crate isn't used, as
+ /// some environments spin up a lot of threads. This led to multiple
+ /// reports of extremely high memory usage (often described as memory
+ /// leaks).
+ /// * Even more ideally, the pool should contract in size. That is, it
+ /// should grow with bursts and then shrink. But this is a pretty thorny
+ /// issue to tackle and it might be better to just not.
+ /// * It would be nice to explore the use of, say, a lock-free stack
+ /// instead of using a mutex to guard a `Vec` that is ultimately just
+ /// treated as a stack. The main thing preventing me from exploring this
+ /// is the ABA problem. The `crossbeam` crate has tools for dealing with
+ /// this sort of problem (via its epoch based memory reclamation strategy),
+ /// but I can't justify bringing in all of `crossbeam` as a dependency of
+ /// `regex` for this.
+ ///
+ /// See this issue for more context and discussion:
+ /// https://github.com/rust-lang/regex/issues/934
+ const MAX_POOL_STACKS: usize = 8;
+
+ thread_local!(
+ /// A thread local used to assign an ID to a thread.
+ static THREAD_ID: usize = {
+ let next = COUNTER.fetch_add(1, Ordering::Relaxed);
+ // SAFETY: We cannot permit the reuse of thread IDs since reusing a
+ // thread ID might result in more than one thread "owning" a pool,
+ // and thus, permit accessing a mutable value from multiple threads
+ // simultaneously without synchronization. The intent of this panic
+ // is to be a sanity check. It is not expected that the thread ID
+ // space will actually be exhausted in practice. Even on a 32-bit
+ // system, it would require spawning 2^32 threads (although they
+ // wouldn't all need to run simultaneously, so it is in theory
+ // possible).
+ //
+ // This checks that the counter never wraps around, since atomic
+ // addition wraps around on overflow.
+ if next == 0 {
+ panic!("regex: thread ID allocation space exhausted");
+ }
+ next
+ };
+ );
+
+ /// This puts each stack in the pool below into its own cache line. This is
+ /// an absolutely critical optimization that tends to have the most impact
+ /// in high contention workloads. Without forcing each mutex-protected
+ /// stack into its own cache line, high contention exacerbates the performance
+ /// problem by causing "false sharing." By putting each mutex in its own
+ /// cache-line, we avoid the false sharing problem and the effects of
+ /// contention are greatly reduced.
+ #[derive(Debug)]
+ #[repr(C, align(64))]
+ struct CacheLine<T>(T);
+
+ /// A thread safe pool utilizing std-only features.
+ ///
+ /// The main difference between this and the simplistic alloc-only pool is
+ /// the use of std::sync::Mutex and an "owner thread" optimization that
+ /// makes accesses by the owner of a pool faster than all other threads.
+ /// This makes the common case of running a regex within a single thread
+ /// faster by avoiding mutex unlocking.
+ pub(super) struct Pool<T, F> {
+ /// A function to create more T values when stack is empty and a caller
+ /// has requested a T.
+ create: F,
+ /// Multiple stacks of T values to hand out. These are used when a Pool
+ /// is accessed by a thread that didn't create it.
+ ///
+ /// Conceptually this is `Mutex<Vec<Box<T>>>`, but sharded out to make
+ /// it scale better under high contention work-loads. We index into
+ /// this sequence via `thread_id % stacks.len()`.
+ stacks: Vec<CacheLine<Mutex<Vec<Box<T>>>>>,
+ /// The ID of the thread that owns this pool. The owner is the thread
+ /// that makes the first call to 'get'. When the owner calls 'get', it
+ /// gets 'owner_val' directly instead of returning a T from 'stacks'.
+ /// See comments elsewhere for details, but this is intended to be an
+ /// optimization for the common case that makes getting a T faster.
+ ///
+ /// It is initialized to a value of zero (an impossible thread ID) as a
+ /// sentinel to indicate that it is unowned.
+ owner: AtomicUsize,
+ /// A value to return when the caller is in the same thread that
+ /// first called `Pool::get`.
+ ///
+ /// This is set to None when a Pool is first created, and set to Some
+ /// once the first thread calls Pool::get.
+ owner_val: UnsafeCell<Option<T>>,
+ }
+
+ // SAFETY: Since we want to use a Pool from multiple threads simultaneously
+ // behind an Arc, we need for it to be Sync. In cases where T is Sync,
+ // Pool<T> would be Sync. However, since we use a Pool to store mutable
+ // scratch space, we wind up using a T that has interior mutability and is
+ // thus itself not Sync. So what we *really* want is for our Pool<T> to be
+ // Sync even when T is not Sync (but is at least Send).
+ //
+ // The only non-sync aspect of a Pool is its 'owner_val' field, which is
+ // used to implement faster access to a pool value in the common case of
+ // a pool being accessed in the same thread in which it was created. The
+ // 'stacks' field is also shared, but a Mutex<T> where T: Send is already
+ // Sync. So we only need to worry about 'owner_val'.
+ //
+ // The key is to guarantee that 'owner_val' can only ever be accessed from
+ // one thread. In our implementation below, we guarantee this by only
+ // returning the 'owner_val' when the ID of the current thread matches the
+ // ID of the thread that first called 'Pool::get'. Since this can only ever
+ // be one thread, it follows that only one thread can access 'owner_val' at
+ // any point in time. Thus, it is safe to declare that Pool<T> is Sync when
+ // T is Send.
+ //
+ // If there is a way to achieve our performance goals using safe code, then
+ // I would very much welcome a patch. As it stands, the implementation
+ // below tries to balance safety with performance. The case where a Regex
+ // is used from multiple threads simultaneously will suffer a bit since
+ // getting a value out of the pool will require unlocking a mutex.
+ //
+ // We require `F: Send + Sync` because we call `F` at any point on demand,
+ // potentially from multiple threads simultaneously.
+ unsafe impl<T: Send, F: Send + Sync> Sync for Pool<T, F> {}
+
+ // If T is UnwindSafe, then since we provide exclusive access to any
+ // particular value in the pool, the pool should therefore also be
+ // considered UnwindSafe.
+ //
+ // We require `F: UnwindSafe + RefUnwindSafe` because we call `F` at any
+ // point on demand, so it needs to be unwind safe on both dimensions for
+ // the entire Pool to be unwind safe.
+ impl<T: UnwindSafe, F: UnwindSafe + RefUnwindSafe> UnwindSafe for Pool<T, F> {}
+
+ // If T is UnwindSafe, then since we provide exclusive access to any
+ // particular value in the pool, the pool should therefore also be
+ // considered RefUnwindSafe.
+ //
+ // We require `F: UnwindSafe + RefUnwindSafe` because we call `F` at any
+ // point on demand, so it needs to be unwind safe on both dimensions for
+ // the entire Pool to be unwind safe.
+ impl<T: UnwindSafe, F: UnwindSafe + RefUnwindSafe> RefUnwindSafe
+ for Pool<T, F>
+ {
+ }
+
+ impl<T, F> Pool<T, F> {
+ /// Create a new pool. The given closure is used to create values in
+ /// the pool when necessary.
+ pub(super) fn new(create: F) -> Pool<T, F> {
+ // FIXME: Now that we require 1.65+, Mutex::new is available as
+ // const... So we can almost mark this function as const. But of
+ // course, we're creating a Vec of stacks below (we didn't when I
+ // originally wrote this code). It seems like the best way to work
+ // around this would be to use a `[Stack; MAX_POOL_STACKS]` instead
+ // of a `Vec<Stack>`. I refrained from making this change at the time
+ // of writing (2023/10/08) because I was making a lot of other
+ // changes at the same time and wanted to do this more carefully.
+ // Namely, because of the cache line optimization, that `[Stack;
+ // MAX_POOL_STACKS]` would be quite big. It's unclear how bad (if
+ // at all) that would be.
+ //
+ // Another choice would be to lazily allocate the stacks, but...
+ // I'm not so sure about that. Seems like a fair bit of complexity?
+ //
+ // Maybe there's a simple solution I'm missing.
+ //
+ // ... OK, I tried to fix this. First, I did it by putting `stacks`
+ // in an `UnsafeCell` and using a `Once` to lazily initialize it.
+ // I benchmarked it and everything looked okay. I then made this
+ // function `const` and thought I was just about done. But the
+ // public pool type wraps its inner pool in a `Box` to keep its
+ // size down. Blech.
+ //
+ // So then I thought that I could push the box down into this
+ // type (and leave the non-std version unboxed) and use the same
+ // `UnsafeCell` technique to lazily initialize it. This has the
+ // downside of the `Once` now needing to get hit in the owner fast
+ // path, but maybe that's OK? However, I then realized that we can
+ // only lazily initialize `stacks`, `owner` and `owner_val`. The
+ // `create` function needs to be put somewhere outside of the box.
+ // So now the pool is a `Box`, `Once` and a function. Now we're
+ // starting to defeat the point of boxing in the first place. So I
+ // backed out that change too.
+ //
+ // Back to square one. Maybe we just don't make a pool's
+ // constructor const and live with it. It's probably not a huge
+ // deal.
+ let mut stacks = Vec::with_capacity(MAX_POOL_STACKS);
+ for _ in 0..stacks.capacity() {
+ stacks.push(CacheLine(Mutex::new(vec![])));
+ }
+ let owner = AtomicUsize::new(THREAD_ID_UNOWNED);
+ let owner_val = UnsafeCell::new(None); // init'd on first access
+ Pool { create, stacks, owner, owner_val }
+ }
+ }
+
+ impl<T: Send, F: Fn() -> T> Pool<T, F> {
+ /// Get a value from the pool. This may block if another thread is also
+ /// attempting to retrieve a value from the pool.
+ #[inline]
+ pub(super) fn get(&self) -> PoolGuard<'_, T, F> {
+ // Our fast path checks if the caller is the thread that "owns"
+ // this pool. Or stated differently, whether it is the first thread
+ // that tried to extract a value from the pool. If it is, then we
+ // can return a T to the caller without going through a mutex.
+ //
+ // SAFETY: We must guarantee that only one thread gets access
+ // to this value. Since a thread is uniquely identified by the
+ // THREAD_ID thread local, it follows that if the caller's thread
+ // ID is equal to the owner, then only one thread may receive this
+ // value. This is also why we can get away with what looks like a
+ // racy load and a store. We know that if 'owner == caller', then
+ // only one thread can be here, so we don't need to worry about any
+ // other thread setting the owner to something else.
+ let caller = THREAD_ID.with(|id| *id);
+ let owner = self.owner.load(Ordering::Acquire);
+ if caller == owner {
+ // N.B. We could also do a CAS here instead of a load/store,
+ // but ad hoc benchmarking suggests it is slower. And a lot
+ // slower in the case where `get_slow` is common.
+ self.owner.store(THREAD_ID_INUSE, Ordering::Release);
+ return self.guard_owned(caller);
+ }
+ self.get_slow(caller, owner)
+ }
+
+ /// This is the "slow" version that goes through a mutex to pop an
+ /// allocated value off a stack to return to the caller. (Or, if the
+ /// stack is empty, a new value is created.)
+ ///
+ /// If the pool has no owner, then this will set the owner.
+ #[cold]
+ fn get_slow(
+ &self,
+ caller: usize,
+ owner: usize,
+ ) -> PoolGuard<'_, T, F> {
+ if owner == THREAD_ID_UNOWNED {
+ // This sentinel means this pool is not yet owned. We try to
+ // atomically set the owner. If we do, then this thread becomes
+ // the owner and we can return a guard that represents the
+ // special T for the owner.
+ //
+ // Note that we set the owner to a different sentinel that
+ // indicates that the owned value is in use. The owner ID will
+ // get updated to the actual ID of this thread once the guard
+ // returned by this function is put back into the pool.
+ let res = self.owner.compare_exchange(
+ THREAD_ID_UNOWNED,
+ THREAD_ID_INUSE,
+ Ordering::AcqRel,
+ Ordering::Acquire,
+ );
+ if res.is_ok() {
+ // SAFETY: A successful CAS above implies this thread is
+ // the owner and that this is the only such thread that
+ // can reach here. Thus, there is no data race.
+ unsafe {
+ *self.owner_val.get() = Some((self.create)());
+ }
+ return self.guard_owned(caller);
+ }
+ }
+ let stack_id = caller % self.stacks.len();
+ // We try to acquire exclusive access to this thread's stack, and
+ // if so, grab a value from it if we can. We put this in a loop so
+ // that it's easy to tweak and experiment with a different number
+ // of tries. In the end, I couldn't see anything obviously better
+ // than one attempt in ad hoc testing.
+ for _ in 0..1 {
+ let mut stack = match self.stacks[stack_id].0.try_lock() {
+ Err(_) => continue,
+ Ok(stack) => stack,
+ };
+ if let Some(value) = stack.pop() {
+ return self.guard_stack(value);
+ }
+ // Unlock the mutex guarding the stack before creating a fresh
+ // value since we no longer need the stack.
+ drop(stack);
+ let value = Box::new((self.create)());
+ return self.guard_stack(value);
+ }
+ // We're only here if we couldn't get access to our stack, so just
+ // create a new value. This seems like it could be wasteful, but
+ // waiting for exclusive access to a stack when there's high
+ // contention is brutal for perf.
+ self.guard_stack_transient(Box::new((self.create)()))
+ }
+
+ /// Puts a value back into the pool. Callers don't need to call this.
+ /// Once the guard that's returned by 'get' is dropped, it is put back
+ /// into the pool automatically.
+ #[inline]
+ fn put_value(&self, value: Box<T>) {
+ let caller = THREAD_ID.with(|id| *id);
+ let stack_id = caller % self.stacks.len();
+ // As with trying to pop a value from this thread's stack, we
+ // merely attempt to get access to push this value back on the
+ // stack. If there's too much contention, we just give up and throw
+ // the value away.
+ // + // Interestingly, in ad hoc benchmarking, it is beneficial to + // attempt to push the value back more than once, unlike when + // popping the value. I don't have a good theory for why this is. + // I guess if we drop too many values then that winds up forcing + // the pop operation to create new fresh values and thus leads to + // less reuse. There's definitely a balancing act here. + for _ in 0..10 { + let mut stack = match self.stacks[stack_id].0.try_lock() { + Err(_) => continue, + Ok(stack) => stack, + }; + stack.push(value); + return; + } + } + + /// Create a guard that represents the special owned T. + #[inline] + fn guard_owned(&self, caller: usize) -> PoolGuard<'_, T, F> { + PoolGuard { pool: self, value: Err(caller), discard: false } + } + + /// Create a guard that contains a value from the pool's stack. + #[inline] + fn guard_stack(&self, value: Box<T>) -> PoolGuard<'_, T, F> { + PoolGuard { pool: self, value: Ok(value), discard: false } + } + + /// Create a guard that contains a value from the pool's stack with an + /// instruction to throw away the value instead of putting it back + /// into the pool. + #[inline] + fn guard_stack_transient(&self, value: Box<T>) -> PoolGuard<'_, T, F> { + PoolGuard { pool: self, value: Ok(value), discard: true } + } + } + + impl<T: core::fmt::Debug, F> core::fmt::Debug for Pool<T, F> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("Pool") + .field("stacks", &self.stacks) + .field("owner", &self.owner) + .field("owner_val", &self.owner_val) + .finish() + } + } + + /// A guard that is returned when a caller requests a value from the pool. + pub(super) struct PoolGuard<'a, T: Send, F: Fn() -> T> { + /// The pool that this guard is attached to. + pool: &'a Pool<T, F>, + /// This is Err when the guard represents the special "owned" value. + /// In which case, the value is retrieved from 'pool.owner_val'. And + /// in the special case of `Err(THREAD_ID_DROPPED)`, it means the + /// guard has been put back into the pool and should no longer be used. + value: Result<Box<T>, usize>, + /// When true, the value should be discarded instead of being pushed + /// back into the pool. We tend to use this under high contention, and + /// this allows us to avoid inflating the size of the pool. (Because + /// under contention, we tend to create more values instead of waiting + /// for access to a stack of existing values.) + discard: bool, + } + + impl<'a, T: Send, F: Fn() -> T> PoolGuard<'a, T, F> { + /// Return the underlying value. + #[inline] + pub(super) fn value(&self) -> &T { + match self.value { + Ok(ref v) => &**v, + // SAFETY: This is safe because the only way a PoolGuard gets + // created for self.value=Err is when the current thread + // corresponds to the owning thread, of which there can only + // be one. Thus, we are guaranteed to be providing exclusive + // access here which makes this safe. + // + // Also, since 'owner_val' is guaranteed to be initialized + // before an owned PoolGuard is created, the unchecked unwrap + // is safe. + Err(id) => unsafe { + // This assert is *not* necessary for safety, since we + // should never be here if the guard had been put back into + // the pool. This is a sanity check to make sure we didn't + // break an internal invariant. + debug_assert_ne!(THREAD_ID_DROPPED, id); + (*self.pool.owner_val.get()).as_ref().unwrap_unchecked() + }, + } + } + + /// Return the underlying value as a mutable borrow. 
+ #[inline]
+ pub(super) fn value_mut(&mut self) -> &mut T {
+ match self.value {
+ Ok(ref mut v) => &mut **v,
+ // SAFETY: This is safe because the only way a PoolGuard gets
+ // created for self.value=Err is when the current thread
+ // corresponds to the owning thread, of which there can only
+ // be one. Thus, we are guaranteed to be providing exclusive
+ // access here which makes this safe.
+ //
+ // Also, since 'owner_val' is guaranteed to be initialized
+ // before an owned PoolGuard is created, the unwrap_unchecked
+ // is safe.
+ Err(id) => unsafe {
+ // This assert is *not* necessary for safety, since we
+ // should never be here if the guard had been put back into
+ // the pool. This is a sanity check to make sure we didn't
+ // break an internal invariant.
+ debug_assert_ne!(THREAD_ID_DROPPED, id);
+ (*self.pool.owner_val.get()).as_mut().unwrap_unchecked()
+ },
+ }
+ }
+
+ /// Consumes this guard and puts it back into the pool.
+ #[inline]
+ pub(super) fn put(this: PoolGuard<'_, T, F>) {
+ // Since this is effectively consuming the guard and putting the
+ // value back into the pool, there's no reason to run its Drop
+ // impl after doing this. I don't believe there is a correctness
+ // problem with doing so, but there's definitely a perf problem
+ // by redoing this work. So we avoid it.
+ let mut this = core::mem::ManuallyDrop::new(this);
+ this.put_imp();
+ }
+
+ /// Puts this guard back into the pool by only borrowing the guard as
+ /// mutable. This should be called at most once.
+ #[inline(always)]
+ fn put_imp(&mut self) {
+ match core::mem::replace(&mut self.value, Err(THREAD_ID_DROPPED)) {
+ Ok(value) => {
+ // If we were told to discard this value then don't bother
+ // trying to put it back into the pool. This occurs when
+ // the pop operation failed to acquire a lock and we
+ // decided to create a new value in lieu of contending for
+ // the lock.
+ if self.discard {
+ return;
+ }
+ self.pool.put_value(value);
+ }
+ // If this guard has a value "owned" by the thread, then
+ // the Pool guarantees that this is the ONLY such guard.
+ // Therefore, in order to place it back into the pool and make
+ // it available, we need to change the owner back to the owning
+ // thread's ID. But note that we use the ID that was stored in
+ // the guard, since a guard can be moved to another thread and
+ // dropped. (A previous iteration of this code read from the
+ // THREAD_ID thread local, which uses the ID of the current
+ // thread which may not be the ID of the owning thread! This
+ // also avoids the TLS access, which is likely a hair faster.)
+ Err(owner) => {
+ // If we hit this point, it implies 'put_imp' has been
+ // called multiple times for the same guard which in turn
+ // corresponds to a bug in this implementation.
+ assert_ne!(THREAD_ID_DROPPED, owner);
+ self.pool.owner.store(owner, Ordering::Release);
+ }
+ }
+ }
+ }
+
+ impl<'a, T: Send, F: Fn() -> T> Drop for PoolGuard<'a, T, F> {
+ #[inline]
+ fn drop(&mut self) {
+ self.put_imp();
+ }
+ }
+
+ impl<'a, T: Send + core::fmt::Debug, F: Fn() -> T> core::fmt::Debug
+ for PoolGuard<'a, T, F>
+ {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ f.debug_struct("PoolGuard")
+ .field("pool", &self.pool)
+ .field("value", &self.value)
+ .finish()
+ }
+ }
+}
+
+// FUTURE: We should consider using Mara Bos's nearly-lock-free version of this
+// here: https://gist.github.com/m-ou-se/5fdcbdf7dcf4585199ce2de697f367a4.
+//
+// One reason why I did things with a "mutex" below is that it isolates the
+// safety concerns to just the Mutex, whereas the safety of Mara's pool is a
+// bit more sprawling. I also expect this code to not be used that much, and
+// so it is unlikely to get as much real world usage with which to test it.
+// That means the "obviously correct" lever is an important one.
+//
+// The specific reason to use Mara's pool is that it is likely faster and also
+// less likely to hit problems with spin-locks, although it is not completely
+// impervious to them.
+//
+// The best solution to this problem, probably, is a truly lock free pool. That
+// could be done with a lock free linked list. The issue is the ABA problem. It
+// is difficult to avoid, and doing so is complex. BUT, the upshot of that is
+// that if we had a truly lock free pool, then we could also use it above in
+// the 'std' pool instead of a Mutex because it should be completely free of
+// the problems that come from spin-locks.
+#[cfg(not(feature = "std"))]
+mod inner {
+    use core::{
+        cell::UnsafeCell,
+        panic::{RefUnwindSafe, UnwindSafe},
+        sync::atomic::{AtomicBool, Ordering},
+    };
+
+    use alloc::{boxed::Box, vec, vec::Vec};
+
+    /// A thread safe pool utilizing alloc-only features.
+    ///
+    /// Unlike the std version, it doesn't seem possible(?) to implement the
+    /// "thread owner" optimization because alloc-only doesn't have any concept
+    /// of threads. So the best we can do is just a normal stack. This will
+    /// increase latency in alloc-only environments.
+    pub(super) struct Pool<T, F> {
+        /// A stack of T values to hand out. These are used when a Pool is
+        /// accessed by a thread that didn't create it.
+        stack: Mutex<Vec<Box<T>>>,
+        /// A function to create more T values when stack is empty and a caller
+        /// has requested a T.
+        create: F,
+    }
+
+    // If T is UnwindSafe, then since we provide exclusive access to any
+    // particular value in the pool, it should therefore also be considered
+    // RefUnwindSafe.
+    impl<T: UnwindSafe, F: UnwindSafe> RefUnwindSafe for Pool<T, F> {}
+
+    impl<T, F> Pool<T, F> {
+        /// Create a new pool. The given closure is used to create values in
+        /// the pool when necessary.
+        pub(super) const fn new(create: F) -> Pool<T, F> {
+            Pool { stack: Mutex::new(vec![]), create }
+        }
+    }
+
+    impl<T: Send, F: Fn() -> T> Pool<T, F> {
+        /// Get a value from the pool. This may block if another thread is also
+        /// attempting to retrieve a value from the pool.
+        #[inline]
+        pub(super) fn get(&self) -> PoolGuard<'_, T, F> {
+            let mut stack = self.stack.lock();
+            let value = match stack.pop() {
+                None => Box::new((self.create)()),
+                Some(value) => value,
+            };
+            PoolGuard { pool: self, value: Some(value) }
+        }
+
+        #[inline]
+        fn put(&self, guard: PoolGuard<'_, T, F>) {
+            let mut guard = core::mem::ManuallyDrop::new(guard);
+            if let Some(value) = guard.value.take() {
+                self.put_value(value);
+            }
+        }
+
+        /// Puts a value back into the pool. Callers don't need to call this.
+        /// Once the guard that's returned by 'get' is dropped, it is put back
+        /// into the pool automatically.
+        #[inline]
+        fn put_value(&self, value: Box<T>) {
+            let mut stack = self.stack.lock();
+            stack.push(value);
+        }
+    }
+
+    impl<T: core::fmt::Debug, F> core::fmt::Debug for Pool<T, F> {
+        fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+            f.debug_struct("Pool").field("stack", &self.stack).finish()
+        }
+    }
+
+    /// A guard that is returned when a caller requests a value from the pool.
+    pub(super) struct PoolGuard<'a, T: Send, F: Fn() -> T> {
+        /// The pool that this guard is attached to.
+        pool: &'a Pool<T, F>,
+        /// This is None after the guard has been put back into the pool.
+        value: Option<Box<T>>,
+    }
+
+    impl<'a, T: Send, F: Fn() -> T> PoolGuard<'a, T, F> {
+        /// Return the underlying value.
+        #[inline]
+        pub(super) fn value(&self) -> &T {
+            self.value.as_deref().unwrap()
+        }
+
+        /// Return the underlying value as a mutable borrow.
+        #[inline]
+        pub(super) fn value_mut(&mut self) -> &mut T {
+            self.value.as_deref_mut().unwrap()
+        }
+
+        /// Consumes this guard and puts it back into the pool.
+        #[inline]
+        pub(super) fn put(this: PoolGuard<'_, T, F>) {
+            // Since this is effectively consuming the guard and putting the
+            // value back into the pool, there's no reason to run its Drop
+            // impl after doing this. I don't believe there is a correctness
+            // problem with doing so, but there's definitely a perf problem
+            // by redoing this work. So we avoid it.
+            let mut this = core::mem::ManuallyDrop::new(this);
+            this.put_imp();
+        }
+
+        /// Puts this guard back into the pool by only borrowing the guard as
+        /// mutable. This should be called at most once.
+        #[inline(always)]
+        fn put_imp(&mut self) {
+            if let Some(value) = self.value.take() {
+                self.pool.put_value(value);
+            }
+        }
+    }
+
+    impl<'a, T: Send, F: Fn() -> T> Drop for PoolGuard<'a, T, F> {
+        #[inline]
+        fn drop(&mut self) {
+            self.put_imp();
+        }
+    }
+
+    impl<'a, T: Send + core::fmt::Debug, F: Fn() -> T> core::fmt::Debug
+        for PoolGuard<'a, T, F>
+    {
+        fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+            f.debug_struct("PoolGuard")
+                .field("pool", &self.pool)
+                .field("value", &self.value)
+                .finish()
+        }
+    }
+
+    /// A spin-lock based mutex. Yes, I have read spinlocks considered
+    /// harmful[1], and if there's a reasonable alternative choice, I'll
+    /// happily take it.
+    ///
+    /// I suspect the most likely alternative here is a Treiber stack, but
+    /// implementing one correctly in a way that avoids the ABA problem looks
+    /// subtle enough that I'm not sure I want to attempt that. But otherwise,
+    /// we only need a mutex in order to implement our pool, so if there's
+    /// something simpler we can use that works for our `Pool` use case, then
+    /// that would be great.
+    ///
+    /// Note that this mutex does not do poisoning.
+    ///
+    /// [1]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html
+    #[derive(Debug)]
+    struct Mutex<T> {
+        locked: AtomicBool,
+        data: UnsafeCell<T>,
+    }
+
+    // SAFETY: Since a Mutex guarantees exclusive access, as long as we can
+    // send it across threads, it must also be Sync.
+    unsafe impl<T: Send> Sync for Mutex<T> {}
+
+    impl<T> Mutex<T> {
+        /// Create a new mutex for protecting access to the given value across
+        /// multiple threads simultaneously.
+        const fn new(value: T) -> Mutex<T> {
+            Mutex {
+                locked: AtomicBool::new(false),
+                data: UnsafeCell::new(value),
+            }
+        }
+
+        /// Lock this mutex and return a guard providing exclusive access to
+        /// `T`. This blocks if some other thread has already locked this
+        /// mutex.
+        #[inline]
+        fn lock(&self) -> MutexGuard<'_, T> {
+            while self
+                .locked
+                .compare_exchange(
+                    false,
+                    true,
+                    Ordering::AcqRel,
+                    Ordering::Acquire,
+                )
+                .is_err()
+            {
+                core::hint::spin_loop();
+            }
+            // SAFETY: The only way we're here is if we successfully set
+            // 'locked' to true, which implies we must be the only thread here
+            // and thus have exclusive access to 'data'.
+ let data = unsafe { &mut *self.data.get() }; + MutexGuard { locked: &self.locked, data } + } + } + + /// A guard that derefs to &T and &mut T. When it's dropped, the lock is + /// released. + #[derive(Debug)] + struct MutexGuard<'a, T> { + locked: &'a AtomicBool, + data: &'a mut T, + } + + impl<'a, T> core::ops::Deref for MutexGuard<'a, T> { + type Target = T; + + #[inline] + fn deref(&self) -> &T { + self.data + } + } + + impl<'a, T> core::ops::DerefMut for MutexGuard<'a, T> { + #[inline] + fn deref_mut(&mut self) -> &mut T { + self.data + } + } + + impl<'a, T> Drop for MutexGuard<'a, T> { + #[inline] + fn drop(&mut self) { + // Drop means 'data' is no longer accessible, so we can unlock + // the mutex. + self.locked.store(false, Ordering::Release); + } + } +} + +#[cfg(test)] +mod tests { + use core::panic::{RefUnwindSafe, UnwindSafe}; + + use alloc::{boxed::Box, vec, vec::Vec}; + + use super::*; + + #[test] + fn oibits() { + fn assert_oitbits<T: Send + Sync + UnwindSafe + RefUnwindSafe>() {} + assert_oitbits::<Pool<Vec<u32>>>(); + assert_oitbits::<Pool<core::cell::RefCell<Vec<u32>>>>(); + assert_oitbits::< + Pool< + Vec<u32>, + Box< + dyn Fn() -> Vec<u32> + + Send + + Sync + + UnwindSafe + + RefUnwindSafe, + >, + >, + >(); + } + + // Tests that Pool implements the "single owner" optimization. That is, the + // thread that first accesses the pool gets its own copy, while all other + // threads get distinct copies. + #[cfg(feature = "std")] + #[test] + fn thread_owner_optimization() { + use std::{cell::RefCell, sync::Arc, vec}; + + let pool: Arc<Pool<RefCell<Vec<char>>>> = + Arc::new(Pool::new(|| RefCell::new(vec!['a']))); + pool.get().borrow_mut().push('x'); + + let pool1 = pool.clone(); + let t1 = std::thread::spawn(move || { + let guard = pool1.get(); + guard.borrow_mut().push('y'); + }); + + let pool2 = pool.clone(); + let t2 = std::thread::spawn(move || { + let guard = pool2.get(); + guard.borrow_mut().push('z'); + }); + + t1.join().unwrap(); + t2.join().unwrap(); + + // If we didn't implement the single owner optimization, then one of + // the threads above is likely to have mutated the [a, x] vec that + // we stuffed in the pool before spawning the threads. But since + // neither thread was first to access the pool, and because of the + // optimization, we should be guaranteed that neither thread mutates + // the special owned pool value. + // + // (Technically this is an implementation detail and not a contract of + // Pool's API.) + assert_eq!(vec!['a', 'x'], *pool.get().borrow()); + } + + // This tests that if the "owner" of a pool asks for two values, then it + // gets two distinct values and not the same one. This test failed in the + // course of developing the pool, which in turn resulted in UB because it + // permitted getting aliasing &mut borrows to the same place in memory. + #[test] + fn thread_owner_distinct() { + let pool = Pool::new(|| vec!['a']); + + { + let mut g1 = pool.get(); + let v1 = &mut *g1; + let mut g2 = pool.get(); + let v2 = &mut *g2; + v1.push('b'); + v2.push('c'); + assert_eq!(&mut vec!['a', 'b'], v1); + assert_eq!(&mut vec!['a', 'c'], v2); + } + // This isn't technically guaranteed, but we + // expect to now get the "owned" value (the first + // call to 'get()' above) now that it's back in + // the pool. + assert_eq!(&mut vec!['a', 'b'], &mut *pool.get()); + } + + // This tests that we can share a guard with another thread, mutate the + // underlying value and everything works. 
This failed in the course of + // developing a pool since the pool permitted 'get()' to return the same + // value to the owner thread, even before the previous value was put back + // into the pool. This in turn resulted in this test producing a data race. + #[cfg(feature = "std")] + #[test] + fn thread_owner_sync() { + let pool = Pool::new(|| vec!['a']); + { + let mut g1 = pool.get(); + let mut g2 = pool.get(); + std::thread::scope(|s| { + s.spawn(|| { + g1.push('b'); + }); + s.spawn(|| { + g2.push('c'); + }); + }); + + let v1 = &mut *g1; + let v2 = &mut *g2; + assert_eq!(&mut vec!['a', 'b'], v1); + assert_eq!(&mut vec!['a', 'c'], v2); + } + + // This isn't technically guaranteed, but we + // expect to now get the "owned" value (the first + // call to 'get()' above) now that it's back in + // the pool. + assert_eq!(&mut vec!['a', 'b'], &mut *pool.get()); + } + + // This tests that if we move a PoolGuard that is owned by the current + // thread to another thread and drop it, then the thread owner doesn't + // change. During development of the pool, this test failed because the + // PoolGuard assumed it was dropped in the same thread from which it was + // created, and thus used the current thread's ID as the owner, which could + // be different than the actual owner of the pool. + #[cfg(feature = "std")] + #[test] + fn thread_owner_send_drop() { + let pool = Pool::new(|| vec!['a']); + // Establishes this thread as the owner. + { + pool.get().push('b'); + } + std::thread::scope(|s| { + // Sanity check that we get the same value back. + // (Not technically guaranteed.) + let mut g = pool.get(); + assert_eq!(&vec!['a', 'b'], &*g); + // Now push it to another thread and drop it. + s.spawn(move || { + g.push('c'); + }) + .join() + .unwrap(); + }); + // Now check that we're still the owner. This is not technically + // guaranteed by the API, but is true in practice given the thread + // owner optimization. + assert_eq!(&vec!['a', 'b', 'c'], &*pool.get()); + } +} diff --git a/vendor/regex-automata/src/util/prefilter/aho_corasick.rs b/vendor/regex-automata/src/util/prefilter/aho_corasick.rs new file mode 100644 index 0000000..50cce82 --- /dev/null +++ b/vendor/regex-automata/src/util/prefilter/aho_corasick.rs @@ -0,0 +1,149 @@ +use crate::util::{ + prefilter::PrefilterI, + search::{MatchKind, Span}, +}; + +#[derive(Clone, Debug)] +pub(crate) struct AhoCorasick { + #[cfg(not(feature = "perf-literal-multisubstring"))] + _unused: (), + #[cfg(feature = "perf-literal-multisubstring")] + ac: aho_corasick::AhoCorasick, +} + +impl AhoCorasick { + pub(crate) fn new<B: AsRef<[u8]>>( + kind: MatchKind, + needles: &[B], + ) -> Option<AhoCorasick> { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + None + } + #[cfg(feature = "perf-literal-multisubstring")] + { + // We used to use `aho_corasick::MatchKind::Standard` here when + // `kind` was `MatchKind::All`, but this is not correct. The + // "standard" Aho-Corasick match semantics are to report a match + // immediately as soon as it is seen, but `All` isn't like that. + // In particular, with "standard" semantics, given the needles + // "abc" and "b" and the haystack "abc," it would report a match + // at offset 1 before a match at offset 0. This is never what we + // want in the context of the regex engine, regardless of whether + // we have leftmost-first or 'all' semantics. Namely, we always + // want the leftmost match. 
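+            //
+            // As a rough illustration of that difference (a sketch against
+            // the aho-corasick crate's public API, not code used by this
+            // crate):
+            //
+            //     use aho_corasick::{AhoCorasick, MatchKind};
+            //
+            //     let ac = AhoCorasick::builder()
+            //         .match_kind(MatchKind::Standard)
+            //         .build(["abc", "b"])
+            //         .unwrap();
+            //     let m = ac.find("abc").unwrap();
+            //     // "Standard" semantics report 'b' at 1..2 first.
+            //     assert_eq!((m.start(), m.end()), (1, 2));
+            //
+            // With MatchKind::LeftmostFirst, the same search reports 'abc'
+            // at 0..3 instead, which is the behavior we want here.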
+            let ac_match_kind = match kind {
+                MatchKind::LeftmostFirst | MatchKind::All => {
+                    aho_corasick::MatchKind::LeftmostFirst
+                }
+            };
+            // This is kind of just an arbitrary number, but basically, if we
+            // have a small enough set of literals, then we try to use the VERY
+            // memory hungry DFA. Otherwise, we wimp out and use an NFA. The
+            // upshot is that the NFA is quite lean and decently fast. Faster
+            // than a naive Aho-Corasick NFA anyway.
+            let ac_kind = if needles.len() <= 500 {
+                aho_corasick::AhoCorasickKind::DFA
+            } else {
+                aho_corasick::AhoCorasickKind::ContiguousNFA
+            };
+            let result = aho_corasick::AhoCorasick::builder()
+                .kind(Some(ac_kind))
+                .match_kind(ac_match_kind)
+                .start_kind(aho_corasick::StartKind::Both)
+                // We try to handle all of the prefilter cases in the super
+                // module, and only use Aho-Corasick for the actual automaton.
+                // The aho-corasick crate does have some extra prefilters,
+                // namely, looking for rare bytes to feed to memchr{,2,3}
+                // instead of just the first byte. If we end up wanting
+                // those---and they are somewhat tricky to implement---then
+                // we could port them to this crate.
+                //
+                // The main reason for doing things this way is so we have a
+                // complete and easy to understand picture of which prefilters
+                // are available and how they work. Otherwise it seems too
+                // easy to get into a situation where we have a prefilter
+                // layered on top of a prefilter, and that might have
+                // unintended consequences.
+                .prefilter(false)
+                .build(needles);
+            let ac = match result {
+                Ok(ac) => ac,
+                Err(_err) => {
+                    debug!("aho-corasick prefilter failed to build: {}", _err);
+                    return None;
+                }
+            };
+            Some(AhoCorasick { ac })
+        }
+    }
+}
+
+impl PrefilterI for AhoCorasick {
+    fn find(&self, haystack: &[u8], span: Span) -> Option<Span> {
+        #[cfg(not(feature = "perf-literal-multisubstring"))]
+        {
+            unreachable!()
+        }
+        #[cfg(feature = "perf-literal-multisubstring")]
+        {
+            let input =
+                aho_corasick::Input::new(haystack).span(span.start..span.end);
+            self.ac
+                .find(input)
+                .map(|m| Span { start: m.start(), end: m.end() })
+        }
+    }
+
+    fn prefix(&self, haystack: &[u8], span: Span) -> Option<Span> {
+        #[cfg(not(feature = "perf-literal-multisubstring"))]
+        {
+            unreachable!()
+        }
+        #[cfg(feature = "perf-literal-multisubstring")]
+        {
+            let input = aho_corasick::Input::new(haystack)
+                .anchored(aho_corasick::Anchored::Yes)
+                .span(span.start..span.end);
+            self.ac
+                .find(input)
+                .map(|m| Span { start: m.start(), end: m.end() })
+        }
+    }
+
+    fn memory_usage(&self) -> usize {
+        #[cfg(not(feature = "perf-literal-multisubstring"))]
+        {
+            unreachable!()
+        }
+        #[cfg(feature = "perf-literal-multisubstring")]
+        {
+            self.ac.memory_usage()
+        }
+    }
+
+    fn is_fast(&self) -> bool {
+        #[cfg(not(feature = "perf-literal-multisubstring"))]
+        {
+            unreachable!()
+        }
+        #[cfg(feature = "perf-literal-multisubstring")]
+        {
+            // Aho-Corasick is never considered "fast" because it's never
+            // going to be even close to an order of magnitude faster than the
+            // regex engine itself (assuming a DFA is used). In fact, it is
+            // usually slower. The magic of Aho-Corasick is that it can search
+            // a *large* number of literals with a relatively small amount of
+            // memory. The regex engines are far more wasteful.
+            //
+            // Aho-Corasick may be "fast" when the regex engine corresponds
+            // to, say, the PikeVM. That happens when the lazy DFA couldn't be
+            // built or used for some reason.
But in these cases, the regex + // itself is likely quite big and we're probably hosed no matter + // what we do. (In this case, the best bet is for the caller to + // increase some of the memory limits on the hybrid cache capacity + // and hope that's enough.) + false + } + } +} diff --git a/vendor/regex-automata/src/util/prefilter/byteset.rs b/vendor/regex-automata/src/util/prefilter/byteset.rs new file mode 100644 index 0000000..a669d6c --- /dev/null +++ b/vendor/regex-automata/src/util/prefilter/byteset.rs @@ -0,0 +1,58 @@ +use crate::util::{ + prefilter::PrefilterI, + search::{MatchKind, Span}, +}; + +#[derive(Clone, Debug)] +pub(crate) struct ByteSet([bool; 256]); + +impl ByteSet { + pub(crate) fn new<B: AsRef<[u8]>>( + _kind: MatchKind, + needles: &[B], + ) -> Option<ByteSet> { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + None + } + #[cfg(feature = "perf-literal-multisubstring")] + { + let mut set = [false; 256]; + for needle in needles.iter() { + let needle = needle.as_ref(); + if needle.len() != 1 { + return None; + } + set[usize::from(needle[0])] = true; + } + Some(ByteSet(set)) + } + } +} + +impl PrefilterI for ByteSet { + fn find(&self, haystack: &[u8], span: Span) -> Option<Span> { + haystack[span].iter().position(|&b| self.0[usize::from(b)]).map(|i| { + let start = span.start + i; + let end = start + 1; + Span { start, end } + }) + } + + fn prefix(&self, haystack: &[u8], span: Span) -> Option<Span> { + let b = *haystack.get(span.start)?; + if self.0[usize::from(b)] { + Some(Span { start: span.start, end: span.start + 1 }) + } else { + None + } + } + + fn memory_usage(&self) -> usize { + 0 + } + + fn is_fast(&self) -> bool { + false + } +} diff --git a/vendor/regex-automata/src/util/prefilter/memchr.rs b/vendor/regex-automata/src/util/prefilter/memchr.rs new file mode 100644 index 0000000..3d44b83 --- /dev/null +++ b/vendor/regex-automata/src/util/prefilter/memchr.rs @@ -0,0 +1,186 @@ +use crate::util::{ + prefilter::PrefilterI, + search::{MatchKind, Span}, +}; + +#[derive(Clone, Debug)] +pub(crate) struct Memchr(u8); + +impl Memchr { + pub(crate) fn new<B: AsRef<[u8]>>( + _kind: MatchKind, + needles: &[B], + ) -> Option<Memchr> { + #[cfg(not(feature = "perf-literal-substring"))] + { + None + } + #[cfg(feature = "perf-literal-substring")] + { + if needles.len() != 1 { + return None; + } + if needles[0].as_ref().len() != 1 { + return None; + } + Some(Memchr(needles[0].as_ref()[0])) + } + } +} + +impl PrefilterI for Memchr { + fn find(&self, haystack: &[u8], span: Span) -> Option<Span> { + #[cfg(not(feature = "perf-literal-substring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-substring")] + { + memchr::memchr(self.0, &haystack[span]).map(|i| { + let start = span.start + i; + let end = start + 1; + Span { start, end } + }) + } + } + + fn prefix(&self, haystack: &[u8], span: Span) -> Option<Span> { + let b = *haystack.get(span.start)?; + if self.0 == b { + Some(Span { start: span.start, end: span.start + 1 }) + } else { + None + } + } + + fn memory_usage(&self) -> usize { + 0 + } + + fn is_fast(&self) -> bool { + true + } +} + +#[derive(Clone, Debug)] +pub(crate) struct Memchr2(u8, u8); + +impl Memchr2 { + pub(crate) fn new<B: AsRef<[u8]>>( + _kind: MatchKind, + needles: &[B], + ) -> Option<Memchr2> { + #[cfg(not(feature = "perf-literal-substring"))] + { + None + } + #[cfg(feature = "perf-literal-substring")] + { + if needles.len() != 2 { + return None; + } + if !needles.iter().all(|n| n.as_ref().len() == 1) { + return None; + } + let b1 = 
needles[0].as_ref()[0]; + let b2 = needles[1].as_ref()[0]; + Some(Memchr2(b1, b2)) + } + } +} + +impl PrefilterI for Memchr2 { + fn find(&self, haystack: &[u8], span: Span) -> Option<Span> { + #[cfg(not(feature = "perf-literal-substring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-substring")] + { + memchr::memchr2(self.0, self.1, &haystack[span]).map(|i| { + let start = span.start + i; + let end = start + 1; + Span { start, end } + }) + } + } + + fn prefix(&self, haystack: &[u8], span: Span) -> Option<Span> { + let b = *haystack.get(span.start)?; + if self.0 == b || self.1 == b { + Some(Span { start: span.start, end: span.start + 1 }) + } else { + None + } + } + + fn memory_usage(&self) -> usize { + 0 + } + + fn is_fast(&self) -> bool { + true + } +} + +#[derive(Clone, Debug)] +pub(crate) struct Memchr3(u8, u8, u8); + +impl Memchr3 { + pub(crate) fn new<B: AsRef<[u8]>>( + _kind: MatchKind, + needles: &[B], + ) -> Option<Memchr3> { + #[cfg(not(feature = "perf-literal-substring"))] + { + None + } + #[cfg(feature = "perf-literal-substring")] + { + if needles.len() != 3 { + return None; + } + if !needles.iter().all(|n| n.as_ref().len() == 1) { + return None; + } + let b1 = needles[0].as_ref()[0]; + let b2 = needles[1].as_ref()[0]; + let b3 = needles[2].as_ref()[0]; + Some(Memchr3(b1, b2, b3)) + } + } +} + +impl PrefilterI for Memchr3 { + fn find(&self, haystack: &[u8], span: Span) -> Option<Span> { + #[cfg(not(feature = "perf-literal-substring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-substring")] + { + memchr::memchr3(self.0, self.1, self.2, &haystack[span]).map(|i| { + let start = span.start + i; + let end = start + 1; + Span { start, end } + }) + } + } + + fn prefix(&self, haystack: &[u8], span: Span) -> Option<Span> { + let b = *haystack.get(span.start)?; + if self.0 == b || self.1 == b || self.2 == b { + Some(Span { start: span.start, end: span.start + 1 }) + } else { + None + } + } + + fn memory_usage(&self) -> usize { + 0 + } + + fn is_fast(&self) -> bool { + true + } +} diff --git a/vendor/regex-automata/src/util/prefilter/memmem.rs b/vendor/regex-automata/src/util/prefilter/memmem.rs new file mode 100644 index 0000000..deea17b --- /dev/null +++ b/vendor/regex-automata/src/util/prefilter/memmem.rs @@ -0,0 +1,88 @@ +use crate::util::{ + prefilter::PrefilterI, + search::{MatchKind, Span}, +}; + +#[derive(Clone, Debug)] +pub(crate) struct Memmem { + #[cfg(not(all(feature = "std", feature = "perf-literal-substring")))] + _unused: (), + #[cfg(all(feature = "std", feature = "perf-literal-substring"))] + finder: memchr::memmem::Finder<'static>, +} + +impl Memmem { + pub(crate) fn new<B: AsRef<[u8]>>( + _kind: MatchKind, + needles: &[B], + ) -> Option<Memmem> { + #[cfg(not(all(feature = "std", feature = "perf-literal-substring")))] + { + None + } + #[cfg(all(feature = "std", feature = "perf-literal-substring"))] + { + if needles.len() != 1 { + return None; + } + let needle = needles[0].as_ref(); + let finder = memchr::memmem::Finder::new(needle).into_owned(); + Some(Memmem { finder }) + } + } +} + +impl PrefilterI for Memmem { + fn find(&self, haystack: &[u8], span: Span) -> Option<Span> { + #[cfg(not(all(feature = "std", feature = "perf-literal-substring")))] + { + unreachable!() + } + #[cfg(all(feature = "std", feature = "perf-literal-substring"))] + { + self.finder.find(&haystack[span]).map(|i| { + let start = span.start + i; + let end = start + self.finder.needle().len(); + Span { start, end } + }) + } + } + + fn prefix(&self, haystack: &[u8], span: 
Span) -> Option<Span> { + #[cfg(not(all(feature = "std", feature = "perf-literal-substring")))] + { + unreachable!() + } + #[cfg(all(feature = "std", feature = "perf-literal-substring"))] + { + let needle = self.finder.needle(); + if haystack[span].starts_with(needle) { + Some(Span { end: span.start + needle.len(), ..span }) + } else { + None + } + } + } + + fn memory_usage(&self) -> usize { + #[cfg(not(all(feature = "std", feature = "perf-literal-substring")))] + { + unreachable!() + } + #[cfg(all(feature = "std", feature = "perf-literal-substring"))] + { + self.finder.needle().len() + } + } + + fn is_fast(&self) -> bool { + #[cfg(not(all(feature = "std", feature = "perf-literal-substring")))] + { + unreachable!() + } + #[cfg(all(feature = "std", feature = "perf-literal-substring"))] + { + true + } + } +} diff --git a/vendor/regex-automata/src/util/prefilter/mod.rs b/vendor/regex-automata/src/util/prefilter/mod.rs new file mode 100644 index 0000000..51fc922 --- /dev/null +++ b/vendor/regex-automata/src/util/prefilter/mod.rs @@ -0,0 +1,696 @@ +/*! +Defines a prefilter for accelerating regex searches. + +A prefilter can be created by building a [`Prefilter`] value. + +A prefilter represents one of the most important optimizations available for +accelerating regex searches. The idea of a prefilter is to very quickly find +candidate locations in a haystack where a regex _could_ match. Once a candidate +is found, it is then intended for the regex engine to run at that position to +determine whether the candidate is a match or a false positive. + +In the aforementioned description of the prefilter optimization also lay its +demise. Namely, if a prefilter has a high false positive rate and it produces +lots of candidates, then a prefilter can overall make a regex search slower. +It can run more slowly because more time is spent ping-ponging between the +prefilter search and the regex engine attempting to confirm each candidate as +a match. This ping-ponging has overhead that adds up, and is exacerbated by +a high false positive rate. + +Nevertheless, the optimization is still generally worth performing in most +cases. Particularly given just how much throughput can be improved. (It is not +uncommon for prefilter optimizations to improve throughput by one or two orders +of magnitude.) + +Typically a prefilter is used to find occurrences of literal prefixes from a +regex pattern, but this isn't required. A prefilter can be used to look for +suffixes or even inner literals. + +Note that as of now, prefilters throw away information about which pattern +each literal comes from. In other words, when a prefilter finds a match, +there's no way to know which pattern (or patterns) it came from. Therefore, +in order to confirm a match, you'll have to check all of the patterns by +running the full regex engine. +*/ + +mod aho_corasick; +mod byteset; +mod memchr; +mod memmem; +mod teddy; + +use core::{ + borrow::Borrow, + fmt::Debug, + panic::{RefUnwindSafe, UnwindSafe}, +}; + +#[cfg(feature = "alloc")] +use alloc::sync::Arc; + +#[cfg(feature = "syntax")] +use regex_syntax::hir::{literal, Hir}; + +use crate::util::search::{MatchKind, Span}; + +pub(crate) use crate::util::prefilter::{ + aho_corasick::AhoCorasick, + byteset::ByteSet, + memchr::{Memchr, Memchr2, Memchr3}, + memmem::Memmem, + teddy::Teddy, +}; + +/// A prefilter for accelerating regex searches. +/// +/// If you already have your literals that you want to search with, +/// then the vanilla [`Prefilter::new`] constructor is for you. 
But +/// if you have an [`Hir`] value from the `regex-syntax` crate, then +/// [`Prefilter::from_hir_prefix`] might be more convenient. Namely, it uses +/// the [`regex-syntax::hir::literal`](regex_syntax::hir::literal) module to +/// extract literal prefixes for you, optimize them and then select and build a +/// prefilter matcher. +/// +/// A prefilter must have **zero false negatives**. However, by its very +/// nature, it may produce false positives. That is, a prefilter will never +/// skip over a position in the haystack that corresponds to a match of the +/// original regex pattern, but it *may* produce a match for a position +/// in the haystack that does *not* correspond to a match of the original +/// regex pattern. If you use either the [`Prefilter::from_hir_prefix`] or +/// [`Prefilter::from_hirs_prefix`] constructors, then this guarantee is +/// upheld for you automatically. This guarantee is not preserved if you use +/// [`Prefilter::new`] though, since it is up to the caller to provide correct +/// literal strings with respect to the original regex pattern. +/// +/// # Cloning +/// +/// It is an API guarantee that cloning a prefilter is cheap. That is, cloning +/// it will not duplicate whatever heap memory is used to represent the +/// underlying matcher. +/// +/// # Example +/// +/// This example shows how to attach a `Prefilter` to the +/// [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM) in order to accelerate +/// searches. +/// +/// ``` +/// use regex_automata::{ +/// nfa::thompson::pikevm::PikeVM, +/// util::prefilter::Prefilter, +/// Match, MatchKind, +/// }; +/// +/// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["Bruce "]) +/// .expect("a prefilter"); +/// let re = PikeVM::builder() +/// .configure(PikeVM::config().prefilter(Some(pre))) +/// .build(r"Bruce \w+")?; +/// let mut cache = re.create_cache(); +/// assert_eq!( +/// Some(Match::must(0, 6..23)), +/// re.find(&mut cache, "Hello Bruce Springsteen!"), +/// ); +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +/// +/// But note that if you get your prefilter incorrect, it could lead to an +/// incorrect result! +/// +/// ``` +/// use regex_automata::{ +/// nfa::thompson::pikevm::PikeVM, +/// util::prefilter::Prefilter, +/// Match, MatchKind, +/// }; +/// +/// // This prefilter is wrong! +/// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["Patti "]) +/// .expect("a prefilter"); +/// let re = PikeVM::builder() +/// .configure(PikeVM::config().prefilter(Some(pre))) +/// .build(r"Bruce \w+")?; +/// let mut cache = re.create_cache(); +/// // We find no match even though the regex does match. +/// assert_eq!( +/// None, +/// re.find(&mut cache, "Hello Bruce Springsteen!"), +/// ); +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct Prefilter { + #[cfg(not(feature = "alloc"))] + _unused: (), + #[cfg(feature = "alloc")] + pre: Arc<dyn PrefilterI>, + #[cfg(feature = "alloc")] + is_fast: bool, +} + +impl Prefilter { + /// Create a new prefilter from a sequence of needles and a corresponding + /// match semantics. + /// + /// This may return `None` for a variety of reasons, for example, if + /// a suitable prefilter could not be constructed. That might occur + /// if they are unavailable (e.g., the `perf-literal-substring` and + /// `perf-literal-multisubstring` features aren't enabled), or it might + /// occur because of heuristics or other artifacts of how the prefilter + /// works. 
+ /// + /// Note that if you have an [`Hir`] expression, it may be more convenient + /// to use [`Prefilter::from_hir_prefix`]. It will automatically handle the + /// task of extracting prefix literals for you. + /// + /// # Example + /// + /// This example shows how match semantics can impact the matching + /// algorithm used by the prefilter. For this reason, it is important to + /// ensure that the match semantics given here are consistent with the + /// match semantics intended for the regular expression that the literals + /// were extracted from. + /// + /// ``` + /// use regex_automata::{ + /// util::{prefilter::Prefilter, syntax}, + /// MatchKind, Span, + /// }; + /// + /// let hay = "Hello samwise"; + /// + /// // With leftmost-first, we find 'samwise' here because it comes + /// // before 'sam' in the sequence we give it.. + /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["samwise", "sam"]) + /// .expect("a prefilter"); + /// assert_eq!( + /// Some(Span::from(6..13)), + /// pre.find(hay.as_bytes(), Span::from(0..hay.len())), + /// ); + /// // Still with leftmost-first but with the literals reverse, now 'sam' + /// // will match instead! + /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["sam", "samwise"]) + /// .expect("a prefilter"); + /// assert_eq!( + /// Some(Span::from(6..9)), + /// pre.find(hay.as_bytes(), Span::from(0..hay.len())), + /// ); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn new<B: AsRef<[u8]>>( + kind: MatchKind, + needles: &[B], + ) -> Option<Prefilter> { + Choice::new(kind, needles).and_then(Prefilter::from_choice) + } + + /// This turns a prefilter selection into a `Prefilter`. That is, in turns + /// the enum given into a trait object. + fn from_choice(choice: Choice) -> Option<Prefilter> { + #[cfg(not(feature = "alloc"))] + { + None + } + #[cfg(feature = "alloc")] + { + let pre: Arc<dyn PrefilterI> = match choice { + Choice::Memchr(p) => Arc::new(p), + Choice::Memchr2(p) => Arc::new(p), + Choice::Memchr3(p) => Arc::new(p), + Choice::Memmem(p) => Arc::new(p), + Choice::Teddy(p) => Arc::new(p), + Choice::ByteSet(p) => Arc::new(p), + Choice::AhoCorasick(p) => Arc::new(p), + }; + let is_fast = pre.is_fast(); + Some(Prefilter { pre, is_fast }) + } + } + + /// This attempts to extract prefixes from the given `Hir` expression for + /// the given match semantics, and if possible, builds a prefilter for + /// them. + /// + /// # Example + /// + /// This example shows how to build a prefilter directly from an [`Hir`] + /// expression, and use to find an occurrence of a prefix from the regex + /// pattern. + /// + /// ``` + /// use regex_automata::{ + /// util::{prefilter::Prefilter, syntax}, + /// MatchKind, Span, + /// }; + /// + /// let hir = syntax::parse(r"(Bruce|Patti) \w+")?; + /// let pre = Prefilter::from_hir_prefix(MatchKind::LeftmostFirst, &hir) + /// .expect("a prefilter"); + /// let hay = "Hello Patti Scialfa!"; + /// assert_eq!( + /// Some(Span::from(6..12)), + /// pre.find(hay.as_bytes(), Span::from(0..hay.len())), + /// ); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn from_hir_prefix(kind: MatchKind, hir: &Hir) -> Option<Prefilter> { + Prefilter::from_hirs_prefix(kind, &[hir]) + } + + /// This attempts to extract prefixes from the given `Hir` expressions for + /// the given match semantics, and if possible, builds a prefilter for + /// them. 
+ /// + /// Note that as of now, prefilters throw away information about which + /// pattern each literal comes from. In other words, when a prefilter finds + /// a match, there's no way to know which pattern (or patterns) it came + /// from. Therefore, in order to confirm a match, you'll have to check all + /// of the patterns by running the full regex engine. + /// + /// # Example + /// + /// This example shows how to build a prefilter directly from multiple + /// `Hir` expressions expression, and use it to find an occurrence of a + /// prefix from the regex patterns. + /// + /// ``` + /// use regex_automata::{ + /// util::{prefilter::Prefilter, syntax}, + /// MatchKind, Span, + /// }; + /// + /// let hirs = syntax::parse_many(&[ + /// r"(Bruce|Patti) \w+", + /// r"Mrs?\. Doubtfire", + /// ])?; + /// let pre = Prefilter::from_hirs_prefix(MatchKind::LeftmostFirst, &hirs) + /// .expect("a prefilter"); + /// let hay = "Hello Mrs. Doubtfire"; + /// assert_eq!( + /// Some(Span::from(6..20)), + /// pre.find(hay.as_bytes(), Span::from(0..hay.len())), + /// ); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn from_hirs_prefix<H: Borrow<Hir>>( + kind: MatchKind, + hirs: &[H], + ) -> Option<Prefilter> { + prefixes(kind, hirs) + .literals() + .and_then(|lits| Prefilter::new(kind, lits)) + } + + /// Run this prefilter on `haystack[span.start..end]` and return a matching + /// span if one exists. + /// + /// The span returned is guaranteed to have a start position greater than + /// or equal to the one given, and an end position less than or equal to + /// the one given. + /// + /// # Example + /// + /// This example shows how to build a prefilter directly from an [`Hir`] + /// expression, and use it to find an occurrence of a prefix from the regex + /// pattern. + /// + /// ``` + /// use regex_automata::{ + /// util::{prefilter::Prefilter, syntax}, + /// MatchKind, Span, + /// }; + /// + /// let hir = syntax::parse(r"Bruce \w+")?; + /// let pre = Prefilter::from_hir_prefix(MatchKind::LeftmostFirst, &hir) + /// .expect("a prefilter"); + /// let hay = "Hello Bruce Springsteen!"; + /// assert_eq!( + /// Some(Span::from(6..12)), + /// pre.find(hay.as_bytes(), Span::from(0..hay.len())), + /// ); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn find(&self, haystack: &[u8], span: Span) -> Option<Span> { + #[cfg(not(feature = "alloc"))] + { + unreachable!() + } + #[cfg(feature = "alloc")] + { + self.pre.find(haystack, span) + } + } + + /// Returns the span of a prefix of `haystack[span.start..span.end]` if + /// the prefilter matches. + /// + /// The span returned is guaranteed to have a start position equivalent to + /// the one given, and an end position less than or equal to the one given. + /// + /// # Example + /// + /// This example shows how to build a prefilter directly from an [`Hir`] + /// expression, and use it to find an occurrence of a prefix from the regex + /// pattern that begins at the start of a haystack only. + /// + /// ``` + /// use regex_automata::{ + /// util::{prefilter::Prefilter, syntax}, + /// MatchKind, Span, + /// }; + /// + /// let hir = syntax::parse(r"Bruce \w+")?; + /// let pre = Prefilter::from_hir_prefix(MatchKind::LeftmostFirst, &hir) + /// .expect("a prefilter"); + /// let hay = "Hello Bruce Springsteen!"; + /// // Nothing is found here because 'Bruce' does + /// // not occur at the beginning of our search. 
+ /// assert_eq!( + /// None, + /// pre.prefix(hay.as_bytes(), Span::from(0..hay.len())), + /// ); + /// // But if we change where we start the search + /// // to begin where 'Bruce ' begins, then a + /// // match will be found. + /// assert_eq!( + /// Some(Span::from(6..12)), + /// pre.prefix(hay.as_bytes(), Span::from(6..hay.len())), + /// ); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn prefix(&self, haystack: &[u8], span: Span) -> Option<Span> { + #[cfg(not(feature = "alloc"))] + { + unreachable!() + } + #[cfg(feature = "alloc")] + { + self.pre.prefix(haystack, span) + } + } + + /// Returns the heap memory, in bytes, used by the underlying prefilter. + #[inline] + pub fn memory_usage(&self) -> usize { + #[cfg(not(feature = "alloc"))] + { + unreachable!() + } + #[cfg(feature = "alloc")] + { + self.pre.memory_usage() + } + } + + /// Implementations might return true here if they believe themselves to + /// be "fast." The concept of "fast" is deliberately left vague, but in + /// practice this usually corresponds to whether it's believed that SIMD + /// will be used. + /// + /// Why do we care about this? Well, some prefilter tricks tend to come + /// with their own bits of overhead, and so might only make sense if we + /// know that a scan will be *much* faster than the regex engine itself. + /// Otherwise, the trick may not be worth doing. Whether something is + /// "much" faster than the regex engine generally boils down to whether + /// SIMD is used. (But not always. Even a SIMD matcher with a high false + /// positive rate can become quite slow.) + /// + /// Even if this returns true, it is still possible for the prefilter to + /// be "slow." Remember, prefilters are just heuristics. We can't really + /// *know* a prefilter will be fast without actually trying the prefilter. + /// (Which of course we cannot afford to do.) + #[inline] + pub(crate) fn is_fast(&self) -> bool { + #[cfg(not(feature = "alloc"))] + { + unreachable!() + } + #[cfg(feature = "alloc")] + { + self.is_fast + } + } +} + +/// A trait for abstracting over prefilters. Basically, a prefilter is +/// something that do an unanchored *and* an anchored search in a haystack +/// within a given span. +/// +/// This exists pretty much only so that we can use prefilters as a trait +/// object (which is what `Prefilter` is). If we ever move off of trait objects +/// and to an enum, then it's likely this trait could be removed. +pub(crate) trait PrefilterI: + Debug + Send + Sync + RefUnwindSafe + UnwindSafe + 'static +{ + /// Run this prefilter on `haystack[span.start..end]` and return a matching + /// span if one exists. + /// + /// The span returned is guaranteed to have a start position greater than + /// or equal to the one given, and an end position less than or equal to + /// the one given. + fn find(&self, haystack: &[u8], span: Span) -> Option<Span>; + + /// Returns the span of a prefix of `haystack[span.start..span.end]` if + /// the prefilter matches. + /// + /// The span returned is guaranteed to have a start position equivalent to + /// the one given, and an end position less than or equal to the one given. + fn prefix(&self, haystack: &[u8], span: Span) -> Option<Span>; + + /// Returns the heap memory, in bytes, used by the underlying prefilter. + fn memory_usage(&self) -> usize; + + /// Implementations might return true here if they believe themselves to + /// be "fast." See [`Prefilter::is_fast`] for more details. 
+    fn is_fast(&self) -> bool;
+}
+
+#[cfg(feature = "alloc")]
+impl<P: PrefilterI + ?Sized> PrefilterI for Arc<P> {
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn find(&self, haystack: &[u8], span: Span) -> Option<Span> {
+        (&**self).find(haystack, span)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn prefix(&self, haystack: &[u8], span: Span) -> Option<Span> {
+        (&**self).prefix(haystack, span)
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn memory_usage(&self) -> usize {
+        (&**self).memory_usage()
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn is_fast(&self) -> bool {
+        (&**self).is_fast()
+    }
+}
+
+/// A type that encapsulates the selection of a prefilter algorithm from a
+/// sequence of needles.
+///
+/// The existence of this type is a little tricky, because we don't (currently)
+/// use it for performing a search. Instead, we really only consume it by
+/// converting the underlying prefilter into a trait object, whether that be
+/// `dyn PrefilterI` or `dyn Strategy` (for the meta regex engine). In order
+/// to avoid re-copying the prefilter selection logic, we isolate it here, and
+/// then force anything downstream that wants to convert it to a trait object
+/// to do trivial case analysis on it.
+///
+/// One wonders whether we *should* use an enum instead of a trait object.
+/// At time of writing, I chose trait objects based on instinct because 1) I
+/// knew I wasn't going to inline anything and 2) there would potentially be
+/// many different choices. However, as of time of writing, I haven't actually
+/// compared the trait object approach to the enum approach. That probably
+/// should be litigated, but I ran out of steam.
+///
+/// Note that if the `alloc` feature is disabled, then values of this type
+/// are (and should) never be constructed. Also, in practice, for any of the
+/// prefilters to be selected, you'll need at least one of the `perf-literal-*`
+/// features enabled.
+#[derive(Clone, Debug)]
+pub(crate) enum Choice {
+    Memchr(Memchr),
+    Memchr2(Memchr2),
+    Memchr3(Memchr3),
+    Memmem(Memmem),
+    Teddy(Teddy),
+    ByteSet(ByteSet),
+    AhoCorasick(AhoCorasick),
+}
+
+impl Choice {
+    /// Select what is believed to be the best prefilter algorithm for the
+    /// match semantics and sequence of needles given.
+    ///
+    /// This selection algorithm uses the needles as given without any
+    /// modification. For example, if `[bar]` is given, then this doesn't
+    /// try to select `memchr` for `b`. Instead, it would select `memmem`
+    /// for `bar`. If callers would want `memchr` selected for `[bar]`, then
+    /// callers should massage the literals themselves. That is, callers are
+    /// responsible for heuristics surrounding which sequence of literals is
+    /// best.
+    ///
+    /// What this selection algorithm does is attempt to use the fastest
+    /// prefilter that works for the literals given. So if `[a, b]` is given,
+    /// then `memchr2` is selected.
+    ///
+    /// Of course, which prefilter is selected is also subject to what
+    /// is available. For example, if `alloc` isn't enabled, then
+    /// that limits which prefilters can be selected. Similarly, if
+    /// `perf-literal-substring` isn't enabled, then nothing from the `memchr`
+    /// crate can be returned.
+    pub(crate) fn new<B: AsRef<[u8]>>(
+        kind: MatchKind,
+        needles: &[B],
+    ) -> Option<Choice> {
+        // An empty set means the regex matches nothing, so no sense in
+        // building a prefilter.
+ if needles.len() == 0 { + debug!("prefilter building failed: found empty set of literals"); + return None; + } + // If the regex can match the empty string, then the prefilter + // will by definition match at every position. This is obviously + // completely ineffective. + if needles.iter().any(|n| n.as_ref().is_empty()) { + debug!("prefilter building failed: literals match empty string"); + return None; + } + // BREADCRUMBS: Perhaps the literal optimizer should special case + // sequences of length two or three if the leading bytes of each are + // "rare"? Or perhaps, if there are two or three total possible leading + // bytes, regardless of the number of literals, and all are rare... + // Then well, perhaps we should use memchr2 or memchr3 in those cases? + if let Some(pre) = Memchr::new(kind, needles) { + debug!("prefilter built: memchr"); + return Some(Choice::Memchr(pre)); + } + if let Some(pre) = Memchr2::new(kind, needles) { + debug!("prefilter built: memchr2"); + return Some(Choice::Memchr2(pre)); + } + if let Some(pre) = Memchr3::new(kind, needles) { + debug!("prefilter built: memchr3"); + return Some(Choice::Memchr3(pre)); + } + if let Some(pre) = Memmem::new(kind, needles) { + debug!("prefilter built: memmem"); + return Some(Choice::Memmem(pre)); + } + if let Some(pre) = Teddy::new(kind, needles) { + debug!("prefilter built: teddy"); + return Some(Choice::Teddy(pre)); + } + if let Some(pre) = ByteSet::new(kind, needles) { + debug!("prefilter built: byteset"); + return Some(Choice::ByteSet(pre)); + } + if let Some(pre) = AhoCorasick::new(kind, needles) { + debug!("prefilter built: aho-corasick"); + return Some(Choice::AhoCorasick(pre)); + } + debug!("prefilter building failed: no strategy could be found"); + None + } +} + +/// Extracts all of the prefix literals from the given HIR expressions into a +/// single `Seq`. The literals in the sequence are ordered with respect to the +/// order of the given HIR expressions and consistent with the match semantics +/// given. +/// +/// The sequence returned is "optimized." That is, they may be shrunk or even +/// truncated according to heuristics with the intent of making them more +/// useful as a prefilter. (Which translates to both using faster algorithms +/// and minimizing the false positive rate.) +/// +/// Note that this erases any connection between the literals and which pattern +/// (or patterns) they came from. +/// +/// The match kind given must correspond to the match semantics of the regex +/// that is represented by the HIRs given. The match semantics may change the +/// literal sequence returned. +#[cfg(feature = "syntax")] +pub(crate) fn prefixes<H>(kind: MatchKind, hirs: &[H]) -> literal::Seq +where + H: core::borrow::Borrow<Hir>, +{ + let mut extractor = literal::Extractor::new(); + extractor.kind(literal::ExtractKind::Prefix); + + let mut prefixes = literal::Seq::empty(); + for hir in hirs { + prefixes.union(&mut extractor.extract(hir.borrow())); + } + debug!( + "prefixes (len={:?}, exact={:?}) extracted before optimization: {:?}", + prefixes.len(), + prefixes.is_exact(), + prefixes + ); + match kind { + MatchKind::All => { + prefixes.sort(); + prefixes.dedup(); + } + MatchKind::LeftmostFirst => { + prefixes.optimize_for_prefix_by_preference(); + } + } + debug!( + "prefixes (len={:?}, exact={:?}) extracted after optimization: {:?}", + prefixes.len(), + prefixes.is_exact(), + prefixes + ); + prefixes +} + +/// Like `prefixes`, but for all suffixes of all matches for the given HIRs. 
+#[cfg(feature = "syntax")] +pub(crate) fn suffixes<H>(kind: MatchKind, hirs: &[H]) -> literal::Seq +where + H: core::borrow::Borrow<Hir>, +{ + let mut extractor = literal::Extractor::new(); + extractor.kind(literal::ExtractKind::Suffix); + + let mut suffixes = literal::Seq::empty(); + for hir in hirs { + suffixes.union(&mut extractor.extract(hir.borrow())); + } + debug!( + "suffixes (len={:?}, exact={:?}) extracted before optimization: {:?}", + suffixes.len(), + suffixes.is_exact(), + suffixes + ); + match kind { + MatchKind::All => { + suffixes.sort(); + suffixes.dedup(); + } + MatchKind::LeftmostFirst => { + suffixes.optimize_for_suffix_by_preference(); + } + } + debug!( + "suffixes (len={:?}, exact={:?}) extracted after optimization: {:?}", + suffixes.len(), + suffixes.is_exact(), + suffixes + ); + suffixes +} diff --git a/vendor/regex-automata/src/util/prefilter/teddy.rs b/vendor/regex-automata/src/util/prefilter/teddy.rs new file mode 100644 index 0000000..fc79f2b --- /dev/null +++ b/vendor/regex-automata/src/util/prefilter/teddy.rs @@ -0,0 +1,160 @@ +use crate::util::{ + prefilter::PrefilterI, + search::{MatchKind, Span}, +}; + +#[derive(Clone, Debug)] +pub(crate) struct Teddy { + #[cfg(not(feature = "perf-literal-multisubstring"))] + _unused: (), + /// The actual Teddy searcher. + /// + /// Technically, it's possible that Teddy doesn't actually get used, since + /// Teddy does require its haystack to at least be of a certain size + /// (usually around the size of whatever vector is being used, so ~16 + /// or ~32 bytes). For haystacks shorter than that, the implementation + /// currently uses Rabin-Karp. + #[cfg(feature = "perf-literal-multisubstring")] + searcher: aho_corasick::packed::Searcher, + /// When running an anchored search, the packed searcher can't handle it so + /// we defer to Aho-Corasick itself. Kind of sad, but changing the packed + /// searchers to support anchored search would be difficult at worst and + /// annoying at best. Since packed searchers only apply to small numbers of + /// literals, we content ourselves that this is not much of an added cost. + /// (That packed searchers only work with a small number of literals is + /// also why we use a DFA here. Otherwise, the memory usage of a DFA would + /// likely be unacceptable.) + #[cfg(feature = "perf-literal-multisubstring")] + anchored_ac: aho_corasick::dfa::DFA, + /// The length of the smallest literal we look for. + /// + /// We use this as a heuristic to figure out whether this will be "fast" or + /// not. Generally, the longer the better, because longer needles are more + /// discriminating and thus reduce false positive rate. + #[cfg(feature = "perf-literal-multisubstring")] + minimum_len: usize, +} + +impl Teddy { + pub(crate) fn new<B: AsRef<[u8]>>( + kind: MatchKind, + needles: &[B], + ) -> Option<Teddy> { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + None + } + #[cfg(feature = "perf-literal-multisubstring")] + { + // We only really support leftmost-first semantics. In + // theory we could at least support leftmost-longest, as the + // aho-corasick crate does, but regex-automata doesn't know about + // leftmost-longest currently. + // + // And like the aho-corasick prefilter, if we're using `All` + // semantics, then we can still use leftmost semantics for a + // prefilter. (This might be a suspicious choice for the literal + // engine, which uses a prefilter as a regex engine directly, but + // that only happens when using leftmost-first semantics.) 
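+            //
+            // For reference, the "packed" searcher built below comes from
+            // the aho-corasick crate's packed API. A minimal sketch of using
+            // it directly (an illustration, not code from this crate):
+            //
+            //     use aho_corasick::packed::{Config, MatchKind};
+            //
+            //     let searcher = Config::new()
+            //         .match_kind(MatchKind::LeftmostFirst)
+            //         .builder()
+            //         .extend(["Sherlock", "Watson"])
+            //         .build()
+            //         .unwrap(); // None if the literals aren't supported
+            //     let m = searcher.find("Dr. Watson").unwrap();
+            //     assert_eq!((m.start(), m.end()), (4, 10));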
+ let (packed_match_kind, ac_match_kind) = match kind { + MatchKind::LeftmostFirst | MatchKind::All => ( + aho_corasick::packed::MatchKind::LeftmostFirst, + aho_corasick::MatchKind::LeftmostFirst, + ), + }; + let minimum_len = + needles.iter().map(|n| n.as_ref().len()).min().unwrap_or(0); + let packed = aho_corasick::packed::Config::new() + .match_kind(packed_match_kind) + .builder() + .extend(needles) + .build()?; + let anchored_ac = aho_corasick::dfa::DFA::builder() + .match_kind(ac_match_kind) + .start_kind(aho_corasick::StartKind::Anchored) + .prefilter(false) + .build(needles) + .ok()?; + Some(Teddy { searcher: packed, anchored_ac, minimum_len }) + } + } +} + +impl PrefilterI for Teddy { + fn find(&self, haystack: &[u8], span: Span) -> Option<Span> { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-multisubstring")] + { + let ac_span = + aho_corasick::Span { start: span.start, end: span.end }; + self.searcher + .find_in(haystack, ac_span) + .map(|m| Span { start: m.start(), end: m.end() }) + } + } + + fn prefix(&self, haystack: &[u8], span: Span) -> Option<Span> { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-multisubstring")] + { + use aho_corasick::automaton::Automaton; + let input = aho_corasick::Input::new(haystack) + .anchored(aho_corasick::Anchored::Yes) + .span(span.start..span.end); + self.anchored_ac + .try_find(&input) + // OK because we build the DFA with anchored support. + .expect("aho-corasick DFA should never fail") + .map(|m| Span { start: m.start(), end: m.end() }) + } + } + + fn memory_usage(&self) -> usize { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-multisubstring")] + { + use aho_corasick::automaton::Automaton; + self.searcher.memory_usage() + self.anchored_ac.memory_usage() + } + } + + fn is_fast(&self) -> bool { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-multisubstring")] + { + // Teddy is usually quite fast, but I have seen some cases where + // a large number of literals can overwhelm it and make it not so + // fast. We make an educated but conservative guess at a limit, at + // which point, we're not so comfortable thinking Teddy is "fast." + // + // Well... this used to incorporate a "limit" on the *number* + // of literals, but I have since changed it to a minimum on the + // *smallest* literal. Namely, when there is a very small literal + // (1 or 2 bytes), it is far more likely that it leads to a higher + // false positive rate. (Although, of course, not always. For + // example, 'zq' is likely to have a very low false positive rate.) + // But when we have 3 bytes, we have a really good chance of being + // quite discriminatory and thus fast. + // + // We may still want to add some kind of limit on the number of + // literals here, but keep in mind that Teddy already has its own + // somewhat small limit (64 at time of writing). The main issue + // here is that if 'is_fast' is false, it opens the door for the + // reverse inner optimization to kick in. We really only want to + // resort to the reverse inner optimization if we absolutely must. 
+            self.minimum_len >= 3
+        }
+    }
+}
diff --git a/vendor/regex-automata/src/util/primitives.rs b/vendor/regex-automata/src/util/primitives.rs
new file mode 100644
index 0000000..5c5d187
--- /dev/null
+++ b/vendor/regex-automata/src/util/primitives.rs
@@ -0,0 +1,776 @@
+/*!
+Lower level primitive types that are useful in a variety of circumstances.
+
+# Overview
+
+This list represents the principal types in this module and briefly describes
+when you might want to use them.
+
+* [`PatternID`] - A type that represents the identifier of a regex pattern.
+This is probably the most widely used type in this module (which is why it's
+also re-exported in the crate root).
+* [`StateID`] - A type that represents the identifier of a finite automaton
+state. This is used for both NFAs and DFAs, with the notable exception of
+the hybrid NFA/DFA. (The hybrid NFA/DFA uses a special purpose "lazy" state
+identifier.)
+* [`SmallIndex`] - The internal representation of both a `PatternID` and a
+`StateID`. Its purpose is to serve as a type that can index memory without
+being as big as a `usize` on 64-bit targets. The main idea behind this type
+is that there are many things in regex engines that will, in practice, never
+overflow a 32-bit integer. (For example, the number of patterns in a regex
+or the number of states in an NFA.) Thus, a `SmallIndex` can be used to index
+memory without peppering `as` casts everywhere. Moreover, it forces callers
+to handle errors in the case where, somehow, the value would otherwise overflow
+either a 32-bit integer or a `usize` (e.g., on 16-bit targets).
+* [`NonMaxUsize`] - Represents a `usize` that cannot be `usize::MAX`. As a
+result, `Option<NonMaxUsize>` has the same size in memory as a `usize`. This
+is useful, for example, when representing the offsets of submatches since it
+reduces memory usage by a factor of 2. It is a legal optimization since Rust
+guarantees that slices never have a length that exceeds `isize::MAX`.
+*/
+
+use core::num::NonZeroUsize;
+
+#[cfg(feature = "alloc")]
+use alloc::vec::Vec;
+
+use crate::util::int::{Usize, U16, U32, U64};
+
+/// A `usize` that can never be `usize::MAX`.
+///
+/// This is similar to `core::num::NonZeroUsize`, but instead of not permitting
+/// a zero value, this does not permit a max value.
+///
+/// This is useful in certain contexts where one wants to optimize the memory
+/// usage of things that contain match offsets. Namely, since Rust slices
+/// are guaranteed to never have a length exceeding `isize::MAX`, we can use
+/// `usize::MAX` as a sentinel to indicate that no match was found. Indeed,
+/// types like `Option<NonMaxUsize>` have exactly the same size in memory as a
+/// `usize`.
+///
+/// This type is defined to be `repr(transparent)` for
+/// `core::num::NonZeroUsize`, which is in turn defined to be
+/// `repr(transparent)` for `usize`.
+#[derive(Clone, Copy, Eq, Hash, PartialEq, PartialOrd, Ord)]
+#[repr(transparent)]
+pub struct NonMaxUsize(NonZeroUsize);
+
+impl NonMaxUsize {
+    /// Create a new `NonMaxUsize` from the given value.
+    ///
+    /// This returns `None` only when the given value is equal to `usize::MAX`.
+    #[inline]
+    pub fn new(value: usize) -> Option<NonMaxUsize> {
+        NonZeroUsize::new(value.wrapping_add(1)).map(NonMaxUsize)
+    }
+
+    /// Return the underlying `usize` value. The returned value is guaranteed
+    /// to not equal `usize::MAX`.
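+    // A minimal sketch of how the offset-by-one representation above behaves,
+    // using only `new` and `get` (and the niche guarantee that makes
+    // `Option<NonMaxUsize>` usize-sized):
+    //
+    //     assert_eq!(
+    //         core::mem::size_of::<usize>(),
+    //         core::mem::size_of::<Option<NonMaxUsize>>(),
+    //     );
+    //     let n = NonMaxUsize::new(5).unwrap();
+    //     assert_eq!(5, n.get()); // stored internally as NonZeroUsize(6)
+    //     assert!(NonMaxUsize::new(usize::MAX).is_none());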
+ #[inline] + pub fn get(self) -> usize { + self.0.get().wrapping_sub(1) + } +} + +// We provide our own Debug impl because seeing the internal repr can be quite +// surprising if you aren't expecting it. e.g., 'NonMaxUsize(5)' vs just '5'. +impl core::fmt::Debug for NonMaxUsize { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "{:?}", self.get()) + } +} + +/// A type that represents a "small" index. +/// +/// The main idea of this type is to provide something that can index memory, +/// but uses less memory than `usize` on 64-bit systems. Specifically, its +/// representation is always a `u32` and has `repr(transparent)` enabled. (So +/// it is safe to transmute between a `u32` and a `SmallIndex`.) +/// +/// A small index is typically useful in cases where there is no practical way +/// that the index will overflow a 32-bit integer. A good example of this is +/// an NFA state. If you could somehow build an NFA with `2^30` states, its +/// memory usage would be exorbitant and its runtime execution would be so +/// slow as to be completely worthless. Therefore, this crate generally deems +/// it acceptable to return an error if it would otherwise build an NFA that +/// requires a slice longer than what a 32-bit integer can index. In exchange, +/// we can use 32-bit indices instead of 64-bit indices in various places. +/// +/// This type ensures this by providing a constructor that will return an error +/// if its argument cannot fit into the type. This makes it much easier to +/// handle these sorts of boundary cases that are otherwise extremely subtle. +/// +/// On all targets, this type guarantees that its value will fit in a `u32`, +/// `i32`, `usize` and an `isize`. This means that on 16-bit targets, for +/// example, this type's maximum value will never overflow an `isize`, +/// which means it will never overflow a `i16` even though its internal +/// representation is still a `u32`. +/// +/// The purpose for making the type fit into even signed integer types like +/// `isize` is to guarantee that the difference between any two small indices +/// is itself also a small index. This is useful in certain contexts, e.g., +/// for delta encoding. +/// +/// # Other types +/// +/// The following types wrap `SmallIndex` to provide a more focused use case: +/// +/// * [`PatternID`] is for representing the identifiers of patterns. +/// * [`StateID`] is for representing the identifiers of states in finite +/// automata. It is used for both NFAs and DFAs. +/// +/// # Representation +/// +/// This type is always represented internally by a `u32` and is marked as +/// `repr(transparent)`. Thus, this type always has the same representation as +/// a `u32`. It is thus safe to transmute between a `u32` and a `SmallIndex`. +/// +/// # Indexing +/// +/// For convenience, callers may use a `SmallIndex` to index slices. +/// +/// # Safety +/// +/// While a `SmallIndex` is meant to guarantee that its value fits into `usize` +/// without using as much space as a `usize` on all targets, callers must +/// not rely on this property for safety. Callers may choose to rely on this +/// property for correctness however. For example, creating a `SmallIndex` with +/// an invalid value can be done in entirely safe code. This may in turn result +/// in panics or silent logical errors. +#[derive( + Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord, +)] +#[repr(transparent)] +pub struct SmallIndex(u32); + +impl SmallIndex { + /// The maximum index value. 
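+    ///
+    /// # Example
+    ///
+    /// An illustrative sketch of the relationship between the maximum value
+    /// and the limit (assuming the `regex_automata::util::primitives` path
+    /// referenced in this module's docs):
+    ///
+    /// ```
+    /// use regex_automata::util::primitives::SmallIndex;
+    ///
+    /// // The limit is always one more than the maximum value.
+    /// assert_eq!(SmallIndex::MAX.as_usize() + 1, SmallIndex::LIMIT);
+    /// // The maximum value itself is still a valid small index.
+    /// assert!(SmallIndex::new(SmallIndex::MAX.as_usize()).is_ok());
+    /// ```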
+ #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] + pub const MAX: SmallIndex = + // FIXME: Use as_usize() once const functions in traits are stable. + SmallIndex::new_unchecked(core::i32::MAX as usize - 1); + + /// The maximum index value. + #[cfg(target_pointer_width = "16")] + pub const MAX: SmallIndex = + SmallIndex::new_unchecked(core::isize::MAX - 1); + + /// The total number of values that can be represented as a small index. + pub const LIMIT: usize = SmallIndex::MAX.as_usize() + 1; + + /// The zero index value. + pub const ZERO: SmallIndex = SmallIndex::new_unchecked(0); + + /// The number of bytes that a single small index uses in memory. + pub const SIZE: usize = core::mem::size_of::<SmallIndex>(); + + /// Create a new small index. + /// + /// If the given index exceeds [`SmallIndex::MAX`], then this returns + /// an error. + #[inline] + pub fn new(index: usize) -> Result<SmallIndex, SmallIndexError> { + SmallIndex::try_from(index) + } + + /// Create a new small index without checking whether the given value + /// exceeds [`SmallIndex::MAX`]. + /// + /// Using this routine with an invalid index value will result in + /// unspecified behavior, but *not* undefined behavior. In particular, an + /// invalid index value is likely to cause panics or possibly even silent + /// logical errors. + /// + /// Callers must never rely on a `SmallIndex` to be within a certain range + /// for memory safety. + #[inline] + pub const fn new_unchecked(index: usize) -> SmallIndex { + // FIXME: Use as_u32() once const functions in traits are stable. + SmallIndex(index as u32) + } + + /// Like [`SmallIndex::new`], but panics if the given index is not valid. + #[inline] + pub fn must(index: usize) -> SmallIndex { + SmallIndex::new(index).expect("invalid small index") + } + + /// Return this small index as a `usize`. This is guaranteed to never + /// overflow `usize`. + #[inline] + pub const fn as_usize(&self) -> usize { + // FIXME: Use as_usize() once const functions in traits are stable. + self.0 as usize + } + + /// Return this small index as a `u64`. This is guaranteed to never + /// overflow. + #[inline] + pub const fn as_u64(&self) -> u64 { + // FIXME: Use u64::from() once const functions in traits are stable. + self.0 as u64 + } + + /// Return the internal `u32` of this small index. This is guaranteed to + /// never overflow `u32`. + #[inline] + pub const fn as_u32(&self) -> u32 { + self.0 + } + + /// Return the internal `u32` of this small index represented as an `i32`. + /// This is guaranteed to never overflow an `i32`. + #[inline] + pub const fn as_i32(&self) -> i32 { + // This is OK because we guarantee that our max value is <= i32::MAX. + self.0 as i32 + } + + /// Returns one more than this small index as a usize. + /// + /// Since a small index has constraints on its maximum value, adding `1` to + /// it will always fit in a `usize`, `u32` and a `i32`. + #[inline] + pub fn one_more(&self) -> usize { + self.as_usize() + 1 + } + + /// Decode this small index from the bytes given using the native endian + /// byte order for the current target. + /// + /// If the decoded integer is not representable as a small index for the + /// current target, then this returns an error. 
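+    ///
+    /// # Example
+    ///
+    /// A small sketch showing a native endian round-trip (assuming the
+    /// `regex_automata::util::primitives` path referenced in this module's
+    /// docs):
+    ///
+    /// ```
+    /// use regex_automata::util::primitives::SmallIndex;
+    ///
+    /// let index = SmallIndex::must(1234);
+    /// assert_eq!(Ok(index), SmallIndex::from_ne_bytes(index.to_ne_bytes()));
+    /// ```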
+ #[inline] + pub fn from_ne_bytes( + bytes: [u8; 4], + ) -> Result<SmallIndex, SmallIndexError> { + let id = u32::from_ne_bytes(bytes); + if id > SmallIndex::MAX.as_u32() { + return Err(SmallIndexError { attempted: u64::from(id) }); + } + Ok(SmallIndex::new_unchecked(id.as_usize())) + } + + /// Decode this small index from the bytes given using the native endian + /// byte order for the current target. + /// + /// This is analogous to [`SmallIndex::new_unchecked`] in that is does not + /// check whether the decoded integer is representable as a small index. + #[inline] + pub fn from_ne_bytes_unchecked(bytes: [u8; 4]) -> SmallIndex { + SmallIndex::new_unchecked(u32::from_ne_bytes(bytes).as_usize()) + } + + /// Return the underlying small index integer as raw bytes in native endian + /// format. + #[inline] + pub fn to_ne_bytes(&self) -> [u8; 4] { + self.0.to_ne_bytes() + } +} + +impl<T> core::ops::Index<SmallIndex> for [T] { + type Output = T; + + #[inline] + fn index(&self, index: SmallIndex) -> &T { + &self[index.as_usize()] + } +} + +impl<T> core::ops::IndexMut<SmallIndex> for [T] { + #[inline] + fn index_mut(&mut self, index: SmallIndex) -> &mut T { + &mut self[index.as_usize()] + } +} + +#[cfg(feature = "alloc")] +impl<T> core::ops::Index<SmallIndex> for Vec<T> { + type Output = T; + + #[inline] + fn index(&self, index: SmallIndex) -> &T { + &self[index.as_usize()] + } +} + +#[cfg(feature = "alloc")] +impl<T> core::ops::IndexMut<SmallIndex> for Vec<T> { + #[inline] + fn index_mut(&mut self, index: SmallIndex) -> &mut T { + &mut self[index.as_usize()] + } +} + +impl From<u8> for SmallIndex { + fn from(index: u8) -> SmallIndex { + SmallIndex::new_unchecked(usize::from(index)) + } +} + +impl TryFrom<u16> for SmallIndex { + type Error = SmallIndexError; + + fn try_from(index: u16) -> Result<SmallIndex, SmallIndexError> { + if u32::from(index) > SmallIndex::MAX.as_u32() { + return Err(SmallIndexError { attempted: u64::from(index) }); + } + Ok(SmallIndex::new_unchecked(index.as_usize())) + } +} + +impl TryFrom<u32> for SmallIndex { + type Error = SmallIndexError; + + fn try_from(index: u32) -> Result<SmallIndex, SmallIndexError> { + if index > SmallIndex::MAX.as_u32() { + return Err(SmallIndexError { attempted: u64::from(index) }); + } + Ok(SmallIndex::new_unchecked(index.as_usize())) + } +} + +impl TryFrom<u64> for SmallIndex { + type Error = SmallIndexError; + + fn try_from(index: u64) -> Result<SmallIndex, SmallIndexError> { + if index > SmallIndex::MAX.as_u64() { + return Err(SmallIndexError { attempted: index }); + } + Ok(SmallIndex::new_unchecked(index.as_usize())) + } +} + +impl TryFrom<usize> for SmallIndex { + type Error = SmallIndexError; + + fn try_from(index: usize) -> Result<SmallIndex, SmallIndexError> { + if index > SmallIndex::MAX.as_usize() { + return Err(SmallIndexError { attempted: index.as_u64() }); + } + Ok(SmallIndex::new_unchecked(index)) + } +} + +#[cfg(test)] +impl quickcheck::Arbitrary for SmallIndex { + fn arbitrary(gen: &mut quickcheck::Gen) -> SmallIndex { + use core::cmp::max; + + let id = max(i32::MIN + 1, i32::arbitrary(gen)).abs(); + if id > SmallIndex::MAX.as_i32() { + SmallIndex::MAX + } else { + SmallIndex::new(usize::try_from(id).unwrap()).unwrap() + } + } +} + +/// This error occurs when a small index could not be constructed. +/// +/// This occurs when given an integer exceeding the maximum small index value. +/// +/// When the `std` feature is enabled, this implements the `Error` trait. 
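+///
+/// # Example
+///
+/// A brief sketch of how this error arises and what it reports (assuming the
+/// `regex_automata::util::primitives` path referenced in this module's docs):
+///
+/// ```
+/// use regex_automata::util::primitives::SmallIndex;
+///
+/// // SmallIndex::LIMIT is the smallest value that is rejected.
+/// let err = SmallIndex::new(SmallIndex::LIMIT).unwrap_err();
+/// assert_eq!(SmallIndex::LIMIT as u64, err.attempted());
+/// ```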
+#[derive(Clone, Debug, Eq, PartialEq)] +pub struct SmallIndexError { + attempted: u64, +} + +impl SmallIndexError { + /// Returns the value that could not be converted to a small index. + pub fn attempted(&self) -> u64 { + self.attempted + } +} + +#[cfg(feature = "std")] +impl std::error::Error for SmallIndexError {} + +impl core::fmt::Display for SmallIndexError { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!( + f, + "failed to create small index from {:?}, which exceeds {:?}", + self.attempted(), + SmallIndex::MAX, + ) + } +} + +#[derive(Clone, Debug)] +pub(crate) struct SmallIndexIter { + rng: core::ops::Range<usize>, +} + +impl Iterator for SmallIndexIter { + type Item = SmallIndex; + + fn next(&mut self) -> Option<SmallIndex> { + if self.rng.start >= self.rng.end { + return None; + } + let next_id = self.rng.start + 1; + let id = core::mem::replace(&mut self.rng.start, next_id); + // new_unchecked is OK since we asserted that the number of + // elements in this iterator will fit in an ID at construction. + Some(SmallIndex::new_unchecked(id)) + } +} + +macro_rules! index_type_impls { + ($name:ident, $err:ident, $iter:ident, $withiter:ident) => { + impl $name { + /// The maximum value. + pub const MAX: $name = $name(SmallIndex::MAX); + + /// The total number of values that can be represented. + pub const LIMIT: usize = SmallIndex::LIMIT; + + /// The zero value. + pub const ZERO: $name = $name(SmallIndex::ZERO); + + /// The number of bytes that a single value uses in memory. + pub const SIZE: usize = SmallIndex::SIZE; + + /// Create a new value that is represented by a "small index." + /// + /// If the given index exceeds the maximum allowed value, then this + /// returns an error. + #[inline] + pub fn new(value: usize) -> Result<$name, $err> { + SmallIndex::new(value).map($name).map_err($err) + } + + /// Create a new value without checking whether the given argument + /// exceeds the maximum. + /// + /// Using this routine with an invalid value will result in + /// unspecified behavior, but *not* undefined behavior. In + /// particular, an invalid ID value is likely to cause panics or + /// possibly even silent logical errors. + /// + /// Callers must never rely on this type to be within a certain + /// range for memory safety. + #[inline] + pub const fn new_unchecked(value: usize) -> $name { + $name(SmallIndex::new_unchecked(value)) + } + + /// Like `new`, but panics if the given value is not valid. + #[inline] + pub fn must(value: usize) -> $name { + $name::new(value).expect(concat!( + "invalid ", + stringify!($name), + " value" + )) + } + + /// Return the internal value as a `usize`. This is guaranteed to + /// never overflow `usize`. + #[inline] + pub const fn as_usize(&self) -> usize { + self.0.as_usize() + } + + /// Return the internal value as a `u64`. This is guaranteed to + /// never overflow. + #[inline] + pub const fn as_u64(&self) -> u64 { + self.0.as_u64() + } + + /// Return the internal value as a `u32`. This is guaranteed to + /// never overflow `u32`. + #[inline] + pub const fn as_u32(&self) -> u32 { + self.0.as_u32() + } + + /// Return the internal value as a i32`. This is guaranteed to + /// never overflow an `i32`. + #[inline] + pub const fn as_i32(&self) -> i32 { + self.0.as_i32() + } + + /// Returns one more than this value as a usize. + /// + /// Since values represented by a "small index" have constraints + /// on their maximum value, adding `1` to it will always fit in a + /// `usize`, `u32` and a `i32`. 
+ #[inline] + pub fn one_more(&self) -> usize { + self.0.one_more() + } + + /// Decode this value from the bytes given using the native endian + /// byte order for the current target. + /// + /// If the decoded integer is not representable as a small index + /// for the current target, then this returns an error. + #[inline] + pub fn from_ne_bytes(bytes: [u8; 4]) -> Result<$name, $err> { + SmallIndex::from_ne_bytes(bytes).map($name).map_err($err) + } + + /// Decode this value from the bytes given using the native endian + /// byte order for the current target. + /// + /// This is analogous to `new_unchecked` in that is does not check + /// whether the decoded integer is representable as a small index. + #[inline] + pub fn from_ne_bytes_unchecked(bytes: [u8; 4]) -> $name { + $name(SmallIndex::from_ne_bytes_unchecked(bytes)) + } + + /// Return the underlying integer as raw bytes in native endian + /// format. + #[inline] + pub fn to_ne_bytes(&self) -> [u8; 4] { + self.0.to_ne_bytes() + } + + /// Returns an iterator over all values from 0 up to and not + /// including the given length. + /// + /// If the given length exceeds this type's limit, then this + /// panics. + pub(crate) fn iter(len: usize) -> $iter { + $iter::new(len) + } + } + + // We write our own Debug impl so that we get things like PatternID(5) + // instead of PatternID(SmallIndex(5)). + impl core::fmt::Debug for $name { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_tuple(stringify!($name)).field(&self.as_u32()).finish() + } + } + + impl<T> core::ops::Index<$name> for [T] { + type Output = T; + + #[inline] + fn index(&self, index: $name) -> &T { + &self[index.as_usize()] + } + } + + impl<T> core::ops::IndexMut<$name> for [T] { + #[inline] + fn index_mut(&mut self, index: $name) -> &mut T { + &mut self[index.as_usize()] + } + } + + #[cfg(feature = "alloc")] + impl<T> core::ops::Index<$name> for Vec<T> { + type Output = T; + + #[inline] + fn index(&self, index: $name) -> &T { + &self[index.as_usize()] + } + } + + #[cfg(feature = "alloc")] + impl<T> core::ops::IndexMut<$name> for Vec<T> { + #[inline] + fn index_mut(&mut self, index: $name) -> &mut T { + &mut self[index.as_usize()] + } + } + + impl From<u8> for $name { + fn from(value: u8) -> $name { + $name(SmallIndex::from(value)) + } + } + + impl TryFrom<u16> for $name { + type Error = $err; + + fn try_from(value: u16) -> Result<$name, $err> { + SmallIndex::try_from(value).map($name).map_err($err) + } + } + + impl TryFrom<u32> for $name { + type Error = $err; + + fn try_from(value: u32) -> Result<$name, $err> { + SmallIndex::try_from(value).map($name).map_err($err) + } + } + + impl TryFrom<u64> for $name { + type Error = $err; + + fn try_from(value: u64) -> Result<$name, $err> { + SmallIndex::try_from(value).map($name).map_err($err) + } + } + + impl TryFrom<usize> for $name { + type Error = $err; + + fn try_from(value: usize) -> Result<$name, $err> { + SmallIndex::try_from(value).map($name).map_err($err) + } + } + + #[cfg(test)] + impl quickcheck::Arbitrary for $name { + fn arbitrary(gen: &mut quickcheck::Gen) -> $name { + $name(SmallIndex::arbitrary(gen)) + } + } + + /// This error occurs when a value could not be constructed. + /// + /// This occurs when given an integer exceeding the maximum allowed + /// value. + /// + /// When the `std` feature is enabled, this implements the `Error` + /// trait. 
+ #[derive(Clone, Debug, Eq, PartialEq)] + pub struct $err(SmallIndexError); + + impl $err { + /// Returns the value that could not be converted to an ID. + pub fn attempted(&self) -> u64 { + self.0.attempted() + } + } + + #[cfg(feature = "std")] + impl std::error::Error for $err {} + + impl core::fmt::Display for $err { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!( + f, + "failed to create {} from {:?}, which exceeds {:?}", + stringify!($name), + self.attempted(), + $name::MAX, + ) + } + } + + #[derive(Clone, Debug)] + pub(crate) struct $iter(SmallIndexIter); + + impl $iter { + fn new(len: usize) -> $iter { + assert!( + len <= $name::LIMIT, + "cannot create iterator for {} when number of \ + elements exceed {:?}", + stringify!($name), + $name::LIMIT, + ); + $iter(SmallIndexIter { rng: 0..len }) + } + } + + impl Iterator for $iter { + type Item = $name; + + fn next(&mut self) -> Option<$name> { + self.0.next().map($name) + } + } + + /// An iterator adapter that is like std::iter::Enumerate, but attaches + /// small index values instead. It requires `ExactSizeIterator`. At + /// construction, it ensures that the index of each element in the + /// iterator is representable in the corresponding small index type. + #[derive(Clone, Debug)] + pub(crate) struct $withiter<I> { + it: I, + ids: $iter, + } + + impl<I: Iterator + ExactSizeIterator> $withiter<I> { + fn new(it: I) -> $withiter<I> { + let ids = $name::iter(it.len()); + $withiter { it, ids } + } + } + + impl<I: Iterator + ExactSizeIterator> Iterator for $withiter<I> { + type Item = ($name, I::Item); + + fn next(&mut self) -> Option<($name, I::Item)> { + let item = self.it.next()?; + // Number of elements in this iterator must match, according + // to contract of ExactSizeIterator. + let id = self.ids.next().unwrap(); + Some((id, item)) + } + } + }; +} + +/// The identifier of a regex pattern, represented by a [`SmallIndex`]. +/// +/// The identifier for a pattern corresponds to its relative position among +/// other patterns in a single finite state machine. Namely, when building +/// a multi-pattern regex engine, one must supply a sequence of patterns to +/// match. The position (starting at 0) of each pattern in that sequence +/// represents its identifier. This identifier is in turn used to identify and +/// report matches of that pattern in various APIs. +/// +/// See the [`SmallIndex`] type for more information about what it means for +/// a pattern ID to be a "small index." +/// +/// Note that this type is defined in the +/// [`util::primitives`](crate::util::primitives) module, but it is also +/// re-exported at the crate root due to how common it is. +#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, PartialOrd, Ord)] +#[repr(transparent)] +pub struct PatternID(SmallIndex); + +/// The identifier of a finite automaton state, represented by a +/// [`SmallIndex`]. +/// +/// Most regex engines in this crate are built on top of finite automata. Each +/// state in a finite automaton defines transitions from its state to another. +/// Those transitions point to other states via their identifiers, i.e., a +/// `StateID`. Since finite automata tend to contain many transitions, it is +/// much more memory efficient to define state IDs as small indices. +/// +/// See the [`SmallIndex`] type for more information about what it means for +/// a state ID to be a "small index." 
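+///
+/// # Example
+///
+/// An illustrative sketch (assuming the `regex_automata::util::primitives`
+/// path referenced in the docs above):
+///
+/// ```
+/// use regex_automata::util::primitives::StateID;
+///
+/// let sid = StateID::must(42);
+/// assert_eq!(42, sid.as_usize());
+/// // Identifiers that do not fit in a "small index" are rejected.
+/// assert!(StateID::new(usize::MAX).is_err());
+/// ```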
+#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, PartialOrd, Ord)]
+#[repr(transparent)]
+pub struct StateID(SmallIndex);
+
+index_type_impls!(PatternID, PatternIDError, PatternIDIter, WithPatternIDIter);
+index_type_impls!(StateID, StateIDError, StateIDIter, WithStateIDIter);
+
+/// A utility trait that defines a couple of adapters for making it convenient
+/// to access indices as "small index" types. We require ExactSizeIterator so
+/// that iterator construction can do a single check to make sure the index of
+/// each element is representable by its small index type.
+pub(crate) trait IteratorIndexExt: Iterator {
+    fn with_pattern_ids(self) -> WithPatternIDIter<Self>
+    where
+        Self: Sized + ExactSizeIterator,
+    {
+        WithPatternIDIter::new(self)
+    }
+
+    fn with_state_ids(self) -> WithStateIDIter<Self>
+    where
+        Self: Sized + ExactSizeIterator,
+    {
+        WithStateIDIter::new(self)
+    }
+}
+
+impl<I: Iterator> IteratorIndexExt for I {}
diff --git a/vendor/regex-automata/src/util/search.rs b/vendor/regex-automata/src/util/search.rs
new file mode 100644
index 0000000..39aec52
--- /dev/null
+++ b/vendor/regex-automata/src/util/search.rs
@@ -0,0 +1,1969 @@
+/*!
+Types and routines that support the search APIs of most regex engines.
+
+This sub-module isn't exposed directly, but rather, its contents are exported
+at the crate root due to the universality of most of the types and routines in
+this module.
+*/
+
+use core::ops::{Range, RangeBounds};
+
+use crate::util::{escape::DebugByte, primitives::PatternID, utf8};
+
+/// The parameters for a regex search including the haystack to search.
+///
+/// It turns out that regex searches have a few parameters, and in most cases,
+/// those parameters have defaults that work in the vast majority of cases.
+/// This `Input` type exists to make that common case seamless while also
+/// providing an avenue for changing the parameters of a search. In particular,
+/// this type enables doing so without a combinatorial explosion of different
+/// methods and/or superfluous parameters in the common cases.
+///
+/// An `Input` permits configuring the following things:
+///
+/// * Search only a substring of a haystack, while taking the broader context
+/// into account for resolving look-around assertions.
+/// * Indicating whether to search for all patterns in a regex, or to
+/// only search for one pattern in particular.
+/// * Whether to perform an anchored or unanchored search.
+/// * Whether to report a match as early as possible.
+///
+/// All of these parameters, except for the haystack, have sensible default
+/// values. This means that the minimal search configuration is simply a call
+/// to [`Input::new`] with your haystack. Setting any other parameter is
+/// optional.
+///
+/// Moreover, for any `H` that implements `AsRef<[u8]>`, there exists a
+/// `From<H> for Input` implementation. This is useful because many of the
+/// search APIs in this crate accept an `Into<Input>`. This means you can
+/// provide string or byte strings to these routines directly, and they'll
+/// automatically get converted into an `Input` for you.
+///
+/// The lifetime parameter `'h` refers to the lifetime of the haystack.
+///
+/// # Organization
+///
+/// The API of `Input` is split into a few different parts:
+///
+/// * A builder-like API that transforms an `Input` by value. Examples:
+/// [`Input::span`] and [`Input::anchored`].
+/// * A setter API that permits mutating parameters in place. Examples:
+/// [`Input::set_span`] and [`Input::set_anchored`].
+/// * A getter API that permits retrieving any of the search parameters.
+/// Examples: [`Input::get_span`] and [`Input::get_anchored`].
+/// * A few convenience getter routines that don't conform to the above naming
+/// pattern due to how common they are. Examples: [`Input::haystack`],
+/// [`Input::start`] and [`Input::end`].
+/// * Miscellaneous predicates and other helper routines that are useful
+/// in some contexts. Examples: [`Input::is_char_boundary`].
+///
+/// An `Input` exposes so much because it is meant to be used by both callers
+/// of regex engines _and_ implementors of regex engines. A constraining factor
+/// is that regex engines should accept a `&Input` as their lowest level API,
+/// which means that implementors should only use the "getter" APIs of an
+/// `Input`.
+///
+/// # Valid bounds and search termination
+///
+/// An `Input` permits setting the bounds of a search via either
+/// [`Input::span`] or [`Input::range`]. The bounds set must be valid, or
+/// else a panic will occur. Bounds are valid if and only if:
+///
+/// * The bounds represent a valid range into the input's haystack.
+/// * **or** the end bound is a valid ending bound for the haystack *and*
+/// the start bound is exactly one greater than the end bound.
+///
+/// In the latter case, [`Input::is_done`] will return true, which indicates
+/// that any search receiving such an input should immediately return with no
+/// match.
+///
+/// Note that while `Input` is used for reverse searches in this crate, the
+/// `Input::is_done` predicate assumes a forward search. Because unsigned
+/// offsets are used internally, there is no way to tell from only the offsets
+/// whether a reverse search is done or not.
+///
+/// # Regex engine support
+///
+/// Any regex engine accepting an `Input` must support at least the following
+/// things:
+///
+/// * Searching a `&[u8]` for matches.
+/// * Searching a substring of `&[u8]` for a match, such that any match
+/// reported must appear entirely within that substring.
+/// * For a forwards search, a match should never be reported when
+/// [`Input::is_done`] returns true. (For reverse searches, termination should
+/// be handled outside of `Input`.)
+///
+/// Supporting other aspects of an `Input` is optional, but regex engines
+/// should handle aspects they don't support gracefully. How this is done is
+/// generally up to the regex engine. For example, this crate generally treats
+/// unsupported anchored modes as an error to report, but for simplicity, in
+/// the meta regex engine, trying to search with an invalid pattern ID just
+/// results in no match being reported.
+#[derive(Clone)]
+pub struct Input<'h> {
+    haystack: &'h [u8],
+    span: Span,
+    anchored: Anchored,
+    earliest: bool,
+}
+
+impl<'h> Input<'h> {
+    /// Create a new search configuration for the given haystack.
+    #[inline]
+    pub fn new<H: ?Sized + AsRef<[u8]>>(haystack: &'h H) -> Input<'h> {
+        Input {
+            haystack: haystack.as_ref(),
+            span: Span { start: 0, end: haystack.as_ref().len() },
+            anchored: Anchored::No,
+            earliest: false,
+        }
+    }
+
+    /// Set the span for this search.
+    ///
+    /// Note that [`Input::set_span`], to which this routine delegates, panics
+    /// if the span given is not a valid range for this search's haystack.
+    ///
+    /// This routine is generic over how a span is provided. While
+    /// a [`Span`] may be given directly, one may also provide a
+    /// `std::ops::Range<usize>`.
To provide anything supported by range + /// syntax, use the [`Input::range`] method. + /// + /// The default span is the entire haystack. + /// + /// Note that [`Input::range`] overrides this method and vice versa. + /// + /// # Panics + /// + /// This panics if the given span does not correspond to valid bounds in + /// the haystack or the termination of a search. + /// + /// # Example + /// + /// This example shows how the span of the search can impact whether a + /// match is reported or not. This is particularly relevant for look-around + /// operators, which might take things outside of the span into account + /// when determining whether they match. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// Match, Input, + /// }; + /// + /// // Look for 'at', but as a distinct word. + /// let re = PikeVM::new(r"\bat\b")?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// + /// // Our haystack contains 'at', but not as a distinct word. + /// let haystack = "batter"; + /// + /// // A standard search finds nothing, as expected. + /// let input = Input::new(haystack); + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(None, caps.get_match()); + /// + /// // But if we wanted to search starting at position '1', we might + /// // slice the haystack. If we do this, it's impossible for the \b + /// // anchors to take the surrounding context into account! And thus, + /// // a match is produced. + /// let input = Input::new(&haystack[1..3]); + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(Some(Match::must(0, 0..2)), caps.get_match()); + /// + /// // But if we specify the span of the search instead of slicing the + /// // haystack, then the regex engine can "see" outside of the span + /// // and resolve the anchors correctly. + /// let input = Input::new(haystack).span(1..3); + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(None, caps.get_match()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + /// + /// This may seem a little ham-fisted, but this scenario tends to come up + /// if some other regex engine found the match span and now you need to + /// re-process that span to look for capturing groups. (e.g., Run a faster + /// DFA first, find a match, then run the PikeVM on just the match span to + /// resolve capturing groups.) In order to implement that sort of logic + /// correctly, you need to set the span on the search instead of slicing + /// the haystack directly. + /// + /// The other advantage of using this routine to specify the bounds of the + /// search is that the match offsets are still reported in terms of the + /// original haystack. For example, the second search in the example above + /// reported a match at position `0`, even though `at` starts at offset + /// `1` because we sliced the haystack. + #[inline] + pub fn span<S: Into<Span>>(mut self, span: S) -> Input<'h> { + self.set_span(span); + self + } + + /// Like `Input::span`, but accepts any range instead. + /// + /// This routine does not panic if the range given is not a valid range for + /// this search's haystack. If this search is run with an invalid range, + /// then the most likely outcome is that the actual search execution will + /// panic. + /// + /// The default range is the entire haystack. + /// + /// Note that [`Input::span`] overrides this method and vice versa. 
+ /// + /// # Panics + /// + /// This routine will panic if the given range could not be converted + /// to a valid [`Range`]. For example, this would panic when given + /// `0..=usize::MAX` since it cannot be represented using a half-open + /// interval in terms of `usize`. + /// + /// This also panics if the given range does not correspond to valid bounds + /// in the haystack or the termination of a search. + /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let input = Input::new("foobar"); + /// assert_eq!(0..6, input.get_range()); + /// + /// let input = Input::new("foobar").range(2..=4); + /// assert_eq!(2..5, input.get_range()); + /// ``` + #[inline] + pub fn range<R: RangeBounds<usize>>(mut self, range: R) -> Input<'h> { + self.set_range(range); + self + } + + /// Sets the anchor mode of a search. + /// + /// When a search is anchored (so that's [`Anchored::Yes`] or + /// [`Anchored::Pattern`]), a match must begin at the start of a search. + /// When a search is not anchored (that's [`Anchored::No`]), regex engines + /// will behave as if the pattern started with a `(?s-u:.)*?`. This prefix + /// permits a match to appear anywhere. + /// + /// By default, the anchored mode is [`Anchored::No`]. + /// + /// **WARNING:** this is subtly different than using a `^` at the start of + /// your regex. A `^` forces a regex to match exclusively at the start of + /// a haystack, regardless of where you begin your search. In contrast, + /// anchoring a search will allow your regex to match anywhere in your + /// haystack, but the match must start at the beginning of a search. + /// + /// For example, consider the haystack `aba` and the following searches: + /// + /// 1. The regex `^a` is compiled with `Anchored::No` and searches `aba` + /// starting at position `2`. Since `^` requires the match to start at + /// the beginning of the haystack and `2 > 0`, no match is found. + /// 2. The regex `a` is compiled with `Anchored::Yes` and searches `aba` + /// starting at position `2`. This reports a match at `[2, 3]` since + /// the match starts where the search started. Since there is no `^`, + /// there is no requirement for the match to start at the beginning of + /// the haystack. + /// 3. The regex `a` is compiled with `Anchored::Yes` and searches `aba` + /// starting at position `1`. Since `b` corresponds to position `1` and + /// since the search is anchored, it finds no match. While the regex + /// matches at other positions, configuring the search to be anchored + /// requires that it only report a match that begins at the same offset + /// as the beginning of the search. + /// 4. The regex `a` is compiled with `Anchored::No` and searches `aba` + /// starting at position `1`. Since the search is not anchored and + /// the regex does not start with `^`, the search executes as if there + /// is a `(?s:.)*?` prefix that permits it to match anywhere. Thus, it + /// reports a match at `[2, 3]`. + /// + /// Note that the [`Anchored::Pattern`] mode is like `Anchored::Yes`, + /// except it only reports matches for a particular pattern. + /// + /// # Example + /// + /// This demonstrates the differences between an anchored search and + /// a pattern that begins with `^` (as described in the above warning + /// message). 
+ /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// Anchored, Match, Input, + /// }; + /// + /// let haystack = "aba"; + /// + /// let re = PikeVM::new(r"^a")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let input = Input::new(haystack).span(2..3).anchored(Anchored::No); + /// re.search(&mut cache, &input, &mut caps); + /// // No match is found because 2 is not the beginning of the haystack, + /// // which is what ^ requires. + /// assert_eq!(None, caps.get_match()); + /// + /// let re = PikeVM::new(r"a")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let input = Input::new(haystack).span(2..3).anchored(Anchored::Yes); + /// re.search(&mut cache, &input, &mut caps); + /// // An anchored search can still match anywhere in the haystack, it just + /// // must begin at the start of the search which is '2' in this case. + /// assert_eq!(Some(Match::must(0, 2..3)), caps.get_match()); + /// + /// let re = PikeVM::new(r"a")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let input = Input::new(haystack).span(1..3).anchored(Anchored::Yes); + /// re.search(&mut cache, &input, &mut caps); + /// // No match is found since we start searching at offset 1 which + /// // corresponds to 'b'. Since there is no '(?s:.)*?' prefix, no match + /// // is found. + /// assert_eq!(None, caps.get_match()); + /// + /// let re = PikeVM::new(r"a")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let input = Input::new(haystack).span(1..3).anchored(Anchored::No); + /// re.search(&mut cache, &input, &mut caps); + /// // Since anchored=no, an implicit '(?s:.)*?' prefix was added to the + /// // pattern. Even though the search starts at 'b', the 'match anything' + /// // prefix allows the search to match 'a'. + /// let expected = Some(Match::must(0, 2..3)); + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn anchored(mut self, mode: Anchored) -> Input<'h> { + self.set_anchored(mode); + self + } + + /// Whether to execute an "earliest" search or not. + /// + /// When running a non-overlapping search, an "earliest" search will return + /// the match location as early as possible. For example, given a pattern + /// of `foo[0-9]+` and a haystack of `foo12345`, a normal leftmost search + /// will return `foo12345` as a match. But an "earliest" search for regex + /// engines that support "earliest" semantics will return `foo1` as a + /// match, since as soon as the first digit following `foo` is seen, it is + /// known to have found a match. + /// + /// Note that "earliest" semantics generally depend on the regex engine. + /// Different regex engines may determine there is a match at different + /// points. So there is no guarantee that "earliest" matches will always + /// return the same offsets for all regex engines. The "earliest" notion + /// is really about when the particular regex engine determines there is + /// a match rather than a consistent semantic unto itself. This is often + /// useful for implementing "did a match occur or not" predicates, but + /// sometimes the offset is useful as well. + /// + /// This is disabled by default. + /// + /// # Example + /// + /// This example shows the difference between "earliest" searching and + /// normal searching. 
+ /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match, Input}; + /// + /// let re = PikeVM::new(r"foo[0-9]+")?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// + /// // A normal search implements greediness like you expect. + /// let input = Input::new("foo12345"); + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(Some(Match::must(0, 0..8)), caps.get_match()); + /// + /// // When 'earliest' is enabled and the regex engine supports + /// // it, the search will bail once it knows a match has been + /// // found. + /// let input = Input::new("foo12345").earliest(true); + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(Some(Match::must(0, 0..4)), caps.get_match()); + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn earliest(mut self, yes: bool) -> Input<'h> { + self.set_earliest(yes); + self + } + + /// Set the span for this search configuration. + /// + /// This is like the [`Input::span`] method, except this mutates the + /// span in place. + /// + /// This routine is generic over how a span is provided. While + /// a [`Span`] may be given directly, one may also provide a + /// `std::ops::Range<usize>`. + /// + /// # Panics + /// + /// This panics if the given span does not correspond to valid bounds in + /// the haystack or the termination of a search. + /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let mut input = Input::new("foobar"); + /// assert_eq!(0..6, input.get_range()); + /// input.set_span(2..4); + /// assert_eq!(2..4, input.get_range()); + /// ``` + #[inline] + pub fn set_span<S: Into<Span>>(&mut self, span: S) { + let span = span.into(); + assert!( + span.end <= self.haystack.len() + && span.start <= span.end.wrapping_add(1), + "invalid span {:?} for haystack of length {}", + span, + self.haystack.len(), + ); + self.span = span; + } + + /// Set the span for this search configuration given any range. + /// + /// This is like the [`Input::range`] method, except this mutates the + /// span in place. + /// + /// This routine does not panic if the range given is not a valid range for + /// this search's haystack. If this search is run with an invalid range, + /// then the most likely outcome is that the actual search execution will + /// panic. + /// + /// # Panics + /// + /// This routine will panic if the given range could not be converted + /// to a valid [`Range`]. For example, this would panic when given + /// `0..=usize::MAX` since it cannot be represented using a half-open + /// interval in terms of `usize`. + /// + /// This also panics if the given span does not correspond to valid bounds + /// in the haystack or the termination of a search. + /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let mut input = Input::new("foobar"); + /// assert_eq!(0..6, input.get_range()); + /// input.set_range(2..=4); + /// assert_eq!(2..5, input.get_range()); + /// ``` + #[inline] + pub fn set_range<R: RangeBounds<usize>>(&mut self, range: R) { + use core::ops::Bound; + + // It's a little weird to convert ranges into spans, and then spans + // back into ranges when we actually slice the haystack. Because + // of that process, we always represent everything as a half-open + // internal. Therefore, handling things like m..=n is a little awkward. + let start = match range.start_bound() { + Bound::Included(&i) => i, + // Can this case ever happen? Range syntax doesn't support it... 
+ Bound::Excluded(&i) => i.checked_add(1).unwrap(), + Bound::Unbounded => 0, + }; + let end = match range.end_bound() { + Bound::Included(&i) => i.checked_add(1).unwrap(), + Bound::Excluded(&i) => i, + Bound::Unbounded => self.haystack().len(), + }; + self.set_span(Span { start, end }); + } + + /// Set the starting offset for the span for this search configuration. + /// + /// This is a convenience routine for only mutating the start of a span + /// without having to set the entire span. + /// + /// # Panics + /// + /// This panics if the span resulting from the new start position does not + /// correspond to valid bounds in the haystack or the termination of a + /// search. + /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let mut input = Input::new("foobar"); + /// assert_eq!(0..6, input.get_range()); + /// input.set_start(5); + /// assert_eq!(5..6, input.get_range()); + /// ``` + #[inline] + pub fn set_start(&mut self, start: usize) { + self.set_span(Span { start, ..self.get_span() }); + } + + /// Set the ending offset for the span for this search configuration. + /// + /// This is a convenience routine for only mutating the end of a span + /// without having to set the entire span. + /// + /// # Panics + /// + /// This panics if the span resulting from the new end position does not + /// correspond to valid bounds in the haystack or the termination of a + /// search. + /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let mut input = Input::new("foobar"); + /// assert_eq!(0..6, input.get_range()); + /// input.set_end(5); + /// assert_eq!(0..5, input.get_range()); + /// ``` + #[inline] + pub fn set_end(&mut self, end: usize) { + self.set_span(Span { end, ..self.get_span() }); + } + + /// Set the anchor mode of a search. + /// + /// This is like [`Input::anchored`], except it mutates the search + /// configuration in place. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{Anchored, Input, PatternID}; + /// + /// let mut input = Input::new("foobar"); + /// assert_eq!(Anchored::No, input.get_anchored()); + /// + /// let pid = PatternID::must(5); + /// input.set_anchored(Anchored::Pattern(pid)); + /// assert_eq!(Anchored::Pattern(pid), input.get_anchored()); + /// ``` + #[inline] + pub fn set_anchored(&mut self, mode: Anchored) { + self.anchored = mode; + } + + /// Set whether the search should execute in "earliest" mode or not. + /// + /// This is like [`Input::earliest`], except it mutates the search + /// configuration in place. + /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let mut input = Input::new("foobar"); + /// assert!(!input.get_earliest()); + /// input.set_earliest(true); + /// assert!(input.get_earliest()); + /// ``` + #[inline] + pub fn set_earliest(&mut self, yes: bool) { + self.earliest = yes; + } + + /// Return a borrow of the underlying haystack as a slice of bytes. + /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let input = Input::new("foobar"); + /// assert_eq!(b"foobar", input.haystack()); + /// ``` + #[inline] + pub fn haystack(&self) -> &[u8] { + self.haystack + } + + /// Return the start position of this search. + /// + /// This is a convenience routine for `search.get_span().start()`. + /// + /// When [`Input::is_done`] is `false`, this is guaranteed to return + /// an offset that is less than or equal to [`Input::end`]. Otherwise, + /// the offset is one greater than [`Input::end`]. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let input = Input::new("foobar"); + /// assert_eq!(0, input.start()); + /// + /// let input = Input::new("foobar").span(2..4); + /// assert_eq!(2, input.start()); + /// ``` + #[inline] + pub fn start(&self) -> usize { + self.get_span().start + } + + /// Return the end position of this search. + /// + /// This is a convenience routine for `search.get_span().end()`. + /// + /// This is guaranteed to return an offset that is a valid exclusive end + /// bound for this input's haystack. + /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let input = Input::new("foobar"); + /// assert_eq!(6, input.end()); + /// + /// let input = Input::new("foobar").span(2..4); + /// assert_eq!(4, input.end()); + /// ``` + #[inline] + pub fn end(&self) -> usize { + self.get_span().end + } + + /// Return the span for this search configuration. + /// + /// If one was not explicitly set, then the span corresponds to the entire + /// range of the haystack. + /// + /// When [`Input::is_done`] is `false`, the span returned is guaranteed + /// to correspond to valid bounds for this input's haystack. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{Input, Span}; + /// + /// let input = Input::new("foobar"); + /// assert_eq!(Span { start: 0, end: 6 }, input.get_span()); + /// ``` + #[inline] + pub fn get_span(&self) -> Span { + self.span + } + + /// Return the span as a range for this search configuration. + /// + /// If one was not explicitly set, then the span corresponds to the entire + /// range of the haystack. + /// + /// When [`Input::is_done`] is `false`, the range returned is guaranteed + /// to correspond to valid bounds for this input's haystack. + /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let input = Input::new("foobar"); + /// assert_eq!(0..6, input.get_range()); + /// ``` + #[inline] + pub fn get_range(&self) -> Range<usize> { + self.get_span().range() + } + + /// Return the anchored mode for this search configuration. + /// + /// If no anchored mode was set, then it defaults to [`Anchored::No`]. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{Anchored, Input, PatternID}; + /// + /// let mut input = Input::new("foobar"); + /// assert_eq!(Anchored::No, input.get_anchored()); + /// + /// let pid = PatternID::must(5); + /// input.set_anchored(Anchored::Pattern(pid)); + /// assert_eq!(Anchored::Pattern(pid), input.get_anchored()); + /// ``` + #[inline] + pub fn get_anchored(&self) -> Anchored { + self.anchored + } + + /// Return whether this search should execute in "earliest" mode. + /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let input = Input::new("foobar"); + /// assert!(!input.get_earliest()); + /// ``` + #[inline] + pub fn get_earliest(&self) -> bool { + self.earliest + } + + /// Return true if and only if this search can never return any other + /// matches. + /// + /// This occurs when the start position of this search is greater than the + /// end position of the search. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let mut input = Input::new("foobar"); + /// assert!(!input.is_done()); + /// input.set_start(6); + /// assert!(!input.is_done()); + /// input.set_start(7); + /// assert!(input.is_done()); + /// ``` + #[inline] + pub fn is_done(&self) -> bool { + self.get_span().start > self.get_span().end + } + + /// Returns true if and only if the given offset in this search's haystack + /// falls on a valid UTF-8 encoded codepoint boundary. + /// + /// If the haystack is not valid UTF-8, then the behavior of this routine + /// is unspecified. + /// + /// # Example + /// + /// This shows where codepoint boundaries do and don't exist in valid + /// UTF-8. + /// + /// ``` + /// use regex_automata::Input; + /// + /// let input = Input::new("☃"); + /// assert!(input.is_char_boundary(0)); + /// assert!(!input.is_char_boundary(1)); + /// assert!(!input.is_char_boundary(2)); + /// assert!(input.is_char_boundary(3)); + /// assert!(!input.is_char_boundary(4)); + /// ``` + #[inline] + pub fn is_char_boundary(&self, offset: usize) -> bool { + utf8::is_boundary(self.haystack(), offset) + } +} + +impl<'h> core::fmt::Debug for Input<'h> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + use crate::util::escape::DebugHaystack; + + f.debug_struct("Input") + .field("haystack", &DebugHaystack(self.haystack())) + .field("span", &self.span) + .field("anchored", &self.anchored) + .field("earliest", &self.earliest) + .finish() + } +} + +impl<'h, H: ?Sized + AsRef<[u8]>> From<&'h H> for Input<'h> { + fn from(haystack: &'h H) -> Input<'h> { + Input::new(haystack) + } +} + +/// A representation of a span reported by a regex engine. +/// +/// A span corresponds to the starting and ending _byte offsets_ of a +/// contiguous region of bytes. The starting offset is inclusive while the +/// ending offset is exclusive. That is, a span is a half-open interval. +/// +/// A span is used to report the offsets of a match, but it is also used to +/// convey which region of a haystack should be searched via routines like +/// [`Input::span`]. +/// +/// This is basically equivalent to a `std::ops::Range<usize>`, except this +/// type implements `Copy` which makes it more ergonomic to use in the context +/// of this crate. Like a range, this implements `Index` for `[u8]` and `str`, +/// and `IndexMut` for `[u8]`. For convenience, this also impls `From<Range>`, +/// which means things like `Span::from(5..10)` work. +#[derive(Clone, Copy, Eq, Hash, PartialEq)] +pub struct Span { + /// The start offset of the span, inclusive. + pub start: usize, + /// The end offset of the span, exclusive. + pub end: usize, +} + +impl Span { + /// Returns this span as a range. + #[inline] + pub fn range(&self) -> Range<usize> { + Range::from(*self) + } + + /// Returns true when this span is empty. That is, when `start >= end`. + #[inline] + pub fn is_empty(&self) -> bool { + self.start >= self.end + } + + /// Returns the length of this span. + /// + /// This returns `0` in precisely the cases that `is_empty` returns `true`. + #[inline] + pub fn len(&self) -> usize { + self.end.saturating_sub(self.start) + } + + /// Returns true when the given offset is contained within this span. + /// + /// Note that an empty span contains no offsets and will always return + /// false. 
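+    ///
+    /// # Example
+    ///
+    /// A brief illustrative sketch of the behavior described above:
+    ///
+    /// ```
+    /// use regex_automata::Span;
+    ///
+    /// let span = Span::from(3..6);
+    /// assert!(span.contains(3));
+    /// assert!(span.contains(4));
+    /// assert!(!span.contains(0));
+    /// // An empty span contains no offsets at all.
+    /// assert!(!Span::from(5..5).contains(5));
+    /// ```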
+ #[inline] + pub fn contains(&self, offset: usize) -> bool { + !self.is_empty() && self.start <= offset && offset <= self.end + } + + /// Returns a new span with `offset` added to this span's `start` and `end` + /// values. + #[inline] + pub fn offset(&self, offset: usize) -> Span { + Span { start: self.start + offset, end: self.end + offset } + } +} + +impl core::fmt::Debug for Span { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "{}..{}", self.start, self.end) + } +} + +impl core::ops::Index<Span> for [u8] { + type Output = [u8]; + + #[inline] + fn index(&self, index: Span) -> &[u8] { + &self[index.range()] + } +} + +impl core::ops::IndexMut<Span> for [u8] { + #[inline] + fn index_mut(&mut self, index: Span) -> &mut [u8] { + &mut self[index.range()] + } +} + +impl core::ops::Index<Span> for str { + type Output = str; + + #[inline] + fn index(&self, index: Span) -> &str { + &self[index.range()] + } +} + +impl From<Range<usize>> for Span { + #[inline] + fn from(range: Range<usize>) -> Span { + Span { start: range.start, end: range.end } + } +} + +impl From<Span> for Range<usize> { + #[inline] + fn from(span: Span) -> Range<usize> { + Range { start: span.start, end: span.end } + } +} + +impl PartialEq<Range<usize>> for Span { + #[inline] + fn eq(&self, range: &Range<usize>) -> bool { + self.start == range.start && self.end == range.end + } +} + +impl PartialEq<Span> for Range<usize> { + #[inline] + fn eq(&self, span: &Span) -> bool { + self.start == span.start && self.end == span.end + } +} + +/// A representation of "half" of a match reported by a DFA. +/// +/// This is called a "half" match because it only includes the end location (or +/// start location for a reverse search) of a match. This corresponds to the +/// information that a single DFA scan can report. Getting the other half of +/// the match requires a second scan with a reversed DFA. +/// +/// A half match also includes the pattern that matched. The pattern is +/// identified by an ID, which corresponds to its position (starting from `0`) +/// relative to other patterns used to construct the corresponding DFA. If only +/// a single pattern is provided to the DFA, then all matches are guaranteed to +/// have a pattern ID of `0`. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct HalfMatch { + /// The pattern ID. + pattern: PatternID, + /// The offset of the match. + /// + /// For forward searches, the offset is exclusive. For reverse searches, + /// the offset is inclusive. + offset: usize, +} + +impl HalfMatch { + /// Create a new half match from a pattern ID and a byte offset. + #[inline] + pub fn new(pattern: PatternID, offset: usize) -> HalfMatch { + HalfMatch { pattern, offset } + } + + /// Create a new half match from a pattern ID and a byte offset. + /// + /// This is like [`HalfMatch::new`], but accepts a `usize` instead of a + /// [`PatternID`]. This panics if the given `usize` is not representable + /// as a `PatternID`. + #[inline] + pub fn must(pattern: usize, offset: usize) -> HalfMatch { + HalfMatch::new(PatternID::new(pattern).unwrap(), offset) + } + + /// Returns the ID of the pattern that matched. + /// + /// The ID of a pattern is derived from the position in which it was + /// originally inserted into the corresponding DFA. The first pattern has + /// identifier `0`, and each subsequent pattern is `1`, `2` and so on. + #[inline] + pub fn pattern(&self) -> PatternID { + self.pattern + } + + /// The position of the match. 
+ /// + /// If this match was produced by a forward search, then the offset is + /// exclusive. If this match was produced by a reverse search, then the + /// offset is inclusive. + #[inline] + pub fn offset(&self) -> usize { + self.offset + } +} + +/// A representation of a match reported by a regex engine. +/// +/// A match has two essential pieces of information: the [`PatternID`] that +/// matches, and the [`Span`] of the match in a haystack. +/// +/// The pattern is identified by an ID, which corresponds to its position +/// (starting from `0`) relative to other patterns used to construct the +/// corresponding regex engine. If only a single pattern is provided, then all +/// matches are guaranteed to have a pattern ID of `0`. +/// +/// Every match reported by a regex engine guarantees that its span has its +/// start offset as less than or equal to its end offset. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct Match { + /// The pattern ID. + pattern: PatternID, + /// The underlying match span. + span: Span, +} + +impl Match { + /// Create a new match from a pattern ID and a span. + /// + /// This constructor is generic over how a span is provided. While + /// a [`Span`] may be given directly, one may also provide a + /// `std::ops::Range<usize>`. + /// + /// # Panics + /// + /// This panics if `end < start`. + /// + /// # Example + /// + /// This shows how to create a match for the first pattern in a regex + /// object using convenient range syntax. + /// + /// ``` + /// use regex_automata::{Match, PatternID}; + /// + /// let m = Match::new(PatternID::ZERO, 5..10); + /// assert_eq!(0, m.pattern().as_usize()); + /// assert_eq!(5, m.start()); + /// assert_eq!(10, m.end()); + /// ``` + #[inline] + pub fn new<S: Into<Span>>(pattern: PatternID, span: S) -> Match { + let span: Span = span.into(); + assert!(span.start <= span.end, "invalid match span"); + Match { pattern, span } + } + + /// Create a new match from a pattern ID and a byte offset span. + /// + /// This constructor is generic over how a span is provided. While + /// a [`Span`] may be given directly, one may also provide a + /// `std::ops::Range<usize>`. + /// + /// This is like [`Match::new`], but accepts a `usize` instead of a + /// [`PatternID`]. This panics if the given `usize` is not representable + /// as a `PatternID`. + /// + /// # Panics + /// + /// This panics if `end < start` or if `pattern > PatternID::MAX`. + /// + /// # Example + /// + /// This shows how to create a match for the third pattern in a regex + /// object using convenient range syntax. + /// + /// ``` + /// use regex_automata::Match; + /// + /// let m = Match::must(3, 5..10); + /// assert_eq!(3, m.pattern().as_usize()); + /// assert_eq!(5, m.start()); + /// assert_eq!(10, m.end()); + /// ``` + #[inline] + pub fn must<S: Into<Span>>(pattern: usize, span: S) -> Match { + Match::new(PatternID::must(pattern), span) + } + + /// Returns the ID of the pattern that matched. + /// + /// The ID of a pattern is derived from the position in which it was + /// originally inserted into the corresponding regex engine. The first + /// pattern has identifier `0`, and each subsequent pattern is `1`, `2` and + /// so on. + #[inline] + pub fn pattern(&self) -> PatternID { + self.pattern + } + + /// The starting position of the match. + /// + /// This is a convenience routine for `Match::span().start`. + #[inline] + pub fn start(&self) -> usize { + self.span().start + } + + /// The ending position of the match. 
+ /// + /// This is a convenience routine for `Match::span().end`. + #[inline] + pub fn end(&self) -> usize { + self.span().end + } + + /// Returns the match span as a range. + /// + /// This is a convenience routine for `Match::span().range()`. + #[inline] + pub fn range(&self) -> core::ops::Range<usize> { + self.span().range() + } + + /// Returns the span for this match. + #[inline] + pub fn span(&self) -> Span { + self.span + } + + /// Returns true when the span in this match is empty. + /// + /// An empty match can only be returned when the regex itself can match + /// the empty string. + #[inline] + pub fn is_empty(&self) -> bool { + self.span().is_empty() + } + + /// Returns the length of this match. + /// + /// This returns `0` in precisely the cases that `is_empty` returns `true`. + #[inline] + pub fn len(&self) -> usize { + self.span().len() + } +} + +/// A set of `PatternID`s. +/// +/// A set of pattern identifiers is useful for recording which patterns have +/// matched a particular haystack. A pattern set _only_ includes pattern +/// identifiers. It does not include offset information. +/// +/// # Example +/// +/// This shows basic usage of a set. +/// +/// ``` +/// use regex_automata::{PatternID, PatternSet}; +/// +/// let pid1 = PatternID::must(5); +/// let pid2 = PatternID::must(8); +/// // Create a new empty set. +/// let mut set = PatternSet::new(10); +/// // Insert pattern IDs. +/// set.insert(pid1); +/// set.insert(pid2); +/// // Test membership. +/// assert!(set.contains(pid1)); +/// assert!(set.contains(pid2)); +/// // Get all members. +/// assert_eq!( +/// vec![5, 8], +/// set.iter().map(|p| p.as_usize()).collect::<Vec<usize>>(), +/// ); +/// // Clear the set. +/// set.clear(); +/// // Test that it is indeed empty. +/// assert!(set.is_empty()); +/// ``` +#[cfg(feature = "alloc")] +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct PatternSet { + /// The number of patterns set to 'true' in this set. + len: usize, + /// A map from PatternID to boolean of whether a pattern matches or not. + /// + /// This should probably be a bitset, but it's probably unlikely to matter + /// much in practice. + /// + /// The main downside of this representation (and similarly for a bitset) + /// is that iteration scales with the capacity of the set instead of + /// the length of the set. This doesn't seem likely to be a problem in + /// practice. + /// + /// Another alternative is to just use a 'SparseSet' for this. It does use + /// more memory (quite a bit more), but that seems fine I think compared + /// to the memory being used by the regex engine. The real hiccup with + /// it is that it yields pattern IDs in the order they were inserted. + /// Which is actually kind of nice, but at the time of writing, pattern + /// IDs are yielded in ascending order in the regex crate RegexSet API. + /// If we did change to 'SparseSet', we could provide an additional + /// 'iter_match_order' iterator, but keep the ascending order one for + /// compatibility. + which: alloc::boxed::Box<[bool]>, +} + +#[cfg(feature = "alloc")] +impl PatternSet { + /// Create a new set of pattern identifiers with the given capacity. + /// + /// The given capacity typically corresponds to (at least) the number of + /// patterns in a compiled regex object. + /// + /// # Panics + /// + /// This panics if the given capacity exceeds [`PatternID::LIMIT`]. This is + /// impossible if you use the `pattern_len()` method as defined on any of + /// the regex engines in this crate. 
Namely, a regex will fail to build by + /// returning an error if the number of patterns given to it exceeds the + /// limit. Therefore, the number of patterns in a valid regex is always + /// a correct capacity to provide here. + pub fn new(capacity: usize) -> PatternSet { + assert!( + capacity <= PatternID::LIMIT, + "pattern set capacity exceeds limit of {}", + PatternID::LIMIT, + ); + PatternSet { + len: 0, + which: alloc::vec![false; capacity].into_boxed_slice(), + } + } + + /// Clear this set such that it contains no pattern IDs. + pub fn clear(&mut self) { + self.len = 0; + for matched in self.which.iter_mut() { + *matched = false; + } + } + + /// Return true if and only if the given pattern identifier is in this set. + pub fn contains(&self, pid: PatternID) -> bool { + pid.as_usize() < self.capacity() && self.which[pid] + } + + /// Insert the given pattern identifier into this set and return `true` if + /// the given pattern ID was not previously in this set. + /// + /// If the pattern identifier is already in this set, then this is a no-op. + /// + /// Use [`PatternSet::try_insert`] for a fallible version of this routine. + /// + /// # Panics + /// + /// This panics if this pattern set has insufficient capacity to + /// store the given pattern ID. + pub fn insert(&mut self, pid: PatternID) -> bool { + self.try_insert(pid) + .expect("PatternSet should have sufficient capacity") + } + + /// Insert the given pattern identifier into this set and return `true` if + /// the given pattern ID was not previously in this set. + /// + /// If the pattern identifier is already in this set, then this is a no-op. + /// + /// # Errors + /// + /// This returns an error if this pattern set has insufficient capacity to + /// store the given pattern ID. + pub fn try_insert( + &mut self, + pid: PatternID, + ) -> Result<bool, PatternSetInsertError> { + if pid.as_usize() >= self.capacity() { + return Err(PatternSetInsertError { + attempted: pid, + capacity: self.capacity(), + }); + } + if self.which[pid] { + return Ok(false); + } + self.len += 1; + self.which[pid] = true; + Ok(true) + } + + /* + // This is currently commented out because it is unused and it is unclear + // whether it's useful or not. What's the harm in having it? When, if + // we ever wanted to change our representation to a 'SparseSet', then + // supporting this method would be a bit tricky. So in order to keep some + // API evolution flexibility, we leave it out for now. + + /// Remove the given pattern identifier from this set. + /// + /// If the pattern identifier was not previously in this set, then this + /// does not change the set and returns `false`. + /// + /// # Panics + /// + /// This panics if `pid` exceeds the capacity of this set. + pub fn remove(&mut self, pid: PatternID) -> bool { + if !self.which[pid] { + return false; + } + self.len -= 1; + self.which[pid] = false; + true + } + */ + + /// Return true if and only if this set has no pattern identifiers in it. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Return true if and only if this set has the maximum number of pattern + /// identifiers in the set. This occurs precisely when `PatternSet::len() + /// == PatternSet::capacity()`. + /// + /// This particular property is useful to test because it may allow one to + /// stop a search earlier than you might otherwise. 
Namely, if a search is + /// only reporting which patterns match a haystack and if you know all of + /// the patterns match at a given point, then there's no new information + /// that can be learned by continuing the search. (Because a pattern set + /// does not keep track of offset information.) + pub fn is_full(&self) -> bool { + self.len() == self.capacity() + } + + /// Returns the total number of pattern identifiers in this set. + pub fn len(&self) -> usize { + self.len + } + + /// Returns the total number of pattern identifiers that may be stored + /// in this set. + /// + /// This is guaranteed to be less than or equal to [`PatternID::LIMIT`]. + /// + /// Typically, the capacity of a pattern set matches the number of patterns + /// in a regex object with which you are searching. + pub fn capacity(&self) -> usize { + self.which.len() + } + + /// Returns an iterator over all pattern identifiers in this set. + /// + /// The iterator yields pattern identifiers in ascending order, starting + /// at zero. + pub fn iter(&self) -> PatternSetIter<'_> { + PatternSetIter { it: self.which.iter().enumerate() } + } +} + +/// An error that occurs when a `PatternID` failed to insert into a +/// `PatternSet`. +/// +/// An insert fails when the given `PatternID` exceeds the configured capacity +/// of the `PatternSet`. +/// +/// This error is created by the [`PatternSet::try_insert`] routine. +#[cfg(feature = "alloc")] +#[derive(Clone, Debug)] +pub struct PatternSetInsertError { + attempted: PatternID, + capacity: usize, +} + +#[cfg(feature = "std")] +impl std::error::Error for PatternSetInsertError {} + +#[cfg(feature = "alloc")] +impl core::fmt::Display for PatternSetInsertError { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!( + f, + "failed to insert pattern ID {} into pattern set \ + with insufficiet capacity of {}", + self.attempted.as_usize(), + self.capacity, + ) + } +} + +/// An iterator over all pattern identifiers in a [`PatternSet`]. +/// +/// The lifetime parameter `'a` refers to the lifetime of the pattern set being +/// iterated over. +/// +/// This iterator is created by the [`PatternSet::iter`] method. +#[cfg(feature = "alloc")] +#[derive(Clone, Debug)] +pub struct PatternSetIter<'a> { + it: core::iter::Enumerate<core::slice::Iter<'a, bool>>, +} + +#[cfg(feature = "alloc")] +impl<'a> Iterator for PatternSetIter<'a> { + type Item = PatternID; + + fn next(&mut self) -> Option<PatternID> { + while let Some((index, &yes)) = self.it.next() { + if yes { + // Only valid 'PatternID' values can be inserted into the set + // and construction of the set panics if the capacity would + // permit storing invalid pattern IDs. Thus, 'yes' is only true + // precisely when 'index' corresponds to a valid 'PatternID'. + return Some(PatternID::new_unchecked(index)); + } + } + None + } + + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } +} + +#[cfg(feature = "alloc")] +impl<'a> DoubleEndedIterator for PatternSetIter<'a> { + fn next_back(&mut self) -> Option<PatternID> { + while let Some((index, &yes)) = self.it.next_back() { + if yes { + // Only valid 'PatternID' values can be inserted into the set + // and construction of the set panics if the capacity would + // permit storing invalid pattern IDs. Thus, 'yes' is only true + // precisely when 'index' corresponds to a valid 'PatternID'. + return Some(PatternID::new_unchecked(index)); + } + } + None + } +} + +/// The type of anchored search to perform. +/// +/// This is *almost* a boolean option. 
That is, you can either do an unanchored +/// search for any pattern in a regex, or you can do an anchored search for any +/// pattern in a regex. +/// +/// A third option exists that, assuming the regex engine supports it, permits +/// you to do an anchored search for a specific pattern. +/// +/// Note that there is no way to run an unanchored search for a specific +/// pattern. If you need that, you'll need to build separate regexes for each +/// pattern. +/// +/// # Errors +/// +/// If a regex engine does not support the anchored mode selected, then the +/// regex engine will return an error. While any non-trivial regex engine +/// should support at least one of the available anchored modes, there is no +/// singular mode that is guaranteed to be universally supported. Some regex +/// engines might only support unanchored searches (DFAs compiled without +/// anchored starting states) and some regex engines might only support +/// anchored searches (like the one-pass DFA). +/// +/// The specific error returned is a [`MatchError`] with a +/// [`MatchErrorKind::UnsupportedAnchored`] kind. The kind includes the +/// `Anchored` value given that is unsupported. +/// +/// Note that regex engines should report "no match" if, for example, an +/// `Anchored::Pattern` is provided with an invalid pattern ID _but_ where +/// anchored searches for a specific pattern are supported. This is smooths out +/// behavior such that it's possible to guarantee that an error never occurs +/// based on how the regex engine is configured. All regex engines in this +/// crate report "no match" when searching for an invalid pattern ID, but where +/// searching for a valid pattern ID is otherwise supported. +/// +/// # Example +/// +/// This example shows how to use the various `Anchored` modes to run a +/// search. We use the [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM) +/// because it supports all modes unconditionally. Some regex engines, like +/// the [`onepass::DFA`](crate::dfa::onepass::DFA) cannot support unanchored +/// searches. +/// +/// ``` +/// # if cfg!(miri) { return Ok(()); } // miri takes too long +/// use regex_automata::{ +/// nfa::thompson::pikevm::PikeVM, +/// Anchored, Input, Match, PatternID, +/// }; +/// +/// let re = PikeVM::new_many(&[ +/// r"Mrs. \w+", +/// r"Miss \w+", +/// r"Mr. \w+", +/// r"Ms. \w+", +/// ])?; +/// let mut cache = re.create_cache(); +/// let hay = "Hello Mr. Springsteen!"; +/// +/// // The default is to do an unanchored search. +/// assert_eq!(Some(Match::must(2, 6..21)), re.find(&mut cache, hay)); +/// // Explicitly ask for an unanchored search. Same as above. +/// let input = Input::new(hay).anchored(Anchored::No); +/// assert_eq!(Some(Match::must(2, 6..21)), re.find(&mut cache, hay)); +/// +/// // Now try an anchored search. Since the match doesn't start at the +/// // beginning of the haystack, no match is found! +/// let input = Input::new(hay).anchored(Anchored::Yes); +/// assert_eq!(None, re.find(&mut cache, input)); +/// +/// // We can try an anchored search again, but move the location of where +/// // we start the search. Note that the offsets reported are still in +/// // terms of the overall haystack and not relative to where we started +/// // the search. +/// let input = Input::new(hay).anchored(Anchored::Yes).range(6..); +/// assert_eq!(Some(Match::must(2, 6..21)), re.find(&mut cache, input)); +/// +/// // Now try an anchored search for a specific pattern. 
We specifically +/// // choose a pattern that we know doesn't match to prove that the search +/// // only looks for the pattern we provide. +/// let input = Input::new(hay) +/// .anchored(Anchored::Pattern(PatternID::must(1))) +/// .range(6..); +/// assert_eq!(None, re.find(&mut cache, input)); +/// +/// // But if we switch it to the pattern that we know matches, then we find +/// // the match. +/// let input = Input::new(hay) +/// .anchored(Anchored::Pattern(PatternID::must(2))) +/// .range(6..); +/// assert_eq!(Some(Match::must(2, 6..21)), re.find(&mut cache, input)); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum Anchored { + /// Run an unanchored search. This means a match may occur anywhere at or + /// after the start position of the search. + /// + /// This search can return a match for any pattern in the regex. + No, + /// Run an anchored search. This means that a match must begin at the + /// start position of the search. + /// + /// This search can return a match for any pattern in the regex. + Yes, + /// Run an anchored search for a specific pattern. This means that a match + /// must be for the given pattern and must begin at the start position of + /// the search. + Pattern(PatternID), +} + +impl Anchored { + /// Returns true if and only if this anchor mode corresponds to any kind of + /// anchored search. + /// + /// # Example + /// + /// This examples shows that both `Anchored::Yes` and `Anchored::Pattern` + /// are considered anchored searches. + /// + /// ``` + /// use regex_automata::{Anchored, PatternID}; + /// + /// assert!(!Anchored::No.is_anchored()); + /// assert!(Anchored::Yes.is_anchored()); + /// assert!(Anchored::Pattern(PatternID::ZERO).is_anchored()); + /// ``` + #[inline] + pub fn is_anchored(&self) -> bool { + matches!(*self, Anchored::Yes | Anchored::Pattern(_)) + } + + /// Returns the pattern ID associated with this configuration if it is an + /// anchored search for a specific pattern. Otherwise `None` is returned. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{Anchored, PatternID}; + /// + /// assert_eq!(None, Anchored::No.pattern()); + /// assert_eq!(None, Anchored::Yes.pattern()); + /// + /// let pid = PatternID::must(5); + /// assert_eq!(Some(pid), Anchored::Pattern(pid).pattern()); + /// ``` + #[inline] + pub fn pattern(&self) -> Option<PatternID> { + match *self { + Anchored::Pattern(pid) => Some(pid), + _ => None, + } + } +} + +/// The kind of match semantics to use for a regex pattern. +/// +/// The default match kind is `LeftmostFirst`, and this corresponds to the +/// match semantics used by most backtracking engines, such as Perl. +/// +/// # Leftmost first or "preference order" match semantics +/// +/// Leftmost-first semantics determine which match to report when there are +/// multiple paths through a regex that match at the same position. The tie is +/// essentially broken by how a backtracker would behave. For example, consider +/// running the regex `foofoofoo|foofoo|foo` on the haystack `foofoo`. In this +/// case, both the `foofoo` and `foo` branches match at position `0`. So should +/// the end of the match be `3` or `6`? +/// +/// A backtracker will conceptually work by trying `foofoofoo` and failing. +/// Then it will try `foofoo`, find the match and stop there. Thus, the +/// leftmost-first match position is `6`. 
This is called "leftmost-first" or +/// "preference order" because the order of the branches as written in the +/// regex pattern is what determines how to break the tie. +/// +/// (Note that leftmost-longest match semantics, which break ties by always +/// taking the longest matching string, are not currently supported by this +/// crate. These match semantics tend to be found in POSIX regex engines.) +/// +/// This example shows how leftmost-first semantics work, and how it even +/// applies to multi-pattern regexes: +/// +/// ``` +/// use regex_automata::{ +/// nfa::thompson::pikevm::PikeVM, +/// Match, +/// }; +/// +/// let re = PikeVM::new_many(&[ +/// r"foofoofoo", +/// r"foofoo", +/// r"foo", +/// ])?; +/// let mut cache = re.create_cache(); +/// let got: Vec<Match> = re.find_iter(&mut cache, "foofoo").collect(); +/// let expected = vec![Match::must(1, 0..6)]; +/// assert_eq!(expected, got); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +/// +/// # All matches +/// +/// The `All` match semantics report any and all matches, and generally will +/// attempt to match as much as possible. It doesn't respect any sort of match +/// priority at all, so things like non-greedy matching don't work in this +/// mode. +/// +/// The fact that non-greedy matching doesn't work generally makes most forms +/// of unanchored non-overlapping searches have unintuitive behavior. Namely, +/// unanchored searches behave as if there is a `(?s-u:.)*?` prefix at the +/// beginning of the pattern, which is specifically non-greedy. Since it will +/// be treated as greedy in `All` match semantics, this generally means that +/// it will first attempt to consume all of the haystack and is likely to wind +/// up skipping matches. +/// +/// Generally speaking, `All` should only be used in two circumstances: +/// +/// * When running an anchored search and there is a desire to match as much as +/// possible. For example, when building a reverse regex matcher to find the +/// start of a match after finding the end. In this case, the reverse search +/// is anchored to the end of the match found by the forward search. +/// * When running overlapping searches. Since `All` encodes all possible +/// matches, this is generally what you want for an overlapping search. If you +/// try to use leftmost-first in an overlapping search, it is likely to produce +/// counter-intuitive results since leftmost-first specifically excludes some +/// matches from its underlying finite state machine. +/// +/// This example demonstrates the counter-intuitive behavior of `All` semantics +/// when using a standard leftmost unanchored search: +/// +/// ``` +/// use regex_automata::{ +/// nfa::thompson::pikevm::PikeVM, +/// Match, MatchKind, +/// }; +/// +/// let re = PikeVM::builder() +/// .configure(PikeVM::config().match_kind(MatchKind::All)) +/// .build("foo")?; +/// let hay = "first foo second foo wat"; +/// let mut cache = re.create_cache(); +/// let got: Vec<Match> = re.find_iter(&mut cache, hay).collect(); +/// // Notice that it completely skips the first 'foo'! +/// let expected = vec![Match::must(0, 17..20)]; +/// assert_eq!(expected, got); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +/// +/// This second example shows how `All` semantics are useful for an overlapping +/// search. Note that we use lower level lazy DFA APIs here since the NFA +/// engines only currently support a very limited form of overlapping search. 
+/// +/// ``` +/// use regex_automata::{ +/// hybrid::dfa::{DFA, OverlappingState}, +/// HalfMatch, Input, MatchKind, +/// }; +/// +/// let re = DFA::builder() +/// // If we didn't set 'All' semantics here, then the regex would only +/// // match 'foo' at offset 3 and nothing else. Why? Because the state +/// // machine implements preference order and knows that the 'foofoo' and +/// // 'foofoofoo' branches can never match since 'foo' will always match +/// // when they match and take priority. +/// .configure(DFA::config().match_kind(MatchKind::All)) +/// .build(r"foo|foofoo|foofoofoo")?; +/// let mut cache = re.create_cache(); +/// let mut state = OverlappingState::start(); +/// let input = Input::new("foofoofoo"); +/// let mut got = vec![]; +/// loop { +/// re.try_search_overlapping_fwd(&mut cache, &input, &mut state)?; +/// let m = match state.get_match() { +/// None => break, +/// Some(m) => m, +/// }; +/// got.push(m); +/// } +/// let expected = vec![ +/// HalfMatch::must(0, 3), +/// HalfMatch::must(0, 6), +/// HalfMatch::must(0, 9), +/// ]; +/// assert_eq!(expected, got); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +#[non_exhaustive] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum MatchKind { + /// Report all possible matches. + All, + /// Report only the leftmost matches. When multiple leftmost matches exist, + /// report the match corresponding to the part of the regex that appears + /// first in the syntax. + LeftmostFirst, + // There is prior art in RE2 that shows that we should be able to add + // LeftmostLongest too. The tricky part of it is supporting ungreedy + // repetitions. Instead of treating all NFA states as having equivalent + // priority (as in 'All') or treating all NFA states as having distinct + // priority based on order (as in 'LeftmostFirst'), we instead group NFA + // states into sets, and treat members of each set as having equivalent + // priority, but having greater priority than all following members + // of different sets. + // + // However, it's not clear whether it's really worth adding this. After + // all, leftmost-longest can be emulated when using literals by using + // leftmost-first and sorting the literals by length in descending order. + // However, this won't work for arbitrary regexes. e.g., `\w|\w\w` will + // always match `a` in `ab` when using leftmost-first, but leftmost-longest + // would match `ab`. +} + +impl MatchKind { + #[cfg(feature = "alloc")] + pub(crate) fn continue_past_first_match(&self) -> bool { + *self == MatchKind::All + } +} + +impl Default for MatchKind { + fn default() -> MatchKind { + MatchKind::LeftmostFirst + } +} + +/// An error indicating that a search stopped before reporting whether a +/// match exists or not. +/// +/// To be very clear, this error type implies that one cannot assume that no +/// matches occur, since the search stopped before completing. That is, if +/// you're looking for information about where a search determined that no +/// match can occur, then this error type does *not* give you that. (Indeed, at +/// the time of writing, if you need such a thing, you have to write your own +/// search routine.) +/// +/// Normally, when one searches for something, the response is either an +/// affirmative "it was found at this location" or a negative "not found at +/// all." However, in some cases, a regex engine can be configured to stop its +/// search before concluding whether a match exists or not. 
When this happens, +/// it may be important for the caller to know why the regex engine gave up and +/// where in the input it gave up at. This error type exposes the 'why' and the +/// 'where.' +/// +/// For example, the DFAs provided by this library generally cannot correctly +/// implement Unicode word boundaries. Instead, they provide an option to +/// eagerly support them on ASCII text (since Unicode word boundaries are +/// equivalent to ASCII word boundaries when searching ASCII text), but will +/// "give up" if a non-ASCII byte is seen. In such cases, one is usually +/// required to either report the failure to the caller (unergonomic) or +/// otherwise fall back to some other regex engine (ergonomic, but potentially +/// costly). +/// +/// More generally, some regex engines offer the ability for callers to specify +/// certain bytes that will trigger the regex engine to automatically quit if +/// they are seen. +/// +/// Still yet, there may be other reasons for a failed match. For example, +/// the hybrid DFA provided by this crate can be configured to give up if it +/// believes that it is not efficient. This in turn permits callers to choose a +/// different regex engine. +/// +/// (Note that DFAs are configured by default to never quit or give up in this +/// fashion. For example, by default, a DFA will fail to build if the regex +/// pattern contains a Unicode word boundary. One needs to opt into the "quit" +/// behavior via options, like +/// [`hybrid::dfa::Config::unicode_word_boundary`](crate::hybrid::dfa::Config::unicode_word_boundary).) +/// +/// There are a couple other ways a search +/// can fail. For example, when using the +/// [`BoundedBacktracker`](crate::nfa::thompson::backtrack::BoundedBacktracker) +/// with a haystack that is too long, or trying to run an unanchored search +/// with a [one-pass DFA](crate::dfa::onepass). +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct MatchError( + #[cfg(feature = "alloc")] alloc::boxed::Box<MatchErrorKind>, + #[cfg(not(feature = "alloc"))] MatchErrorKind, +); + +impl MatchError { + /// Create a new error value with the given kind. + /// + /// This is a more verbose version of the kind-specific constructors, + /// e.g., `MatchError::quit`. + pub fn new(kind: MatchErrorKind) -> MatchError { + #[cfg(feature = "alloc")] + { + MatchError(alloc::boxed::Box::new(kind)) + } + #[cfg(not(feature = "alloc"))] + { + MatchError(kind) + } + } + + /// Returns a reference to the underlying error kind. + pub fn kind(&self) -> &MatchErrorKind { + &self.0 + } + + /// Create a new "quit" error. The given `byte` corresponds to the value + /// that tripped a search's quit condition, and `offset` corresponds to the + /// location in the haystack at which the search quit. + /// + /// This is the same as calling `MatchError::new` with a + /// [`MatchErrorKind::Quit`] kind. + pub fn quit(byte: u8, offset: usize) -> MatchError { + MatchError::new(MatchErrorKind::Quit { byte, offset }) + } + + /// Create a new "gave up" error. The given `offset` corresponds to the + /// location in the haystack at which the search gave up. + /// + /// This is the same as calling `MatchError::new` with a + /// [`MatchErrorKind::GaveUp`] kind. + pub fn gave_up(offset: usize) -> MatchError { + MatchError::new(MatchErrorKind::GaveUp { offset }) + } + + /// Create a new "haystack too long" error. The given `len` corresponds to + /// the length of the haystack that was problematic. 
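+    ///
+    /// As a small sketch, constructing this error and inspecting its kind
+    /// via the accessors defined on this type:
+    ///
+    /// ```
+    /// use regex_automata::{MatchError, MatchErrorKind};
+    ///
+    /// let err = MatchError::haystack_too_long(1_000);
+    /// match err.kind() {
+    ///     MatchErrorKind::HaystackTooLong { len } => assert_eq!(1_000, *len),
+    ///     _ => unreachable!(),
+    /// }
+    /// ```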
+ /// + /// This is the same as calling `MatchError::new` with a + /// [`MatchErrorKind::HaystackTooLong`] kind. + pub fn haystack_too_long(len: usize) -> MatchError { + MatchError::new(MatchErrorKind::HaystackTooLong { len }) + } + + /// Create a new "unsupported anchored" error. This occurs when the caller + /// requests a search with an anchor mode that is not supported by the + /// regex engine. + /// + /// This is the same as calling `MatchError::new` with a + /// [`MatchErrorKind::UnsupportedAnchored`] kind. + pub fn unsupported_anchored(mode: Anchored) -> MatchError { + MatchError::new(MatchErrorKind::UnsupportedAnchored { mode }) + } +} + +/// The underlying kind of a [`MatchError`]. +/// +/// This is a **non-exhaustive** enum. That means new variants may be added in +/// a semver-compatible release. +#[non_exhaustive] +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum MatchErrorKind { + /// The search saw a "quit" byte at which it was instructed to stop + /// searching. + Quit { + /// The "quit" byte that was observed that caused the search to stop. + byte: u8, + /// The offset at which the quit byte was observed. + offset: usize, + }, + /// The search, based on heuristics, determined that it would be better + /// to stop, typically to provide the caller an opportunity to use an + /// alternative regex engine. + /// + /// Currently, the only way for this to occur is via the lazy DFA and + /// only when it is configured to do so (it will not return this error by + /// default). + GaveUp { + /// The offset at which the search stopped. This corresponds to the + /// position immediately following the last byte scanned. + offset: usize, + }, + /// This error occurs if the haystack given to the regex engine was too + /// long to be searched. This occurs, for example, with regex engines + /// like the bounded backtracker that have a configurable fixed amount of + /// capacity that is tied to the length of the haystack. Anything beyond + /// that configured limit will result in an error at search time. + HaystackTooLong { + /// The length of the haystack that exceeded the limit. + len: usize, + }, + /// An error indicating that a particular type of anchored search was + /// requested, but that the regex engine does not support it. + /// + /// Note that this error should not be returned by a regex engine simply + /// because the pattern ID is invalid (i.e., equal to or exceeds the number + /// of patterns in the regex). In that case, the regex engine should report + /// a non-match. + UnsupportedAnchored { + /// The anchored mode given that is unsupported. 
+ mode: Anchored, + }, +} + +#[cfg(feature = "std")] +impl std::error::Error for MatchError {} + +impl core::fmt::Display for MatchError { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + match *self.kind() { + MatchErrorKind::Quit { byte, offset } => write!( + f, + "quit search after observing byte {:?} at offset {}", + DebugByte(byte), + offset, + ), + MatchErrorKind::GaveUp { offset } => { + write!(f, "gave up searching at offset {}", offset) + } + MatchErrorKind::HaystackTooLong { len } => { + write!(f, "haystack of length {} is too long", len) + } + MatchErrorKind::UnsupportedAnchored { mode: Anchored::Yes } => { + write!(f, "anchored searches are not supported or enabled") + } + MatchErrorKind::UnsupportedAnchored { mode: Anchored::No } => { + write!(f, "unanchored searches are not supported or enabled") + } + MatchErrorKind::UnsupportedAnchored { + mode: Anchored::Pattern(pid), + } => { + write!( + f, + "anchored searches for a specific pattern ({}) are \ + not supported or enabled", + pid.as_usize(), + ) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // We test that our 'MatchError' type is the size we expect. This isn't an + // API guarantee, but if the size increases, we really want to make sure we + // decide to do that intentionally. So this should be a speed bump. And in + // general, we should not increase the size without a very good reason. + // + // Why? Because low level search APIs return Result<.., MatchError>. When + // MatchError gets bigger, so to does the Result type. + // + // Now, when 'alloc' is enabled, we do box the error, which de-emphasizes + // the importance of keeping a small error type. But without 'alloc', we + // still want things to be small. + #[test] + fn match_error_size() { + let expected_size = if cfg!(feature = "alloc") { + core::mem::size_of::<usize>() + } else { + 2 * core::mem::size_of::<usize>() + }; + assert_eq!(expected_size, core::mem::size_of::<MatchError>()); + } + + // Same as above, but for the underlying match error kind. + #[cfg(target_pointer_width = "64")] + #[test] + fn match_error_kind_size() { + let expected_size = 2 * core::mem::size_of::<usize>(); + assert_eq!(expected_size, core::mem::size_of::<MatchErrorKind>()); + } + + #[cfg(target_pointer_width = "32")] + #[test] + fn match_error_kind_size() { + let expected_size = 3 * core::mem::size_of::<usize>(); + assert_eq!(expected_size, core::mem::size_of::<MatchErrorKind>()); + } +} diff --git a/vendor/regex-automata/src/util/sparse_set.rs b/vendor/regex-automata/src/util/sparse_set.rs new file mode 100644 index 0000000..cbaa0b6 --- /dev/null +++ b/vendor/regex-automata/src/util/sparse_set.rs @@ -0,0 +1,239 @@ +/*! +This module defines a sparse set data structure. Its most interesting +properties are: + +* They preserve insertion order. +* Set membership testing is done in constant time. +* Set insertion is done in constant time. +* Clearing the set is done in constant time. + +The cost for doing this is that the capacity of the set needs to be known up +front, and the elements in the set are limited to state identifiers. + +These sets are principally used when traversing an NFA state graph. This +happens at search time, for example, in the PikeVM. It also happens during DFA +determinization. +*/ + +use alloc::{vec, vec::Vec}; + +use crate::util::primitives::StateID; + +/// A pairse of sparse sets. +/// +/// This is useful when one needs to compute NFA epsilon closures from a +/// previous set of states derived from an epsilon closure. 
One set can be the +/// starting states where as the other set can be the destination states after +/// following the transitions for a particular byte of input. +/// +/// There is no significance to 'set1' or 'set2'. They are both sparse sets of +/// the same size. +/// +/// The members of this struct are exposed so that callers may borrow 'set1' +/// and 'set2' individually without being force to borrow both at the same +/// time. +#[derive(Clone, Debug)] +pub(crate) struct SparseSets { + pub(crate) set1: SparseSet, + pub(crate) set2: SparseSet, +} + +impl SparseSets { + /// Create a new pair of sparse sets where each set has the given capacity. + /// + /// This panics if the capacity given is bigger than `StateID::LIMIT`. + pub(crate) fn new(capacity: usize) -> SparseSets { + SparseSets { + set1: SparseSet::new(capacity), + set2: SparseSet::new(capacity), + } + } + + /// Resizes these sparse sets to have the new capacity given. + /// + /// The sets are automatically cleared. + /// + /// This panics if the capacity given is bigger than `StateID::LIMIT`. + #[inline] + pub(crate) fn resize(&mut self, new_capacity: usize) { + self.set1.resize(new_capacity); + self.set2.resize(new_capacity); + } + + /// Clear both sparse sets. + pub(crate) fn clear(&mut self) { + self.set1.clear(); + self.set2.clear(); + } + + /// Swap set1 with set2. + pub(crate) fn swap(&mut self) { + core::mem::swap(&mut self.set1, &mut self.set2); + } + + /// Returns the memory usage, in bytes, used by this pair of sparse sets. + pub(crate) fn memory_usage(&self) -> usize { + self.set1.memory_usage() + self.set2.memory_usage() + } +} + +/// A sparse set used for representing ordered NFA states. +/// +/// This supports constant time addition and membership testing. Clearing an +/// entire set can also be done in constant time. Iteration yields elements +/// in the order in which they were inserted. +/// +/// The data structure is based on: https://research.swtch.com/sparse +/// Note though that we don't actually use uninitialized memory. We generally +/// reuse sparse sets, so the initial allocation cost is bareable. However, its +/// other properties listed above are extremely useful. +#[derive(Clone)] +pub(crate) struct SparseSet { + /// The number of elements currently in this set. + len: usize, + /// Dense contains the ids in the order in which they were inserted. + dense: Vec<StateID>, + /// Sparse maps ids to their location in dense. + /// + /// A state ID is in the set if and only if + /// sparse[id] < len && id == dense[sparse[id]]. + /// + /// Note that these are indices into 'dense'. It's a little weird to use + /// StateID here, but we know our length can never exceed the bounds of + /// StateID (enforced by 'resize') and StateID will be at most 4 bytes + /// where as a usize is likely double that in most cases. + sparse: Vec<StateID>, +} + +impl SparseSet { + /// Create a new sparse set with the given capacity. + /// + /// Sparse sets have a fixed size and they cannot grow. Attempting to + /// insert more distinct elements than the total capacity of the set will + /// result in a panic. + /// + /// This panics if the capacity given is bigger than `StateID::LIMIT`. + #[inline] + pub(crate) fn new(capacity: usize) -> SparseSet { + let mut set = SparseSet { len: 0, dense: vec![], sparse: vec![] }; + set.resize(capacity); + set + } + + /// Resizes this sparse set to have the new capacity given. + /// + /// This set is automatically cleared. 
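+    ///
+    /// As an illustrative sketch (this type is crate-internal, so the block
+    /// below is not a doctest), resizing discards any previous members:
+    ///
+    /// ```ignore
+    /// let mut set = SparseSet::new(10);
+    /// set.insert(StateID::new(3).unwrap());
+    /// assert!(set.contains(StateID::new(3).unwrap()));
+    /// set.resize(20);
+    /// assert!(set.is_empty());
+    /// assert!(!set.contains(StateID::new(3).unwrap()));
+    /// ```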
+ /// + /// This panics if the capacity given is bigger than `StateID::LIMIT`. + #[inline] + pub(crate) fn resize(&mut self, new_capacity: usize) { + assert!( + new_capacity <= StateID::LIMIT, + "sparse set capacity cannot excced {:?}", + StateID::LIMIT + ); + self.clear(); + self.dense.resize(new_capacity, StateID::ZERO); + self.sparse.resize(new_capacity, StateID::ZERO); + } + + /// Returns the capacity of this set. + /// + /// The capacity represents a fixed limit on the number of distinct + /// elements that are allowed in this set. The capacity cannot be changed. + #[inline] + pub(crate) fn capacity(&self) -> usize { + self.dense.len() + } + + /// Returns the number of elements in this set. + #[inline] + pub(crate) fn len(&self) -> usize { + self.len + } + + /// Returns true if and only if this set is empty. + #[inline] + pub(crate) fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Insert the state ID value into this set and return true if the given + /// state ID was not previously in this set. + /// + /// This operation is idempotent. If the given value is already in this + /// set, then this is a no-op. + /// + /// If more than `capacity` ids are inserted, then this panics. + /// + /// This is marked as inline(always) since the compiler won't inline it + /// otherwise, and it's a fairly hot piece of code in DFA determinization. + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn insert(&mut self, id: StateID) -> bool { + if self.contains(id) { + return false; + } + + let i = self.len(); + assert!( + i < self.capacity(), + "{:?} exceeds capacity of {:?} when inserting {:?}", + i, + self.capacity(), + id, + ); + // OK since i < self.capacity() and self.capacity() is guaranteed to + // be <= StateID::LIMIT. + let index = StateID::new_unchecked(i); + self.dense[index] = id; + self.sparse[id] = index; + self.len += 1; + true + } + + /// Returns true if and only if this set contains the given value. + #[inline] + pub(crate) fn contains(&self, id: StateID) -> bool { + let index = self.sparse[id]; + index.as_usize() < self.len() && self.dense[index] == id + } + + /// Clear this set such that it has no members. + #[inline] + pub(crate) fn clear(&mut self) { + self.len = 0; + } + + #[inline] + pub(crate) fn iter(&self) -> SparseSetIter<'_> { + SparseSetIter(self.dense[..self.len()].iter()) + } + + /// Returns the heap memory usage, in bytes, used by this sparse set. + #[inline] + pub(crate) fn memory_usage(&self) -> usize { + self.dense.len() * StateID::SIZE + self.sparse.len() * StateID::SIZE + } +} + +impl core::fmt::Debug for SparseSet { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + let elements: Vec<StateID> = self.iter().collect(); + f.debug_tuple("SparseSet").field(&elements).finish() + } +} + +/// An iterator over all elements in a sparse set. +/// +/// The lifetime `'a` refers to the lifetime of the set being iterated over. +#[derive(Debug)] +pub(crate) struct SparseSetIter<'a>(core::slice::Iter<'a, StateID>); + +impl<'a> Iterator for SparseSetIter<'a> { + type Item = StateID; + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn next(&mut self) -> Option<StateID> { + self.0.next().map(|&id| id) + } +} diff --git a/vendor/regex-automata/src/util/start.rs b/vendor/regex-automata/src/util/start.rs new file mode 100644 index 0000000..2715378 --- /dev/null +++ b/vendor/regex-automata/src/util/start.rs @@ -0,0 +1,479 @@ +/*! +Provides helpers for dealing with start state configurations in DFAs. 
+*/ + +use crate::util::{ + look::LookMatcher, + search::{Anchored, Input}, + wire::{self, DeserializeError, SerializeError}, +}; + +/// The configuration used to determine a DFA's start state for a search. +/// +/// A DFA has a single starting state in the typical textbook description. That +/// is, it corresponds to the set of all starting states for the NFA that built +/// it, along with their espsilon closures. In this crate, however, DFAs have +/// many possible start states due to a few factors: +/// +/// * DFAs support the ability to run either anchored or unanchored searches. +/// Each type of search needs its own start state. For example, an unanchored +/// search requires starting at a state corresponding to a regex with a +/// `(?s-u:.)*?` prefix, which will match through anything. +/// * DFAs also optionally support starting an anchored search for any one +/// specific pattern. Each such pattern requires its own start state. +/// * If a look-behind assertion like `^` or `\b` is used in the regex, then +/// the DFA will need to inspect a single byte immediately before the start of +/// the search to choose the correct start state. +/// +/// Indeed, this configuration precisely encapsulates all of the above factors. +/// The [`Config::anchored`] method sets which kind of anchored search to +/// perform while the [`Config::look_behind`] method provides a way to set +/// the byte that occurs immediately before the start of the search. +/// +/// Generally speaking, this type is only useful when you want to run searches +/// without using an [`Input`]. In particular, an `Input` wants a haystack +/// slice, but callers may not have a contiguous sequence of bytes as a +/// haystack in all cases. This type provides a lower level of control such +/// that callers can provide their own anchored configuration and look-behind +/// byte explicitly. +/// +/// # Example +/// +/// This shows basic usage that permits running a search with a DFA without +/// using the `Input` abstraction. +/// +/// ``` +/// use regex_automata::{ +/// dfa::{Automaton, dense}, +/// util::start, +/// Anchored, +/// }; +/// +/// let dfa = dense::DFA::new(r"(?-u)\b\w+\b")?; +/// let haystack = "quartz"; +/// +/// let config = start::Config::new().anchored(Anchored::Yes); +/// let mut state = dfa.start_state(&config)?; +/// for &b in haystack.as_bytes().iter() { +/// state = dfa.next_state(state, b); +/// } +/// state = dfa.next_eoi_state(state); +/// assert!(dfa.is_match_state(state)); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +/// +/// This example shows how to correctly run a search that doesn't begin at +/// the start of a haystack. Notice how we set the look-behind byte, and as +/// a result, the `\b` assertion does not match. +/// +/// ``` +/// use regex_automata::{ +/// dfa::{Automaton, dense}, +/// util::start, +/// Anchored, +/// }; +/// +/// let dfa = dense::DFA::new(r"(?-u)\b\w+\b")?; +/// let haystack = "quartz"; +/// +/// let config = start::Config::new() +/// .anchored(Anchored::Yes) +/// .look_behind(Some(b'q')); +/// let mut state = dfa.start_state(&config)?; +/// for &b in haystack.as_bytes().iter().skip(1) { +/// state = dfa.next_state(state, b); +/// } +/// state = dfa.next_eoi_state(state); +/// // No match! 
+/// assert!(!dfa.is_match_state(state)); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +/// +/// If we had instead not set a look-behind byte, then the DFA would assume +/// that it was starting at the beginning of the haystack, and thus `\b` should +/// match. This in turn would result in erroneously reporting a match: +/// +/// ``` +/// use regex_automata::{ +/// dfa::{Automaton, dense}, +/// util::start, +/// Anchored, +/// }; +/// +/// let dfa = dense::DFA::new(r"(?-u)\b\w+\b")?; +/// let haystack = "quartz"; +/// +/// // Whoops, forgot the look-behind byte... +/// let config = start::Config::new().anchored(Anchored::Yes); +/// let mut state = dfa.start_state(&config)?; +/// for &b in haystack.as_bytes().iter().skip(1) { +/// state = dfa.next_state(state, b); +/// } +/// state = dfa.next_eoi_state(state); +/// // And now we get a match unexpectedly. +/// assert!(dfa.is_match_state(state)); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct Config { + look_behind: Option<u8>, + anchored: Anchored, +} + +impl Config { + /// Create a new default start configuration. + /// + /// The default is an unanchored search that starts at the beginning of the + /// haystack. + pub fn new() -> Config { + Config { anchored: Anchored::No, look_behind: None } + } + + /// A convenience routine for building a start configuration from an + /// [`Input`] for a forward search. + /// + /// This automatically sets the look-behind byte to the byte immediately + /// preceding the start of the search. If the start of the search is at + /// offset `0`, then no look-behind byte is set. + pub fn from_input_forward(input: &Input<'_>) -> Config { + let look_behind = input + .start() + .checked_sub(1) + .and_then(|i| input.haystack().get(i).copied()); + Config { look_behind, anchored: input.get_anchored() } + } + + /// A convenience routine for building a start configuration from an + /// [`Input`] for a reverse search. + /// + /// This automatically sets the look-behind byte to the byte immediately + /// following the end of the search. If the end of the search is at + /// offset `haystack.len()`, then no look-behind byte is set. + pub fn from_input_reverse(input: &Input<'_>) -> Config { + let look_behind = input.haystack().get(input.end()).copied(); + Config { look_behind, anchored: input.get_anchored() } + } + + /// Set the look-behind byte at the start of a search. + /// + /// Unless the search is intended to logically start at the beginning of a + /// haystack, this should _always_ be set to the byte immediately preceding + /// the start of the search. If no look-behind byte is set, then the start + /// configuration will assume it is at the beginning of the haystack. For + /// example, the anchor `^` will match. + /// + /// The default is that no look-behind byte is set. + pub fn look_behind(mut self, byte: Option<u8>) -> Config { + self.look_behind = byte; + self + } + + /// Set the anchored mode of a search. + /// + /// The default is an unanchored search. + pub fn anchored(mut self, mode: Anchored) -> Config { + self.anchored = mode; + self + } + + /// Return the look-behind byte in this configuration, if one exists. + pub fn get_look_behind(&self) -> Option<u8> { + self.look_behind + } + + /// Return the anchored mode in this configuration. + pub fn get_anchored(&self) -> Anchored { + self.anchored + } +} + +/// A map from every possible byte value to its corresponding starting +/// configuration. 
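+///
+/// For illustration (this type is crate-internal, so the block below is not a
+/// doctest), a few of the mappings produced for the default `LookMatcher`:
+///
+/// ```ignore
+/// let smap = StartByteMap::new(&LookMatcher::default());
+/// assert_eq!(Start::LineLF, smap.get(b'\n'));
+/// assert_eq!(Start::LineCR, smap.get(b'\r'));
+/// assert_eq!(Start::WordByte, smap.get(b'a'));
+/// assert_eq!(Start::NonWordByte, smap.get(b' '));
+/// ```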
+/// +/// This map is used in order to lookup the start configuration for a particular +/// position in a haystack. This start configuration is then used in +/// combination with things like the anchored mode and pattern ID to fully +/// determine the start state. +/// +/// Generally speaking, this map is only used for fully compiled DFAs and lazy +/// DFAs. For NFAs (including the one-pass DFA), the start state is generally +/// selected by virtue of traversing the NFA state graph. DFAs do the same +/// thing, but at build time and not search time. (Well, technically the lazy +/// DFA does it at search time, but it does enough work to cache the full +/// result of the epsilon closure that the NFA engines tend to need to do.) +#[derive(Clone)] +pub(crate) struct StartByteMap { + map: [Start; 256], +} + +impl StartByteMap { + /// Create a new map from byte values to their corresponding starting + /// configurations. The map is determined, in part, by how look-around + /// assertions are matched via the matcher given. + pub(crate) fn new(lookm: &LookMatcher) -> StartByteMap { + let mut map = [Start::NonWordByte; 256]; + map[usize::from(b'\n')] = Start::LineLF; + map[usize::from(b'\r')] = Start::LineCR; + map[usize::from(b'_')] = Start::WordByte; + + let mut byte = b'0'; + while byte <= b'9' { + map[usize::from(byte)] = Start::WordByte; + byte += 1; + } + byte = b'A'; + while byte <= b'Z' { + map[usize::from(byte)] = Start::WordByte; + byte += 1; + } + byte = b'a'; + while byte <= b'z' { + map[usize::from(byte)] = Start::WordByte; + byte += 1; + } + + let lineterm = lookm.get_line_terminator(); + // If our line terminator is normal, then it is already handled by + // the LineLF and LineCR configurations. But if it's weird, then we + // overwrite whatever was there before for that terminator with a + // special configuration. The trick here is that if the terminator + // is, say, a word byte like `a`, then callers seeing this start + // configuration need to account for that and build their DFA state as + // if it *also* came from a word byte. + if lineterm != b'\r' && lineterm != b'\n' { + map[usize::from(lineterm)] = Start::CustomLineTerminator; + } + StartByteMap { map } + } + + /// Return the starting configuration for the given look-behind byte. + /// + /// If no look-behind exists, callers should use `Start::Text`. + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn get(&self, byte: u8) -> Start { + self.map[usize::from(byte)] + } + + /// Deserializes a byte class map from the given slice. If the slice is of + /// insufficient length or otherwise contains an impossible mapping, then + /// an error is returned. Upon success, the number of bytes read along with + /// the map are returned. The number of bytes read is always a multiple of + /// 8. + pub(crate) fn from_bytes( + slice: &[u8], + ) -> Result<(StartByteMap, usize), DeserializeError> { + wire::check_slice_len(slice, 256, "start byte map")?; + let mut map = [Start::NonWordByte; 256]; + for (i, &repr) in slice[..256].iter().enumerate() { + map[i] = match Start::from_usize(usize::from(repr)) { + Some(start) => start, + None => { + return Err(DeserializeError::generic( + "found invalid starting configuration", + )) + } + }; + } + Ok((StartByteMap { map }, 256)) + } + + /// Writes this map to the given byte buffer. if the given buffer is too + /// small, then an error is returned. Upon success, the total number of + /// bytes written is returned. The number of bytes written is guaranteed to + /// be a multiple of 8. 
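+    ///
+    /// A sketch of a round trip through `write_to` and `from_bytes` (again,
+    /// crate-internal, so not a doctest):
+    ///
+    /// ```ignore
+    /// let map = StartByteMap::new(&LookMatcher::default());
+    /// let mut buf = [0u8; 256];
+    /// assert_eq!(256, map.write_to(&mut buf).unwrap());
+    /// let (map2, nread) = StartByteMap::from_bytes(&buf).unwrap();
+    /// assert_eq!(256, nread);
+    /// assert_eq!(Start::LineLF, map2.get(b'\n'));
+    /// ```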
+ pub(crate) fn write_to( + &self, + dst: &mut [u8], + ) -> Result<usize, SerializeError> { + let nwrite = self.write_to_len(); + if dst.len() < nwrite { + return Err(SerializeError::buffer_too_small("start byte map")); + } + for (i, &start) in self.map.iter().enumerate() { + dst[i] = start.as_u8(); + } + Ok(nwrite) + } + + /// Returns the total number of bytes written by `write_to`. + pub(crate) fn write_to_len(&self) -> usize { + 256 + } +} + +impl core::fmt::Debug for StartByteMap { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + use crate::util::escape::DebugByte; + + write!(f, "StartByteMap{{")?; + for byte in 0..=255 { + if byte > 0 { + write!(f, ", ")?; + } + let start = self.map[usize::from(byte)]; + write!(f, "{:?} => {:?}", DebugByte(byte), start)?; + } + write!(f, "}}")?; + Ok(()) + } +} + +/// Represents the six possible starting configurations of a DFA search. +/// +/// The starting configuration is determined by inspecting the the beginning +/// of the haystack (up to 1 byte). Ultimately, this along with a pattern ID +/// (if specified) and the type of search (anchored or not) is what selects the +/// start state to use in a DFA. +/// +/// As one example, if a DFA only supports unanchored searches and does not +/// support anchored searches for each pattern, then it will have at most 6 +/// distinct start states. (Some start states may be reused if determinization +/// can determine that they will be equivalent.) If the DFA supports both +/// anchored and unanchored searches, then it will have a maximum of 12 +/// distinct start states. Finally, if the DFA also supports anchored searches +/// for each pattern, then it can have up to `12 + (N * 6)` start states, where +/// `N` is the number of patterns. +/// +/// Handling each of these starting configurations in the context of DFA +/// determinization can be *quite* tricky and subtle. But the code is small +/// and can be found at `crate::util::determinize::set_lookbehind_from_start`. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) enum Start { + /// This occurs when the starting position is not any of the ones below. + NonWordByte = 0, + /// This occurs when the byte immediately preceding the start of the search + /// is an ASCII word byte. + WordByte = 1, + /// This occurs when the starting position of the search corresponds to the + /// beginning of the haystack. + Text = 2, + /// This occurs when the byte immediately preceding the start of the search + /// is a line terminator. Specifically, `\n`. + LineLF = 3, + /// This occurs when the byte immediately preceding the start of the search + /// is a line terminator. Specifically, `\r`. + LineCR = 4, + /// This occurs when a custom line terminator has been set via a + /// `LookMatcher`, and when that line terminator is neither a `\r` or a + /// `\n`. + /// + /// If the custom line terminator is a word byte, then this start + /// configuration is still selected. DFAs that implement word boundary + /// assertions will likely need to check whether the custom line terminator + /// is a word byte, in which case, it should behave as if the byte + /// satisfies `\b` in addition to multi-line anchors. + CustomLineTerminator = 5, +} + +impl Start { + /// Return the starting state corresponding to the given integer. If no + /// starting state exists for the given integer, then None is returned. 
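+    ///
+    /// For example (illustrative, crate-internal):
+    ///
+    /// ```ignore
+    /// assert_eq!(Some(Start::LineLF), Start::from_usize(3));
+    /// assert_eq!(None, Start::from_usize(6));
+    /// ```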
+ pub(crate) fn from_usize(n: usize) -> Option<Start> { + match n { + 0 => Some(Start::NonWordByte), + 1 => Some(Start::WordByte), + 2 => Some(Start::Text), + 3 => Some(Start::LineLF), + 4 => Some(Start::LineCR), + 5 => Some(Start::CustomLineTerminator), + _ => None, + } + } + + /// Returns the total number of starting state configurations. + pub(crate) fn len() -> usize { + 6 + } + + /// Return this starting configuration as `u8` integer. It is guaranteed to + /// be less than `Start::len()`. + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn as_u8(&self) -> u8 { + // AFAIK, 'as' is the only way to zero-cost convert an int enum to an + // actual int. + *self as u8 + } + + /// Return this starting configuration as a `usize` integer. It is + /// guaranteed to be less than `Start::len()`. + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn as_usize(&self) -> usize { + usize::from(self.as_u8()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn start_fwd_done_range() { + let smap = StartByteMap::new(&LookMatcher::default()); + let input = Input::new("").range(1..0); + let config = Config::from_input_forward(&input); + let start = + config.get_look_behind().map_or(Start::Text, |b| smap.get(b)); + assert_eq!(Start::Text, start); + } + + #[test] + fn start_rev_done_range() { + let smap = StartByteMap::new(&LookMatcher::default()); + let input = Input::new("").range(1..0); + let config = Config::from_input_reverse(&input); + let start = + config.get_look_behind().map_or(Start::Text, |b| smap.get(b)); + assert_eq!(Start::Text, start); + } + + #[test] + fn start_fwd() { + let f = |haystack, start, end| { + let smap = StartByteMap::new(&LookMatcher::default()); + let input = Input::new(haystack).range(start..end); + let config = Config::from_input_forward(&input); + let start = + config.get_look_behind().map_or(Start::Text, |b| smap.get(b)); + start + }; + + assert_eq!(Start::Text, f("", 0, 0)); + assert_eq!(Start::Text, f("abc", 0, 3)); + assert_eq!(Start::Text, f("\nabc", 0, 3)); + + assert_eq!(Start::LineLF, f("\nabc", 1, 3)); + + assert_eq!(Start::LineCR, f("\rabc", 1, 3)); + + assert_eq!(Start::WordByte, f("abc", 1, 3)); + + assert_eq!(Start::NonWordByte, f(" abc", 1, 3)); + } + + #[test] + fn start_rev() { + let f = |haystack, start, end| { + let smap = StartByteMap::new(&LookMatcher::default()); + let input = Input::new(haystack).range(start..end); + let config = Config::from_input_reverse(&input); + let start = + config.get_look_behind().map_or(Start::Text, |b| smap.get(b)); + start + }; + + assert_eq!(Start::Text, f("", 0, 0)); + assert_eq!(Start::Text, f("abc", 0, 3)); + assert_eq!(Start::Text, f("abc\n", 0, 4)); + + assert_eq!(Start::LineLF, f("abc\nz", 0, 3)); + + assert_eq!(Start::LineCR, f("abc\rz", 0, 3)); + + assert_eq!(Start::WordByte, f("abc", 0, 2)); + + assert_eq!(Start::NonWordByte, f("abc ", 0, 3)); + } +} diff --git a/vendor/regex-automata/src/util/syntax.rs b/vendor/regex-automata/src/util/syntax.rs new file mode 100644 index 0000000..78e3cf9 --- /dev/null +++ b/vendor/regex-automata/src/util/syntax.rs @@ -0,0 +1,482 @@ +/*! +Utilities for dealing with the syntax of a regular expression. + +This module currently only exposes a [`Config`] type that +itself represents a wrapper around the configuration for a +[`regex-syntax::ParserBuilder`](regex_syntax::ParserBuilder). The purpose of +this wrapper is to make configuring syntax options very similar to how other +configuration is done throughout this crate. 
Namely, instead of duplicating +syntax options across every builder (of which there are many), we instead +create small config objects like this one that can be passed around and +composed. +*/ + +use alloc::{vec, vec::Vec}; + +use regex_syntax::{ + ast, + hir::{self, Hir}, + Error, ParserBuilder, +}; + +/// A convenience routine for parsing a pattern into an HIR value with the +/// default configuration. +/// +/// # Example +/// +/// This shows how to parse a pattern into an HIR value: +/// +/// ``` +/// use regex_automata::util::syntax; +/// +/// let hir = syntax::parse(r"([a-z]+)|([0-9]+)")?; +/// assert_eq!(Some(1), hir.properties().static_explicit_captures_len()); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +pub fn parse(pattern: &str) -> Result<Hir, Error> { + parse_with(pattern, &Config::default()) +} + +/// A convenience routine for parsing many patterns into HIR value with the +/// default configuration. +/// +/// # Example +/// +/// This shows how to parse many patterns into an corresponding HIR values: +/// +/// ``` +/// use { +/// regex_automata::util::syntax, +/// regex_syntax::hir::Properties, +/// }; +/// +/// let hirs = syntax::parse_many(&[ +/// r"([a-z]+)|([0-9]+)", +/// r"foo(A-Z]+)bar", +/// ])?; +/// let props = Properties::union(hirs.iter().map(|h| h.properties())); +/// assert_eq!(Some(1), props.static_explicit_captures_len()); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +pub fn parse_many<P: AsRef<str>>(patterns: &[P]) -> Result<Vec<Hir>, Error> { + parse_many_with(patterns, &Config::default()) +} + +/// A convenience routine for parsing a pattern into an HIR value using a +/// `Config`. +/// +/// # Example +/// +/// This shows how to parse a pattern into an HIR value with a non-default +/// configuration: +/// +/// ``` +/// use regex_automata::util::syntax; +/// +/// let hir = syntax::parse_with( +/// r"^[a-z]+$", +/// &syntax::Config::new().multi_line(true).crlf(true), +/// )?; +/// assert!(hir.properties().look_set().contains_anchor_crlf()); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +pub fn parse_with(pattern: &str, config: &Config) -> Result<Hir, Error> { + let mut builder = ParserBuilder::new(); + config.apply(&mut builder); + builder.build().parse(pattern) +} + +/// A convenience routine for parsing many patterns into HIR values using a +/// `Config`. +/// +/// # Example +/// +/// This shows how to parse many patterns into an corresponding HIR values +/// with a non-default configuration: +/// +/// ``` +/// use { +/// regex_automata::util::syntax, +/// regex_syntax::hir::Properties, +/// }; +/// +/// let patterns = &[ +/// r"([a-z]+)|([0-9]+)", +/// r"\W", +/// r"foo(A-Z]+)bar", +/// ]; +/// let config = syntax::Config::new().unicode(false).utf8(false); +/// let hirs = syntax::parse_many_with(patterns, &config)?; +/// let props = Properties::union(hirs.iter().map(|h| h.properties())); +/// assert!(!props.is_utf8()); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +pub fn parse_many_with<P: AsRef<str>>( + patterns: &[P], + config: &Config, +) -> Result<Vec<Hir>, Error> { + let mut builder = ParserBuilder::new(); + config.apply(&mut builder); + let mut hirs = vec![]; + for p in patterns.iter() { + hirs.push(builder.build().parse(p.as_ref())?); + } + Ok(hirs) +} + +/// A common set of configuration options that apply to the syntax of a regex. +/// +/// This represents a group of configuration options that specifically apply +/// to how the concrete syntax of a regular expression is interpreted. 
In +/// particular, they are generally forwarded to the +/// [`ParserBuilder`](https://docs.rs/regex-syntax/*/regex_syntax/struct.ParserBuilder.html) +/// in the +/// [`regex-syntax`](https://docs.rs/regex-syntax) +/// crate when building a regex from its concrete syntax directly. +/// +/// These options are defined as a group since they apply to every regex engine +/// in this crate. Instead of re-defining them on every engine's builder, they +/// are instead provided here as one cohesive unit. +#[derive(Clone, Copy, Debug)] +pub struct Config { + case_insensitive: bool, + multi_line: bool, + dot_matches_new_line: bool, + crlf: bool, + line_terminator: u8, + swap_greed: bool, + ignore_whitespace: bool, + unicode: bool, + utf8: bool, + nest_limit: u32, + octal: bool, +} + +impl Config { + /// Return a new default syntax configuration. + pub fn new() -> Config { + // These defaults match the ones used in regex-syntax. + Config { + case_insensitive: false, + multi_line: false, + dot_matches_new_line: false, + crlf: false, + line_terminator: b'\n', + swap_greed: false, + ignore_whitespace: false, + unicode: true, + utf8: true, + nest_limit: 250, + octal: false, + } + } + + /// Enable or disable the case insensitive flag by default. + /// + /// When Unicode mode is enabled, case insensitivity is Unicode-aware. + /// Specifically, it will apply the "simple" case folding rules as + /// specified by Unicode. + /// + /// By default this is disabled. It may alternatively be selectively + /// enabled in the regular expression itself via the `i` flag. + pub fn case_insensitive(mut self, yes: bool) -> Config { + self.case_insensitive = yes; + self + } + + /// Enable or disable the multi-line matching flag by default. + /// + /// When this is enabled, the `^` and `$` look-around assertions will + /// match immediately after and immediately before a new line character, + /// respectively. Note that the `\A` and `\z` look-around assertions are + /// unaffected by this setting and always correspond to matching at the + /// beginning and end of the input. + /// + /// By default this is disabled. It may alternatively be selectively + /// enabled in the regular expression itself via the `m` flag. + pub fn multi_line(mut self, yes: bool) -> Config { + self.multi_line = yes; + self + } + + /// Enable or disable the "dot matches any character" flag by default. + /// + /// When this is enabled, `.` will match any character. When it's disabled, + /// then `.` will match any character except for a new line character. + /// + /// Note that `.` is impacted by whether the "unicode" setting is enabled + /// or not. When Unicode is enabled (the default), `.` will match any UTF-8 + /// encoding of any Unicode scalar value (sans a new line, depending on + /// whether this "dot matches new line" option is enabled). When Unicode + /// mode is disabled, `.` will match any byte instead. Because of this, + /// when Unicode mode is disabled, `.` can only be used when the "allow + /// invalid UTF-8" option is enabled, since `.` could otherwise match + /// invalid UTF-8. + /// + /// By default this is disabled. It may alternatively be selectively + /// enabled in the regular expression itself via the `s` flag. + pub fn dot_matches_new_line(mut self, yes: bool) -> Config { + self.dot_matches_new_line = yes; + self + } + + /// Enable or disable the "CRLF mode" flag by default. + /// + /// By default this is disabled. It may alternatively be selectively + /// enabled in the regular expression itself via the `R` flag. 
+ /// + /// When CRLF mode is enabled, the following happens: + /// + /// * Unless `dot_matches_new_line` is enabled, `.` will match any character + /// except for `\r` and `\n`. + /// * When `multi_line` mode is enabled, `^` and `$` will treat `\r\n`, + /// `\r` and `\n` as line terminators. And in particular, neither will + /// match between a `\r` and a `\n`. + pub fn crlf(mut self, yes: bool) -> Config { + self.crlf = yes; + self + } + + /// Sets the line terminator for use with `(?u-s:.)` and `(?-us:.)`. + /// + /// Namely, instead of `.` (by default) matching everything except for `\n`, + /// this will cause `.` to match everything except for the byte given. + /// + /// If `.` is used in a context where Unicode mode is enabled and this byte + /// isn't ASCII, then an error will be returned. When Unicode mode is + /// disabled, then any byte is permitted, but will return an error if UTF-8 + /// mode is enabled and it is a non-ASCII byte. + /// + /// In short, any ASCII value for a line terminator is always okay. But a + /// non-ASCII byte might result in an error depending on whether Unicode + /// mode or UTF-8 mode are enabled. + /// + /// Note that if `R` mode is enabled then it always takes precedence and + /// the line terminator will be treated as `\r` and `\n` simultaneously. + /// + /// Note also that this *doesn't* impact the look-around assertions + /// `(?m:^)` and `(?m:$)`. That's usually controlled by additional + /// configuration in the regex engine itself. + pub fn line_terminator(mut self, byte: u8) -> Config { + self.line_terminator = byte; + self + } + + /// Enable or disable the "swap greed" flag by default. + /// + /// When this is enabled, `.*` (for example) will become ungreedy and `.*?` + /// will become greedy. + /// + /// By default this is disabled. It may alternatively be selectively + /// enabled in the regular expression itself via the `U` flag. + pub fn swap_greed(mut self, yes: bool) -> Config { + self.swap_greed = yes; + self + } + + /// Enable verbose mode in the regular expression. + /// + /// When enabled, verbose mode permits insigificant whitespace in many + /// places in the regular expression, as well as comments. Comments are + /// started using `#` and continue until the end of the line. + /// + /// By default, this is disabled. It may be selectively enabled in the + /// regular expression by using the `x` flag regardless of this setting. + pub fn ignore_whitespace(mut self, yes: bool) -> Config { + self.ignore_whitespace = yes; + self + } + + /// Enable or disable the Unicode flag (`u`) by default. + /// + /// By default this is **enabled**. It may alternatively be selectively + /// disabled in the regular expression itself via the `u` flag. + /// + /// Note that unless "allow invalid UTF-8" is enabled (it's disabled by + /// default), a regular expression will fail to parse if Unicode mode is + /// disabled and a sub-expression could possibly match invalid UTF-8. + /// + /// **WARNING**: Unicode mode can greatly increase the size of the compiled + /// DFA, which can noticeably impact both memory usage and compilation + /// time. This is especially noticeable if your regex contains character + /// classes like `\w` that are impacted by whether Unicode is enabled or + /// not. If Unicode is not necessary, you are encouraged to disable it. + pub fn unicode(mut self, yes: bool) -> Config { + self.unicode = yes; + self + } + + /// When disabled, the builder will permit the construction of a regular + /// expression that may match invalid UTF-8. 
+ /// + /// For example, when [`Config::unicode`] is disabled, then + /// expressions like `[^a]` may match invalid UTF-8 since they can match + /// any single byte that is not `a`. By default, these sub-expressions + /// are disallowed to avoid returning offsets that split a UTF-8 + /// encoded codepoint. However, in cases where matching at arbitrary + /// locations is desired, this option can be disabled to permit all such + /// sub-expressions. + /// + /// When enabled (the default), the builder is guaranteed to produce a + /// regex that will only ever match valid UTF-8 (otherwise, the builder + /// will return an error). + pub fn utf8(mut self, yes: bool) -> Config { + self.utf8 = yes; + self + } + + /// Set the nesting limit used for the regular expression parser. + /// + /// The nesting limit controls how deep the abstract syntax tree is allowed + /// to be. If the AST exceeds the given limit (e.g., with too many nested + /// groups), then an error is returned by the parser. + /// + /// The purpose of this limit is to act as a heuristic to prevent stack + /// overflow when building a finite automaton from a regular expression's + /// abstract syntax tree. In particular, construction currently uses + /// recursion. In the future, the implementation may stop using recursion + /// and this option will no longer be necessary. + /// + /// This limit is not checked until the entire AST is parsed. Therefore, + /// if callers want to put a limit on the amount of heap space used, then + /// they should impose a limit on the length, in bytes, of the concrete + /// pattern string. In particular, this is viable since the parser will + /// limit itself to heap space proportional to the length of the pattern + /// string. + /// + /// Note that a nest limit of `0` will return a nest limit error for most + /// patterns but not all. For example, a nest limit of `0` permits `a` but + /// not `ab`, since `ab` requires a concatenation AST item, which results + /// in a nest depth of `1`. In general, a nest limit is not something that + /// manifests in an obvious way in the concrete syntax, therefore, it + /// should not be used in a granular way. + pub fn nest_limit(mut self, limit: u32) -> Config { + self.nest_limit = limit; + self + } + + /// Whether to support octal syntax or not. + /// + /// Octal syntax is a little-known way of uttering Unicode codepoints in + /// a regular expression. For example, `a`, `\x61`, `\u0061` and + /// `\141` are all equivalent regular expressions, where the last example + /// shows octal syntax. + /// + /// While supporting octal syntax isn't in and of itself a problem, it does + /// make good error messages harder. That is, in PCRE based regex engines, + /// syntax like `\1` invokes a backreference, which is explicitly + /// unsupported in Rust's regex engine. However, many users expect it to + /// be supported. Therefore, when octal support is disabled, the error + /// message will explicitly mention that backreferences aren't supported. + /// + /// Octal syntax is disabled by default. + pub fn octal(mut self, yes: bool) -> Config { + self.octal = yes; + self + } + + /// Returns whether "unicode" mode is enabled. + pub fn get_unicode(&self) -> bool { + self.unicode + } + + /// Returns whether "case insensitive" mode is enabled. + pub fn get_case_insensitive(&self) -> bool { + self.case_insensitive + } + + /// Returns whether "multi line" mode is enabled. 
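// A brief usage sketch from a caller's point of view, assuming the crate's
// default features: `Config` is a plain `Copy` value, so one syntax
// configuration can be built once, inspected through the getters below, and
// reused across several parse calls. Only items defined in this module are
// used.

use regex_automata::util::syntax;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let config = syntax::Config::new()
        .case_insensitive(true)
        .multi_line(true)
        .unicode(false)
        .utf8(false);
    // The getters simply report back what was configured.
    assert!(config.get_case_insensitive());
    assert!(config.get_multi_line());
    assert!(!config.get_unicode());
    // The same configuration applies to every pattern parsed with it.
    let hirs = syntax::parse_many_with(&[r"^foo$", r"^bar$"], &config)?;
    assert_eq!(2, hirs.len());
    Ok(())
}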
+ pub fn get_multi_line(&self) -> bool { + self.multi_line + } + + /// Returns whether "dot matches new line" mode is enabled. + pub fn get_dot_matches_new_line(&self) -> bool { + self.dot_matches_new_line + } + + /// Returns whether "CRLF" mode is enabled. + pub fn get_crlf(&self) -> bool { + self.crlf + } + + /// Returns the line terminator in this syntax configuration. + pub fn get_line_terminator(&self) -> u8 { + self.line_terminator + } + + /// Returns whether "swap greed" mode is enabled. + pub fn get_swap_greed(&self) -> bool { + self.swap_greed + } + + /// Returns whether "ignore whitespace" mode is enabled. + pub fn get_ignore_whitespace(&self) -> bool { + self.ignore_whitespace + } + + /// Returns whether UTF-8 mode is enabled. + pub fn get_utf8(&self) -> bool { + self.utf8 + } + + /// Returns the "nest limit" setting. + pub fn get_nest_limit(&self) -> u32 { + self.nest_limit + } + + /// Returns whether "octal" mode is enabled. + pub fn get_octal(&self) -> bool { + self.octal + } + + /// Applies this configuration to the given parser. + pub(crate) fn apply(&self, builder: &mut ParserBuilder) { + builder + .unicode(self.unicode) + .case_insensitive(self.case_insensitive) + .multi_line(self.multi_line) + .dot_matches_new_line(self.dot_matches_new_line) + .crlf(self.crlf) + .line_terminator(self.line_terminator) + .swap_greed(self.swap_greed) + .ignore_whitespace(self.ignore_whitespace) + .utf8(self.utf8) + .nest_limit(self.nest_limit) + .octal(self.octal); + } + + /// Applies this configuration to the given AST parser. + pub(crate) fn apply_ast(&self, builder: &mut ast::parse::ParserBuilder) { + builder + .ignore_whitespace(self.ignore_whitespace) + .nest_limit(self.nest_limit) + .octal(self.octal); + } + + /// Applies this configuration to the given AST-to-HIR translator. + pub(crate) fn apply_hir( + &self, + builder: &mut hir::translate::TranslatorBuilder, + ) { + builder + .unicode(self.unicode) + .case_insensitive(self.case_insensitive) + .multi_line(self.multi_line) + .crlf(self.crlf) + .dot_matches_new_line(self.dot_matches_new_line) + .line_terminator(self.line_terminator) + .swap_greed(self.swap_greed) + .utf8(self.utf8); + } +} + +impl Default for Config { + fn default() -> Config { + Config::new() + } +} diff --git a/vendor/regex-automata/src/util/unicode_data/mod.rs b/vendor/regex-automata/src/util/unicode_data/mod.rs new file mode 100644 index 0000000..fc7b1c7 --- /dev/null +++ b/vendor/regex-automata/src/util/unicode_data/mod.rs @@ -0,0 +1,17 @@ +// This cfg should match the one in src/util/look.rs that uses perl_word. +#[cfg(all( + // We have to explicitly want to support Unicode word boundaries. + feature = "unicode-word-boundary", + not(all( + // If we don't have regex-syntax at all, then we definitely need to + // bring our own \w data table. + feature = "syntax", + // If unicode-perl is enabled, then regex-syntax/unicode-perl is + // also enabled, which in turn means we can use regex-syntax's + // is_word_character routine (and thus use its data tables). But if + // unicode-perl is not enabled, even if syntax is, then we need to + // bring our own. + feature = "unicode-perl", + )), +))] +pub(crate) mod perl_word; diff --git a/vendor/regex-automata/src/util/unicode_data/perl_word.rs b/vendor/regex-automata/src/util/unicode_data/perl_word.rs new file mode 100644 index 0000000..74d6265 --- /dev/null +++ b/vendor/regex-automata/src/util/unicode_data/perl_word.rs @@ -0,0 +1,781 @@ +// DO NOT EDIT THIS FILE. 
IT WAS AUTOMATICALLY GENERATED BY: +// +// ucd-generate perl-word tmp/ucd-15.0.0/ --chars +// +// Unicode version: 15.0.0. +// +// ucd-generate 0.2.15 is available on crates.io. + +pub const PERL_WORD: &'static [(char, char)] = &[ + ('0', '9'), + ('A', 'Z'), + ('_', '_'), + ('a', 'z'), + ('ª', 'ª'), + ('µ', 'µ'), + ('º', 'º'), + ('À', 'Ö'), + ('Ø', 'ö'), + ('ø', 'ˁ'), + ('ˆ', 'ˑ'), + ('ˠ', 'ˤ'), + ('ˬ', 'ˬ'), + ('ˮ', 'ˮ'), + ('\u{300}', 'ʹ'), + ('Ͷ', 'ͷ'), + ('ͺ', 'ͽ'), + ('Ϳ', 'Ϳ'), + ('Ά', 'Ά'), + ('Έ', 'Ί'), + ('Ό', 'Ό'), + ('Ύ', 'Ρ'), + ('Σ', 'ϵ'), + ('Ϸ', 'ҁ'), + ('\u{483}', 'ԯ'), + ('Ա', 'Ֆ'), + ('ՙ', 'ՙ'), + ('ՠ', 'ֈ'), + ('\u{591}', '\u{5bd}'), + ('\u{5bf}', '\u{5bf}'), + ('\u{5c1}', '\u{5c2}'), + ('\u{5c4}', '\u{5c5}'), + ('\u{5c7}', '\u{5c7}'), + ('א', 'ת'), + ('ׯ', 'ײ'), + ('\u{610}', '\u{61a}'), + ('ؠ', '٩'), + ('ٮ', 'ۓ'), + ('ە', '\u{6dc}'), + ('\u{6df}', '\u{6e8}'), + ('\u{6ea}', 'ۼ'), + ('ۿ', 'ۿ'), + ('ܐ', '\u{74a}'), + ('ݍ', 'ޱ'), + ('߀', 'ߵ'), + ('ߺ', 'ߺ'), + ('\u{7fd}', '\u{7fd}'), + ('ࠀ', '\u{82d}'), + ('ࡀ', '\u{85b}'), + ('ࡠ', 'ࡪ'), + ('ࡰ', 'ࢇ'), + ('ࢉ', 'ࢎ'), + ('\u{898}', '\u{8e1}'), + ('\u{8e3}', '\u{963}'), + ('०', '९'), + ('ॱ', 'ঃ'), + ('অ', 'ঌ'), + ('এ', 'ঐ'), + ('ও', 'ন'), + ('প', 'র'), + ('ল', 'ল'), + ('শ', 'হ'), + ('\u{9bc}', '\u{9c4}'), + ('ে', 'ৈ'), + ('ো', 'ৎ'), + ('\u{9d7}', '\u{9d7}'), + ('ড়', 'ঢ়'), + ('য়', '\u{9e3}'), + ('০', 'ৱ'), + ('ৼ', 'ৼ'), + ('\u{9fe}', '\u{9fe}'), + ('\u{a01}', 'ਃ'), + ('ਅ', 'ਊ'), + ('ਏ', 'ਐ'), + ('ਓ', 'ਨ'), + ('ਪ', 'ਰ'), + ('ਲ', 'ਲ਼'), + ('ਵ', 'ਸ਼'), + ('ਸ', 'ਹ'), + ('\u{a3c}', '\u{a3c}'), + ('ਾ', '\u{a42}'), + ('\u{a47}', '\u{a48}'), + ('\u{a4b}', '\u{a4d}'), + ('\u{a51}', '\u{a51}'), + ('ਖ਼', 'ੜ'), + ('ਫ਼', 'ਫ਼'), + ('੦', '\u{a75}'), + ('\u{a81}', 'ઃ'), + ('અ', 'ઍ'), + ('એ', 'ઑ'), + ('ઓ', 'ન'), + ('પ', 'ર'), + ('લ', 'ળ'), + ('વ', 'હ'), + ('\u{abc}', '\u{ac5}'), + ('\u{ac7}', 'ૉ'), + ('ો', '\u{acd}'), + ('ૐ', 'ૐ'), + ('ૠ', '\u{ae3}'), + ('૦', '૯'), + ('ૹ', '\u{aff}'), + ('\u{b01}', 'ଃ'), + ('ଅ', 'ଌ'), + ('ଏ', 'ଐ'), + ('ଓ', 'ନ'), + ('ପ', 'ର'), + ('ଲ', 'ଳ'), + ('ଵ', 'ହ'), + ('\u{b3c}', '\u{b44}'), + ('େ', 'ୈ'), + ('ୋ', '\u{b4d}'), + ('\u{b55}', '\u{b57}'), + ('ଡ଼', 'ଢ଼'), + ('ୟ', '\u{b63}'), + ('୦', '୯'), + ('ୱ', 'ୱ'), + ('\u{b82}', 'ஃ'), + ('அ', 'ஊ'), + ('எ', 'ஐ'), + ('ஒ', 'க'), + ('ங', 'ச'), + ('ஜ', 'ஜ'), + ('ஞ', 'ட'), + ('ண', 'த'), + ('ந', 'ப'), + ('ம', 'ஹ'), + ('\u{bbe}', 'ூ'), + ('ெ', 'ை'), + ('ொ', '\u{bcd}'), + ('ௐ', 'ௐ'), + ('\u{bd7}', '\u{bd7}'), + ('௦', '௯'), + ('\u{c00}', 'ఌ'), + ('ఎ', 'ఐ'), + ('ఒ', 'న'), + ('ప', 'హ'), + ('\u{c3c}', 'ౄ'), + ('\u{c46}', '\u{c48}'), + ('\u{c4a}', '\u{c4d}'), + ('\u{c55}', '\u{c56}'), + ('ౘ', 'ౚ'), + ('ౝ', 'ౝ'), + ('ౠ', '\u{c63}'), + ('౦', '౯'), + ('ಀ', 'ಃ'), + ('ಅ', 'ಌ'), + ('ಎ', 'ಐ'), + ('ಒ', 'ನ'), + ('ಪ', 'ಳ'), + ('ವ', 'ಹ'), + ('\u{cbc}', 'ೄ'), + ('\u{cc6}', 'ೈ'), + ('ೊ', '\u{ccd}'), + ('\u{cd5}', '\u{cd6}'), + ('ೝ', 'ೞ'), + ('ೠ', '\u{ce3}'), + ('೦', '೯'), + ('ೱ', 'ೳ'), + ('\u{d00}', 'ഌ'), + ('എ', 'ഐ'), + ('ഒ', '\u{d44}'), + ('െ', 'ൈ'), + ('ൊ', 'ൎ'), + ('ൔ', '\u{d57}'), + ('ൟ', '\u{d63}'), + ('൦', '൯'), + ('ൺ', 'ൿ'), + ('\u{d81}', 'ඃ'), + ('අ', 'ඖ'), + ('ක', 'න'), + ('ඳ', 'ර'), + ('ල', 'ල'), + ('ව', 'ෆ'), + ('\u{dca}', '\u{dca}'), + ('\u{dcf}', '\u{dd4}'), + ('\u{dd6}', '\u{dd6}'), + ('ෘ', '\u{ddf}'), + ('෦', '෯'), + ('ෲ', 'ෳ'), + ('ก', '\u{e3a}'), + ('เ', '\u{e4e}'), + ('๐', '๙'), + ('ກ', 'ຂ'), + ('ຄ', 'ຄ'), + ('ຆ', 'ຊ'), + ('ຌ', 'ຣ'), + ('ລ', 'ລ'), + ('ວ', 'ຽ'), + ('ເ', 'ໄ'), + ('ໆ', 'ໆ'), + ('\u{ec8}', '\u{ece}'), + ('໐', '໙'), + ('ໜ', 'ໟ'), + ('ༀ', 'ༀ'), + ('\u{f18}', '\u{f19}'), + 
('༠', '༩'), + ('\u{f35}', '\u{f35}'), + ('\u{f37}', '\u{f37}'), + ('\u{f39}', '\u{f39}'), + ('༾', 'ཇ'), + ('ཉ', 'ཬ'), + ('\u{f71}', '\u{f84}'), + ('\u{f86}', '\u{f97}'), + ('\u{f99}', '\u{fbc}'), + ('\u{fc6}', '\u{fc6}'), + ('က', '၉'), + ('ၐ', '\u{109d}'), + ('Ⴀ', 'Ⴥ'), + ('Ⴧ', 'Ⴧ'), + ('Ⴭ', 'Ⴭ'), + ('ა', 'ჺ'), + ('ჼ', 'ቈ'), + ('ቊ', 'ቍ'), + ('ቐ', 'ቖ'), + ('ቘ', 'ቘ'), + ('ቚ', 'ቝ'), + ('በ', 'ኈ'), + ('ኊ', 'ኍ'), + ('ነ', 'ኰ'), + ('ኲ', 'ኵ'), + ('ኸ', 'ኾ'), + ('ዀ', 'ዀ'), + ('ዂ', 'ዅ'), + ('ወ', 'ዖ'), + ('ዘ', 'ጐ'), + ('ጒ', 'ጕ'), + ('ጘ', 'ፚ'), + ('\u{135d}', '\u{135f}'), + ('ᎀ', 'ᎏ'), + ('Ꭰ', 'Ᏽ'), + ('ᏸ', 'ᏽ'), + ('ᐁ', 'ᙬ'), + ('ᙯ', 'ᙿ'), + ('ᚁ', 'ᚚ'), + ('ᚠ', 'ᛪ'), + ('ᛮ', 'ᛸ'), + ('ᜀ', '᜕'), + ('ᜟ', '᜴'), + ('ᝀ', '\u{1753}'), + ('ᝠ', 'ᝬ'), + ('ᝮ', 'ᝰ'), + ('\u{1772}', '\u{1773}'), + ('ក', '\u{17d3}'), + ('ៗ', 'ៗ'), + ('ៜ', '\u{17dd}'), + ('០', '៩'), + ('\u{180b}', '\u{180d}'), + ('\u{180f}', '᠙'), + ('ᠠ', 'ᡸ'), + ('ᢀ', 'ᢪ'), + ('ᢰ', 'ᣵ'), + ('ᤀ', 'ᤞ'), + ('\u{1920}', 'ᤫ'), + ('ᤰ', '\u{193b}'), + ('᥆', 'ᥭ'), + ('ᥰ', 'ᥴ'), + ('ᦀ', 'ᦫ'), + ('ᦰ', 'ᧉ'), + ('᧐', '᧙'), + ('ᨀ', '\u{1a1b}'), + ('ᨠ', '\u{1a5e}'), + ('\u{1a60}', '\u{1a7c}'), + ('\u{1a7f}', '᪉'), + ('᪐', '᪙'), + ('ᪧ', 'ᪧ'), + ('\u{1ab0}', '\u{1ace}'), + ('\u{1b00}', 'ᭌ'), + ('᭐', '᭙'), + ('\u{1b6b}', '\u{1b73}'), + ('\u{1b80}', '᯳'), + ('ᰀ', '\u{1c37}'), + ('᱀', '᱉'), + ('ᱍ', 'ᱽ'), + ('ᲀ', 'ᲈ'), + ('Ა', 'Ჺ'), + ('Ჽ', 'Ჿ'), + ('\u{1cd0}', '\u{1cd2}'), + ('\u{1cd4}', 'ᳺ'), + ('ᴀ', 'ἕ'), + ('Ἐ', 'Ἕ'), + ('ἠ', 'ὅ'), + ('Ὀ', 'Ὅ'), + ('ὐ', 'ὗ'), + ('Ὑ', 'Ὑ'), + ('Ὓ', 'Ὓ'), + ('Ὕ', 'Ὕ'), + ('Ὗ', 'ώ'), + ('ᾀ', 'ᾴ'), + ('ᾶ', 'ᾼ'), + ('ι', 'ι'), + ('ῂ', 'ῄ'), + ('ῆ', 'ῌ'), + ('ῐ', 'ΐ'), + ('ῖ', 'Ί'), + ('ῠ', 'Ῥ'), + ('ῲ', 'ῴ'), + ('ῶ', 'ῼ'), + ('\u{200c}', '\u{200d}'), + ('‿', '⁀'), + ('⁔', '⁔'), + ('ⁱ', 'ⁱ'), + ('ⁿ', 'ⁿ'), + ('ₐ', 'ₜ'), + ('\u{20d0}', '\u{20f0}'), + ('ℂ', 'ℂ'), + ('ℇ', 'ℇ'), + ('ℊ', 'ℓ'), + ('ℕ', 'ℕ'), + ('ℙ', 'ℝ'), + ('ℤ', 'ℤ'), + ('Ω', 'Ω'), + ('ℨ', 'ℨ'), + ('K', 'ℭ'), + ('ℯ', 'ℹ'), + ('ℼ', 'ℿ'), + ('ⅅ', 'ⅉ'), + ('ⅎ', 'ⅎ'), + ('Ⅰ', 'ↈ'), + ('Ⓐ', 'ⓩ'), + ('Ⰰ', 'ⳤ'), + ('Ⳬ', 'ⳳ'), + ('ⴀ', 'ⴥ'), + ('ⴧ', 'ⴧ'), + ('ⴭ', 'ⴭ'), + ('ⴰ', 'ⵧ'), + ('ⵯ', 'ⵯ'), + ('\u{2d7f}', 'ⶖ'), + ('ⶠ', 'ⶦ'), + ('ⶨ', 'ⶮ'), + ('ⶰ', 'ⶶ'), + ('ⶸ', 'ⶾ'), + ('ⷀ', 'ⷆ'), + ('ⷈ', 'ⷎ'), + ('ⷐ', 'ⷖ'), + ('ⷘ', 'ⷞ'), + ('\u{2de0}', '\u{2dff}'), + ('ⸯ', 'ⸯ'), + ('々', '〇'), + ('〡', '\u{302f}'), + ('〱', '〵'), + ('〸', '〼'), + ('ぁ', 'ゖ'), + ('\u{3099}', '\u{309a}'), + ('ゝ', 'ゟ'), + ('ァ', 'ヺ'), + ('ー', 'ヿ'), + ('ㄅ', 'ㄯ'), + ('ㄱ', 'ㆎ'), + ('ㆠ', 'ㆿ'), + ('ㇰ', 'ㇿ'), + ('㐀', '䶿'), + ('一', 'ꒌ'), + ('ꓐ', 'ꓽ'), + ('ꔀ', 'ꘌ'), + ('ꘐ', 'ꘫ'), + ('Ꙁ', '\u{a672}'), + ('\u{a674}', '\u{a67d}'), + ('ꙿ', '\u{a6f1}'), + ('ꜗ', 'ꜟ'), + ('Ꜣ', 'ꞈ'), + ('Ꞌ', 'ꟊ'), + ('Ꟑ', 'ꟑ'), + ('ꟓ', 'ꟓ'), + ('ꟕ', 'ꟙ'), + ('ꟲ', 'ꠧ'), + ('\u{a82c}', '\u{a82c}'), + ('ꡀ', 'ꡳ'), + ('ꢀ', '\u{a8c5}'), + ('꣐', '꣙'), + ('\u{a8e0}', 'ꣷ'), + ('ꣻ', 'ꣻ'), + ('ꣽ', '\u{a92d}'), + ('ꤰ', '꥓'), + ('ꥠ', 'ꥼ'), + ('\u{a980}', '꧀'), + ('ꧏ', '꧙'), + ('ꧠ', 'ꧾ'), + ('ꨀ', '\u{aa36}'), + ('ꩀ', 'ꩍ'), + ('꩐', '꩙'), + ('ꩠ', 'ꩶ'), + ('ꩺ', 'ꫂ'), + ('ꫛ', 'ꫝ'), + ('ꫠ', 'ꫯ'), + ('ꫲ', '\u{aaf6}'), + ('ꬁ', 'ꬆ'), + ('ꬉ', 'ꬎ'), + ('ꬑ', 'ꬖ'), + ('ꬠ', 'ꬦ'), + ('ꬨ', 'ꬮ'), + ('ꬰ', 'ꭚ'), + ('ꭜ', 'ꭩ'), + ('ꭰ', 'ꯪ'), + ('꯬', '\u{abed}'), + ('꯰', '꯹'), + ('가', '힣'), + ('ힰ', 'ퟆ'), + ('ퟋ', 'ퟻ'), + ('豈', '舘'), + ('並', '龎'), + ('ff', 'st'), + ('ﬓ', 'ﬗ'), + ('יִ', 'ﬨ'), + ('שׁ', 'זּ'), + ('טּ', 'לּ'), + ('מּ', 'מּ'), + ('נּ', 'סּ'), + ('ףּ', 'פּ'), + ('צּ', 'ﮱ'), + ('ﯓ', 'ﴽ'), + ('ﵐ', 'ﶏ'), + ('ﶒ', 'ﷇ'), + ('ﷰ', 'ﷻ'), + ('\u{fe00}', '\u{fe0f}'), + ('\u{fe20}', 
'\u{fe2f}'), + ('︳', '︴'), + ('﹍', '﹏'), + ('ﹰ', 'ﹴ'), + ('ﹶ', 'ﻼ'), + ('0', '9'), + ('A', 'Z'), + ('_', '_'), + ('a', 'z'), + ('ヲ', 'ᄒ'), + ('ᅡ', 'ᅦ'), + ('ᅧ', 'ᅬ'), + ('ᅭ', 'ᅲ'), + ('ᅳ', 'ᅵ'), + ('𐀀', '𐀋'), + ('𐀍', '𐀦'), + ('𐀨', '𐀺'), + ('𐀼', '𐀽'), + ('𐀿', '𐁍'), + ('𐁐', '𐁝'), + ('𐂀', '𐃺'), + ('𐅀', '𐅴'), + ('\u{101fd}', '\u{101fd}'), + ('𐊀', '𐊜'), + ('𐊠', '𐋐'), + ('\u{102e0}', '\u{102e0}'), + ('𐌀', '𐌟'), + ('𐌭', '𐍊'), + ('𐍐', '\u{1037a}'), + ('𐎀', '𐎝'), + ('𐎠', '𐏃'), + ('𐏈', '𐏏'), + ('𐏑', '𐏕'), + ('𐐀', '𐒝'), + ('𐒠', '𐒩'), + ('𐒰', '𐓓'), + ('𐓘', '𐓻'), + ('𐔀', '𐔧'), + ('𐔰', '𐕣'), + ('𐕰', '𐕺'), + ('𐕼', '𐖊'), + ('𐖌', '𐖒'), + ('𐖔', '𐖕'), + ('𐖗', '𐖡'), + ('𐖣', '𐖱'), + ('𐖳', '𐖹'), + ('𐖻', '𐖼'), + ('𐘀', '𐜶'), + ('𐝀', '𐝕'), + ('𐝠', '𐝧'), + ('𐞀', '𐞅'), + ('𐞇', '𐞰'), + ('𐞲', '𐞺'), + ('𐠀', '𐠅'), + ('𐠈', '𐠈'), + ('𐠊', '𐠵'), + ('𐠷', '𐠸'), + ('𐠼', '𐠼'), + ('𐠿', '𐡕'), + ('𐡠', '𐡶'), + ('𐢀', '𐢞'), + ('𐣠', '𐣲'), + ('𐣴', '𐣵'), + ('𐤀', '𐤕'), + ('𐤠', '𐤹'), + ('𐦀', '𐦷'), + ('𐦾', '𐦿'), + ('𐨀', '\u{10a03}'), + ('\u{10a05}', '\u{10a06}'), + ('\u{10a0c}', '𐨓'), + ('𐨕', '𐨗'), + ('𐨙', '𐨵'), + ('\u{10a38}', '\u{10a3a}'), + ('\u{10a3f}', '\u{10a3f}'), + ('𐩠', '𐩼'), + ('𐪀', '𐪜'), + ('𐫀', '𐫇'), + ('𐫉', '\u{10ae6}'), + ('𐬀', '𐬵'), + ('𐭀', '𐭕'), + ('𐭠', '𐭲'), + ('𐮀', '𐮑'), + ('𐰀', '𐱈'), + ('𐲀', '𐲲'), + ('𐳀', '𐳲'), + ('𐴀', '\u{10d27}'), + ('𐴰', '𐴹'), + ('𐺀', '𐺩'), + ('\u{10eab}', '\u{10eac}'), + ('𐺰', '𐺱'), + ('\u{10efd}', '𐼜'), + ('𐼧', '𐼧'), + ('𐼰', '\u{10f50}'), + ('𐽰', '\u{10f85}'), + ('𐾰', '𐿄'), + ('𐿠', '𐿶'), + ('𑀀', '\u{11046}'), + ('𑁦', '𑁵'), + ('\u{1107f}', '\u{110ba}'), + ('\u{110c2}', '\u{110c2}'), + ('𑃐', '𑃨'), + ('𑃰', '𑃹'), + ('\u{11100}', '\u{11134}'), + ('𑄶', '𑄿'), + ('𑅄', '𑅇'), + ('𑅐', '\u{11173}'), + ('𑅶', '𑅶'), + ('\u{11180}', '𑇄'), + ('\u{111c9}', '\u{111cc}'), + ('𑇎', '𑇚'), + ('𑇜', '𑇜'), + ('𑈀', '𑈑'), + ('𑈓', '\u{11237}'), + ('\u{1123e}', '\u{11241}'), + ('𑊀', '𑊆'), + ('𑊈', '𑊈'), + ('𑊊', '𑊍'), + ('𑊏', '𑊝'), + ('𑊟', '𑊨'), + ('𑊰', '\u{112ea}'), + ('𑋰', '𑋹'), + ('\u{11300}', '𑌃'), + ('𑌅', '𑌌'), + ('𑌏', '𑌐'), + ('𑌓', '𑌨'), + ('𑌪', '𑌰'), + ('𑌲', '𑌳'), + ('𑌵', '𑌹'), + ('\u{1133b}', '𑍄'), + ('𑍇', '𑍈'), + ('𑍋', '𑍍'), + ('𑍐', '𑍐'), + ('\u{11357}', '\u{11357}'), + ('𑍝', '𑍣'), + ('\u{11366}', '\u{1136c}'), + ('\u{11370}', '\u{11374}'), + ('𑐀', '𑑊'), + ('𑑐', '𑑙'), + ('\u{1145e}', '𑑡'), + ('𑒀', '𑓅'), + ('𑓇', '𑓇'), + ('𑓐', '𑓙'), + ('𑖀', '\u{115b5}'), + ('𑖸', '\u{115c0}'), + ('𑗘', '\u{115dd}'), + ('𑘀', '\u{11640}'), + ('𑙄', '𑙄'), + ('𑙐', '𑙙'), + ('𑚀', '𑚸'), + ('𑛀', '𑛉'), + ('𑜀', '𑜚'), + ('\u{1171d}', '\u{1172b}'), + ('𑜰', '𑜹'), + ('𑝀', '𑝆'), + ('𑠀', '\u{1183a}'), + ('𑢠', '𑣩'), + ('𑣿', '𑤆'), + ('𑤉', '𑤉'), + ('𑤌', '𑤓'), + ('𑤕', '𑤖'), + ('𑤘', '𑤵'), + ('𑤷', '𑤸'), + ('\u{1193b}', '\u{11943}'), + ('𑥐', '𑥙'), + ('𑦠', '𑦧'), + ('𑦪', '\u{119d7}'), + ('\u{119da}', '𑧡'), + ('𑧣', '𑧤'), + ('𑨀', '\u{11a3e}'), + ('\u{11a47}', '\u{11a47}'), + ('𑩐', '\u{11a99}'), + ('𑪝', '𑪝'), + ('𑪰', '𑫸'), + ('𑰀', '𑰈'), + ('𑰊', '\u{11c36}'), + ('\u{11c38}', '𑱀'), + ('𑱐', '𑱙'), + ('𑱲', '𑲏'), + ('\u{11c92}', '\u{11ca7}'), + ('𑲩', '\u{11cb6}'), + ('𑴀', '𑴆'), + ('𑴈', '𑴉'), + ('𑴋', '\u{11d36}'), + ('\u{11d3a}', '\u{11d3a}'), + ('\u{11d3c}', '\u{11d3d}'), + ('\u{11d3f}', '\u{11d47}'), + ('𑵐', '𑵙'), + ('𑵠', '𑵥'), + ('𑵧', '𑵨'), + ('𑵪', '𑶎'), + ('\u{11d90}', '\u{11d91}'), + ('𑶓', '𑶘'), + ('𑶠', '𑶩'), + ('𑻠', '𑻶'), + ('\u{11f00}', '𑼐'), + ('𑼒', '\u{11f3a}'), + ('𑼾', '\u{11f42}'), + ('𑽐', '𑽙'), + ('𑾰', '𑾰'), + ('𒀀', '𒎙'), + ('𒐀', '𒑮'), + ('𒒀', '𒕃'), + ('𒾐', '𒿰'), + ('𓀀', '𓐯'), + ('\u{13440}', '\u{13455}'), + ('𔐀', '𔙆'), + ('𖠀', '𖨸'), + ('𖩀', '𖩞'), + ('𖩠', '𖩩'), + 
('𖩰', '𖪾'), + ('𖫀', '𖫉'), + ('𖫐', '𖫭'), + ('\u{16af0}', '\u{16af4}'), + ('𖬀', '\u{16b36}'), + ('𖭀', '𖭃'), + ('𖭐', '𖭙'), + ('𖭣', '𖭷'), + ('𖭽', '𖮏'), + ('𖹀', '𖹿'), + ('𖼀', '𖽊'), + ('\u{16f4f}', '𖾇'), + ('\u{16f8f}', '𖾟'), + ('𖿠', '𖿡'), + ('𖿣', '\u{16fe4}'), + ('𖿰', '𖿱'), + ('𗀀', '𘟷'), + ('𘠀', '𘳕'), + ('𘴀', '𘴈'), + ('𚿰', '𚿳'), + ('𚿵', '𚿻'), + ('𚿽', '𚿾'), + ('𛀀', '𛄢'), + ('𛄲', '𛄲'), + ('𛅐', '𛅒'), + ('𛅕', '𛅕'), + ('𛅤', '𛅧'), + ('𛅰', '𛋻'), + ('𛰀', '𛱪'), + ('𛱰', '𛱼'), + ('𛲀', '𛲈'), + ('𛲐', '𛲙'), + ('\u{1bc9d}', '\u{1bc9e}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), + ('\u{1d165}', '\u{1d169}'), + ('𝅭', '\u{1d172}'), + ('\u{1d17b}', '\u{1d182}'), + ('\u{1d185}', '\u{1d18b}'), + ('\u{1d1aa}', '\u{1d1ad}'), + ('\u{1d242}', '\u{1d244}'), + ('𝐀', '𝑔'), + ('𝑖', '𝒜'), + ('𝒞', '𝒟'), + ('𝒢', '𝒢'), + ('𝒥', '𝒦'), + ('𝒩', '𝒬'), + ('𝒮', '𝒹'), + ('𝒻', '𝒻'), + ('𝒽', '𝓃'), + ('𝓅', '𝔅'), + ('𝔇', '𝔊'), + ('𝔍', '𝔔'), + ('𝔖', '𝔜'), + ('𝔞', '𝔹'), + ('𝔻', '𝔾'), + ('𝕀', '𝕄'), + ('𝕆', '𝕆'), + ('𝕊', '𝕐'), + ('𝕒', '𝚥'), + ('𝚨', '𝛀'), + ('𝛂', '𝛚'), + ('𝛜', '𝛺'), + ('𝛼', '𝜔'), + ('𝜖', '𝜴'), + ('𝜶', '𝝎'), + ('𝝐', '𝝮'), + ('𝝰', '𝞈'), + ('𝞊', '𝞨'), + ('𝞪', '𝟂'), + ('𝟄', '𝟋'), + ('𝟎', '𝟿'), + ('\u{1da00}', '\u{1da36}'), + ('\u{1da3b}', '\u{1da6c}'), + ('\u{1da75}', '\u{1da75}'), + ('\u{1da84}', '\u{1da84}'), + ('\u{1da9b}', '\u{1da9f}'), + ('\u{1daa1}', '\u{1daaf}'), + ('𝼀', '𝼞'), + ('𝼥', '𝼪'), + ('\u{1e000}', '\u{1e006}'), + ('\u{1e008}', '\u{1e018}'), + ('\u{1e01b}', '\u{1e021}'), + ('\u{1e023}', '\u{1e024}'), + ('\u{1e026}', '\u{1e02a}'), + ('𞀰', '𞁭'), + ('\u{1e08f}', '\u{1e08f}'), + ('𞄀', '𞄬'), + ('\u{1e130}', '𞄽'), + ('𞅀', '𞅉'), + ('𞅎', '𞅎'), + ('𞊐', '\u{1e2ae}'), + ('𞋀', '𞋹'), + ('𞓐', '𞓹'), + ('𞟠', '𞟦'), + ('𞟨', '𞟫'), + ('𞟭', '𞟮'), + ('𞟰', '𞟾'), + ('𞠀', '𞣄'), + ('\u{1e8d0}', '\u{1e8d6}'), + ('𞤀', '𞥋'), + ('𞥐', '𞥙'), + ('𞸀', '𞸃'), + ('𞸅', '𞸟'), + ('𞸡', '𞸢'), + ('𞸤', '𞸤'), + ('𞸧', '𞸧'), + ('𞸩', '𞸲'), + ('𞸴', '𞸷'), + ('𞸹', '𞸹'), + ('𞸻', '𞸻'), + ('𞹂', '𞹂'), + ('𞹇', '𞹇'), + ('𞹉', '𞹉'), + ('𞹋', '𞹋'), + ('𞹍', '𞹏'), + ('𞹑', '𞹒'), + ('𞹔', '𞹔'), + ('𞹗', '𞹗'), + ('𞹙', '𞹙'), + ('𞹛', '𞹛'), + ('𞹝', '𞹝'), + ('𞹟', '𞹟'), + ('𞹡', '𞹢'), + ('𞹤', '𞹤'), + ('𞹧', '𞹪'), + ('𞹬', '𞹲'), + ('𞹴', '𞹷'), + ('𞹹', '𞹼'), + ('𞹾', '𞹾'), + ('𞺀', '𞺉'), + ('𞺋', '𞺛'), + ('𞺡', '𞺣'), + ('𞺥', '𞺩'), + ('𞺫', '𞺻'), + ('🄰', '🅉'), + ('🅐', '🅩'), + ('🅰', '🆉'), + ('🯰', '🯹'), + ('𠀀', '𪛟'), + ('𪜀', '𫜹'), + ('𫝀', '𫠝'), + ('𫠠', '𬺡'), + ('𬺰', '𮯠'), + ('丽', '𪘀'), + ('𰀀', '𱍊'), + ('𱍐', '𲎯'), + ('\u{e0100}', '\u{e01ef}'), +]; diff --git a/vendor/regex-automata/src/util/utf8.rs b/vendor/regex-automata/src/util/utf8.rs new file mode 100644 index 0000000..91b27ef --- /dev/null +++ b/vendor/regex-automata/src/util/utf8.rs @@ -0,0 +1,196 @@ +/*! +Utilities for dealing with UTF-8. + +This module provides some UTF-8 related helper routines, including an +incremental decoder. +*/ + +/// Returns true if and only if the given byte is considered a word character. +/// This only applies to ASCII. +/// +/// This was copied from regex-syntax so that we can use it to determine the +/// starting DFA state while searching without depending on regex-syntax. The +/// definition is never going to change, so there's no maintenance/bit-rot +/// hazard here. +#[cfg_attr(feature = "perf-inline", inline(always))] +pub(crate) fn is_word_byte(b: u8) -> bool { + const fn mkwordset() -> [bool; 256] { + // FIXME: Use as_usize() once const functions in traits are stable. 
+ let mut set = [false; 256]; + set[b'_' as usize] = true; + + let mut byte = b'0'; + while byte <= b'9' { + set[byte as usize] = true; + byte += 1; + } + byte = b'A'; + while byte <= b'Z' { + set[byte as usize] = true; + byte += 1; + } + byte = b'a'; + while byte <= b'z' { + set[byte as usize] = true; + byte += 1; + } + set + } + const WORD: [bool; 256] = mkwordset(); + WORD[b as usize] +} + +/// Decodes the next UTF-8 encoded codepoint from the given byte slice. +/// +/// If no valid encoding of a codepoint exists at the beginning of the given +/// byte slice, then the first byte is returned instead. +/// +/// This returns `None` if and only if `bytes` is empty. +/// +/// This never panics. +/// +/// *WARNING*: This is not designed for performance. If you're looking for a +/// fast UTF-8 decoder, this is not it. If you feel like you need one in this +/// crate, then please file an issue and discuss your use case. +#[cfg_attr(feature = "perf-inline", inline(always))] +pub(crate) fn decode(bytes: &[u8]) -> Option<Result<char, u8>> { + if bytes.is_empty() { + return None; + } + let len = match len(bytes[0]) { + None => return Some(Err(bytes[0])), + Some(len) if len > bytes.len() => return Some(Err(bytes[0])), + Some(1) => return Some(Ok(char::from(bytes[0]))), + Some(len) => len, + }; + match core::str::from_utf8(&bytes[..len]) { + Ok(s) => Some(Ok(s.chars().next().unwrap())), + Err(_) => Some(Err(bytes[0])), + } +} + +/// Decodes the last UTF-8 encoded codepoint from the given byte slice. +/// +/// If no valid encoding of a codepoint exists at the end of the given byte +/// slice, then the last byte is returned instead. +/// +/// This returns `None` if and only if `bytes` is empty. +#[cfg_attr(feature = "perf-inline", inline(always))] +pub(crate) fn decode_last(bytes: &[u8]) -> Option<Result<char, u8>> { + if bytes.is_empty() { + return None; + } + let mut start = bytes.len() - 1; + let limit = bytes.len().saturating_sub(4); + while start > limit && !is_leading_or_invalid_byte(bytes[start]) { + start -= 1; + } + match decode(&bytes[start..]) { + None => None, + Some(Ok(ch)) => Some(Ok(ch)), + Some(Err(_)) => Some(Err(bytes[bytes.len() - 1])), + } +} + +/// Given a UTF-8 leading byte, this returns the total number of code units +/// in the following encoded codepoint. +/// +/// If the given byte is not a valid UTF-8 leading byte, then this returns +/// `None`. +#[cfg_attr(feature = "perf-inline", inline(always))] +fn len(byte: u8) -> Option<usize> { + if byte <= 0x7F { + return Some(1); + } else if byte & 0b1100_0000 == 0b1000_0000 { + return None; + } else if byte <= 0b1101_1111 { + Some(2) + } else if byte <= 0b1110_1111 { + Some(3) + } else if byte <= 0b1111_0111 { + Some(4) + } else { + None + } +} + +/// Returns true if and only if the given offset in the given bytes falls on a +/// valid UTF-8 encoded codepoint boundary. +/// +/// If `bytes` is not valid UTF-8, then the behavior of this routine is +/// unspecified. +#[cfg_attr(feature = "perf-inline", inline(always))] +pub(crate) fn is_boundary(bytes: &[u8], i: usize) -> bool { + match bytes.get(i) { + // The position at the end of the bytes always represents an empty + // string, which is a valid boundary. But anything after that doesn't + // make much sense to call valid a boundary. 
+ None => i == bytes.len(), + // Other than ASCII (where the most significant bit is never set), + // valid starting bytes always have their most significant two bits + // set, where as continuation bytes never have their second most + // significant bit set. Therefore, this only returns true when bytes[i] + // corresponds to a byte that begins a valid UTF-8 encoding of a + // Unicode scalar value. + Some(&b) => b <= 0b0111_1111 || b >= 0b1100_0000, + } +} + +/// Returns true if and only if the given byte is either a valid leading UTF-8 +/// byte, or is otherwise an invalid byte that can never appear anywhere in a +/// valid UTF-8 sequence. +#[cfg_attr(feature = "perf-inline", inline(always))] +fn is_leading_or_invalid_byte(b: u8) -> bool { + // In the ASCII case, the most significant bit is never set. The leading + // byte of a 2/3/4-byte sequence always has the top two most significant + // bits set. For bytes that can never appear anywhere in valid UTF-8, this + // also returns true, since every such byte has its two most significant + // bits set: + // + // \xC0 :: 11000000 + // \xC1 :: 11000001 + // \xF5 :: 11110101 + // \xF6 :: 11110110 + // \xF7 :: 11110111 + // \xF8 :: 11111000 + // \xF9 :: 11111001 + // \xFA :: 11111010 + // \xFB :: 11111011 + // \xFC :: 11111100 + // \xFD :: 11111101 + // \xFE :: 11111110 + // \xFF :: 11111111 + (b & 0b1100_0000) != 0b1000_0000 +} + +/* +/// Returns the smallest possible index of the next valid UTF-8 sequence +/// starting after `i`. +/// +/// For all inputs, including invalid UTF-8 and any value of `i`, the return +/// value is guaranteed to be greater than `i`. (If there is no value greater +/// than `i` that fits in `usize`, then this panics.) +/// +/// Generally speaking, this should only be called on `text` when it is +/// permitted to assume that it is valid UTF-8 and where either `i >= +/// text.len()` or where `text[i]` is a leading byte of a UTF-8 sequence. +/// +/// NOTE: This method was used in a previous conception of iterators where we +/// specifically tried to skip over empty matches that split a codepoint by +/// simply requiring that our next search begin at the beginning of codepoint. +/// But we ended up changing that technique to always advance by 1 byte and +/// then filter out matches that split a codepoint after-the-fact. Thus, we no +/// longer use this method. But I've kept it around in case we want to switch +/// back to this approach. Its guarantees are a little subtle, so I'd prefer +/// not to rebuild it from whole cloth. +pub(crate) fn next(text: &[u8], i: usize) -> usize { + let b = match text.get(i) { + None => return i.checked_add(1).unwrap(), + Some(&b) => b, + }; + // For cases where we see an invalid UTF-8 byte, there isn't much we can do + // other than just start at the next byte. + let inc = len(b).unwrap_or(1); + i.checked_add(inc).unwrap() +} +*/ diff --git a/vendor/regex-automata/src/util/wire.rs b/vendor/regex-automata/src/util/wire.rs new file mode 100644 index 0000000..ecf4fd8 --- /dev/null +++ b/vendor/regex-automata/src/util/wire.rs @@ -0,0 +1,975 @@ +/*! +Types and routines that support the wire format of finite automata. + +Currently, this module just exports a few error types and some small helpers +for deserializing [dense DFAs](crate::dfa::dense::DFA) using correct alignment. +*/ + +/* +A collection of helper functions, types and traits for serializing automata. + +This crate defines its own bespoke serialization mechanism for some structures +provided in the public API, namely, DFAs. 
A bespoke mechanism was developed +primarily because structures like automata demand a specific binary format. +Attempting to encode their rich structure in an existing serialization +format is just not feasible. Moreover, the format for each structure is +generally designed such that deserialization is cheap. More specifically, that +deserialization can be done in constant time. (The idea being that you can +embed it into your binary or mmap it, and then use it immediately.) + +In order to achieve this, the dense and sparse DFAs in this crate use an +in-memory representation that very closely corresponds to its binary serialized +form. This pervades and complicates everything, and in some cases, requires +dealing with alignment and reasoning about safety. + +This technique does have major advantages. In particular, it permits doing +the potentially costly work of compiling a finite state machine in an offline +manner, and then loading it at runtime not only without having to re-compile +the regex, but even without the code required to do the compilation. This, for +example, permits one to use a pre-compiled DFA not only in environments without +Rust's standard library, but also in environments without a heap. + +In the code below, whenever we insert some kind of padding, it's to enforce a +4-byte alignment, unless otherwise noted. Namely, u32 is the only state ID type +supported. (In a previous version of this library, DFAs were generic over the +state ID representation.) + +Also, serialization generally requires the caller to specify endianness, +where as deserialization always assumes native endianness (otherwise cheap +deserialization would be impossible). This implies that serializing a structure +generally requires serializing both its big-endian and little-endian variants, +and then loading the correct one based on the target's endianness. +*/ + +use core::{ + cmp, + convert::{TryFrom, TryInto}, + mem::size_of, +}; + +#[cfg(feature = "alloc")] +use alloc::{vec, vec::Vec}; + +use crate::util::{ + int::Pointer, + primitives::{PatternID, PatternIDError, StateID, StateIDError}, +}; + +/// A hack to align a smaller type `B` with a bigger type `T`. +/// +/// The usual use of this is with `B = [u8]` and `T = u32`. That is, +/// it permits aligning a sequence of bytes on a 4-byte boundary. This +/// is useful in contexts where one wants to embed a serialized [dense +/// DFA](crate::dfa::dense::DFA) into a Rust a program while guaranteeing the +/// alignment required for the DFA. +/// +/// See [`dense::DFA::from_bytes`](crate::dfa::dense::DFA::from_bytes) for an +/// example of how to use this type. +#[repr(C)] +#[derive(Debug)] +pub struct AlignAs<B: ?Sized, T> { + /// A zero-sized field indicating the alignment we want. + pub _align: [T; 0], + /// A possibly non-sized field containing a sequence of bytes. + pub bytes: B, +} + +/// An error that occurs when serializing an object from this crate. +/// +/// Serialization, as used in this crate, universally refers to the process +/// of transforming a structure (like a DFA) into a custom binary format +/// represented by `&[u8]`. To this end, serialization is generally infallible. +/// However, it can fail when caller provided buffer sizes are too small. When +/// that occurs, a serialization error is reported. +/// +/// A `SerializeError` provides no introspection capabilities. Its only +/// supported operation is conversion to a human readable error message. 
+/// +/// This error type implements the `std::error::Error` trait only when the +/// `std` feature is enabled. Otherwise, this type is defined in all +/// configurations. +#[derive(Debug)] +pub struct SerializeError { + /// The name of the thing that a buffer is too small for. + /// + /// Currently, the only kind of serialization error is one that is + /// committed by a caller: providing a destination buffer that is too + /// small to fit the serialized object. This makes sense conceptually, + /// since every valid inhabitant of a type should be serializable. + /// + /// This is somewhat exposed in the public API of this crate. For example, + /// the `to_bytes_{big,little}_endian` APIs return a `Vec<u8>` and are + /// guaranteed to never panic or error. This is only possible because the + /// implementation guarantees that it will allocate a `Vec<u8>` that is + /// big enough. + /// + /// In summary, if a new serialization error kind needs to be added, then + /// it will need careful consideration. + what: &'static str, +} + +impl SerializeError { + pub(crate) fn buffer_too_small(what: &'static str) -> SerializeError { + SerializeError { what } + } +} + +impl core::fmt::Display for SerializeError { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "destination buffer is too small to write {}", self.what) + } +} + +#[cfg(feature = "std")] +impl std::error::Error for SerializeError {} + +/// An error that occurs when deserializing an object defined in this crate. +/// +/// Serialization, as used in this crate, universally refers to the process +/// of transforming a structure (like a DFA) into a custom binary format +/// represented by `&[u8]`. Deserialization, then, refers to the process of +/// cheaply converting this binary format back to the object's in-memory +/// representation as defined in this crate. To the extent possible, +/// deserialization will report this error whenever this process fails. +/// +/// A `DeserializeError` provides no introspection capabilities. Its only +/// supported operation is conversion to a human readable error message. +/// +/// This error type implements the `std::error::Error` trait only when the +/// `std` feature is enabled. Otherwise, this type is defined in all +/// configurations. 
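// Sketch of how this kind of error surfaces to users, assuming the crate is
// built with its DFA support enabled (part of the default feature set):
// `dense::DFA::from_bytes` is the sort of public entry point that returns a
// `DeserializeError`, and the only supported way to inspect a failure is its
// `Display` message.

use regex_automata::dfa::dense;

fn load_dfa(bytes: &[u8]) {
    // `from_bytes` validates alignment, the label, endianness, the version
    // and so on, reporting any problem as a `DeserializeError`.
    let result: Result<(dense::DFA<&[u32]>, usize), _> =
        dense::DFA::from_bytes(bytes);
    match result {
        Ok((_dfa, nread)) => println!("loaded DFA from {} bytes", nread),
        Err(err) => eprintln!("failed to load DFA: {}", err),
    }
}

fn main() {
    // Garbage input: this is expected to hit the error arm above.
    load_dfa(&[0u8; 16]);
}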
+#[derive(Debug)] +pub struct DeserializeError(DeserializeErrorKind); + +#[derive(Debug)] +enum DeserializeErrorKind { + Generic { msg: &'static str }, + BufferTooSmall { what: &'static str }, + InvalidUsize { what: &'static str }, + VersionMismatch { expected: u32, found: u32 }, + EndianMismatch { expected: u32, found: u32 }, + AlignmentMismatch { alignment: usize, address: usize }, + LabelMismatch { expected: &'static str }, + ArithmeticOverflow { what: &'static str }, + PatternID { err: PatternIDError, what: &'static str }, + StateID { err: StateIDError, what: &'static str }, +} + +impl DeserializeError { + pub(crate) fn generic(msg: &'static str) -> DeserializeError { + DeserializeError(DeserializeErrorKind::Generic { msg }) + } + + pub(crate) fn buffer_too_small(what: &'static str) -> DeserializeError { + DeserializeError(DeserializeErrorKind::BufferTooSmall { what }) + } + + fn invalid_usize(what: &'static str) -> DeserializeError { + DeserializeError(DeserializeErrorKind::InvalidUsize { what }) + } + + fn version_mismatch(expected: u32, found: u32) -> DeserializeError { + DeserializeError(DeserializeErrorKind::VersionMismatch { + expected, + found, + }) + } + + fn endian_mismatch(expected: u32, found: u32) -> DeserializeError { + DeserializeError(DeserializeErrorKind::EndianMismatch { + expected, + found, + }) + } + + fn alignment_mismatch( + alignment: usize, + address: usize, + ) -> DeserializeError { + DeserializeError(DeserializeErrorKind::AlignmentMismatch { + alignment, + address, + }) + } + + fn label_mismatch(expected: &'static str) -> DeserializeError { + DeserializeError(DeserializeErrorKind::LabelMismatch { expected }) + } + + fn arithmetic_overflow(what: &'static str) -> DeserializeError { + DeserializeError(DeserializeErrorKind::ArithmeticOverflow { what }) + } + + fn pattern_id_error( + err: PatternIDError, + what: &'static str, + ) -> DeserializeError { + DeserializeError(DeserializeErrorKind::PatternID { err, what }) + } + + pub(crate) fn state_id_error( + err: StateIDError, + what: &'static str, + ) -> DeserializeError { + DeserializeError(DeserializeErrorKind::StateID { err, what }) + } +} + +#[cfg(feature = "std")] +impl std::error::Error for DeserializeError {} + +impl core::fmt::Display for DeserializeError { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + use self::DeserializeErrorKind::*; + + match self.0 { + Generic { msg } => write!(f, "{}", msg), + BufferTooSmall { what } => { + write!(f, "buffer is too small to read {}", what) + } + InvalidUsize { what } => { + write!(f, "{} is too big to fit in a usize", what) + } + VersionMismatch { expected, found } => write!( + f, + "unsupported version: \ + expected version {} but found version {}", + expected, found, + ), + EndianMismatch { expected, found } => write!( + f, + "endianness mismatch: expected 0x{:X} but got 0x{:X}. 
\ + (Are you trying to load an object serialized with a \ + different endianness?)", + expected, found, + ), + AlignmentMismatch { alignment, address } => write!( + f, + "alignment mismatch: slice starts at address \ + 0x{:X}, which is not aligned to a {} byte boundary", + address, alignment, + ), + LabelMismatch { expected } => write!( + f, + "label mismatch: start of serialized object should \ + contain a NUL terminated {:?} label, but a different \ + label was found", + expected, + ), + ArithmeticOverflow { what } => { + write!(f, "arithmetic overflow for {}", what) + } + PatternID { ref err, what } => { + write!(f, "failed to read pattern ID for {}: {}", what, err) + } + StateID { ref err, what } => { + write!(f, "failed to read state ID for {}: {}", what, err) + } + } + } +} + +/// Safely converts a `&[u32]` to `&[StateID]` with zero cost. +#[cfg_attr(feature = "perf-inline", inline(always))] +pub(crate) fn u32s_to_state_ids(slice: &[u32]) -> &[StateID] { + // SAFETY: This is safe because StateID is defined to have the same memory + // representation as a u32 (it is repr(transparent)). While not every u32 + // is a "valid" StateID, callers are not permitted to rely on the validity + // of StateIDs for memory safety. It can only lead to logical errors. (This + // is why StateID::new_unchecked is safe.) + unsafe { + core::slice::from_raw_parts( + slice.as_ptr().cast::<StateID>(), + slice.len(), + ) + } +} + +/// Safely converts a `&mut [u32]` to `&mut [StateID]` with zero cost. +pub(crate) fn u32s_to_state_ids_mut(slice: &mut [u32]) -> &mut [StateID] { + // SAFETY: This is safe because StateID is defined to have the same memory + // representation as a u32 (it is repr(transparent)). While not every u32 + // is a "valid" StateID, callers are not permitted to rely on the validity + // of StateIDs for memory safety. It can only lead to logical errors. (This + // is why StateID::new_unchecked is safe.) + unsafe { + core::slice::from_raw_parts_mut( + slice.as_mut_ptr().cast::<StateID>(), + slice.len(), + ) + } +} + +/// Safely converts a `&[u32]` to `&[PatternID]` with zero cost. +#[cfg_attr(feature = "perf-inline", inline(always))] +pub(crate) fn u32s_to_pattern_ids(slice: &[u32]) -> &[PatternID] { + // SAFETY: This is safe because PatternID is defined to have the same + // memory representation as a u32 (it is repr(transparent)). While not + // every u32 is a "valid" PatternID, callers are not permitted to rely + // on the validity of PatternIDs for memory safety. It can only lead to + // logical errors. (This is why PatternID::new_unchecked is safe.) + unsafe { + core::slice::from_raw_parts( + slice.as_ptr().cast::<PatternID>(), + slice.len(), + ) + } +} + +/// Checks that the given slice has an alignment that matches `T`. +/// +/// This is useful for checking that a slice has an appropriate alignment +/// before casting it to a &[T]. Note though that alignment is not itself +/// sufficient to perform the cast for any `T`. +pub(crate) fn check_alignment<T>( + slice: &[u8], +) -> Result<(), DeserializeError> { + let alignment = core::mem::align_of::<T>(); + let address = slice.as_ptr().as_usize(); + if address % alignment == 0 { + return Ok(()); + } + Err(DeserializeError::alignment_mismatch(alignment, address)) +} + +/// Reads a possibly empty amount of padding, up to 7 bytes, from the beginning +/// of the given slice. All padding bytes must be NUL bytes. 
+/// +/// This is useful because it can be theoretically necessary to pad the +/// beginning of a serialized object with NUL bytes to ensure that it starts +/// at a correctly aligned address. These padding bytes should come immediately +/// before the label. +/// +/// This returns the number of bytes read from the given slice. +pub(crate) fn skip_initial_padding(slice: &[u8]) -> usize { + let mut nread = 0; + while nread < 7 && nread < slice.len() && slice[nread] == 0 { + nread += 1; + } + nread +} + +/// Allocate a byte buffer of the given size, along with some initial padding +/// such that `buf[padding..]` has the same alignment as `T`, where the +/// alignment of `T` must be at most `8`. In particular, callers should treat +/// the first N bytes (second return value) as padding bytes that must not be +/// overwritten. In all cases, the following identity holds: +/// +/// ```ignore +/// let (buf, padding) = alloc_aligned_buffer::<StateID>(SIZE); +/// assert_eq!(SIZE, buf[padding..].len()); +/// ``` +/// +/// In practice, padding is often zero. +/// +/// The requirement for `8` as a maximum here is somewhat arbitrary. In +/// practice, we never need anything bigger in this crate, and so this function +/// does some sanity asserts under the assumption of a max alignment of `8`. +#[cfg(feature = "alloc")] +pub(crate) fn alloc_aligned_buffer<T>(size: usize) -> (Vec<u8>, usize) { + // NOTE: This is a kludge because there's no easy way to allocate a Vec<u8> + // with an alignment guaranteed to be greater than 1. We could create a + // Vec<u32>, but this cannot be safely transmuted to a Vec<u8> without + // concern, since reallocing or dropping the Vec<u8> is UB (different + // alignment than the initial allocation). We could define a wrapper type + // to manage this for us, but it seems like more machinery than it's worth. + let buf = vec![0; size]; + let align = core::mem::align_of::<T>(); + let address = buf.as_ptr().as_usize(); + if address % align == 0 { + return (buf, 0); + } + // Let's try this again. We have to create a totally new alloc with + // the maximum amount of bytes we might need. We can't just extend our + // pre-existing 'buf' because that might create a new alloc with a + // different alignment. + let extra = align - 1; + let mut buf = vec![0; size + extra]; + let address = buf.as_ptr().as_usize(); + // The code below handles the case where 'address' is aligned to T, so if + // we got lucky and 'address' is now aligned to T (when it previously + // wasn't), then we're done. + if address % align == 0 { + buf.truncate(size); + return (buf, 0); + } + let padding = ((address & !(align - 1)).checked_add(align).unwrap()) + .checked_sub(address) + .unwrap(); + assert!(padding <= 7, "padding of {} is bigger than 7", padding); + assert!( + padding <= extra, + "padding of {} is bigger than extra {} bytes", + padding, + extra + ); + buf.truncate(size + padding); + assert_eq!(size + padding, buf.len()); + assert_eq!( + 0, + buf[padding..].as_ptr().as_usize() % align, + "expected end of initial padding to be aligned to {}", + align, + ); + (buf, padding) +} + +/// Reads a NUL terminated label starting at the beginning of the given slice. +/// +/// If a NUL terminated label could not be found, then an error is returned. +/// Similarly, if a label is found but doesn't match the expected label, then +/// an error is returned. +/// +/// Upon success, the total number of bytes read (including padding bytes) is +/// returned. 
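// Standalone sketch of the label layout described above: the label bytes,
// a NUL terminator, and then enough additional NUL bytes to round the total
// up to a multiple of 4. The `padding_len` helper here is a hypothetical
// stand-in with the same intent as the crate's internal helper.

fn padded_label_len(label: &str) -> usize {
    assert!(label.len() <= 255, "label must not be longer than 255 bytes");
    assert!(!label.as_bytes().contains(&0), "label must not contain NUL");
    let len = label.len() + 1; // +1 for the NUL terminator
    len + padding_len(len)
}

// Number of extra NUL bytes needed to reach the next multiple of 4.
fn padding_len(len: usize) -> usize {
    (4 - (len % 4)) % 4
}

fn main() {
    assert_eq!(4, padded_label_len("abc")); // 3 bytes + NUL, already aligned
    assert_eq!(8, padded_label_len("dense")); // 5 bytes + NUL = 6, pad to 8
}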
+pub(crate) fn read_label( + slice: &[u8], + expected_label: &'static str, +) -> Result<usize, DeserializeError> { + // Set an upper bound on how many bytes we scan for a NUL. Since no label + // in this crate is longer than 256 bytes, if we can't find one within that + // range, then we have corrupted data. + let first_nul = + slice[..cmp::min(slice.len(), 256)].iter().position(|&b| b == 0); + let first_nul = match first_nul { + Some(first_nul) => first_nul, + None => { + return Err(DeserializeError::generic( + "could not find NUL terminated label \ + at start of serialized object", + )); + } + }; + let len = first_nul + padding_len(first_nul); + if slice.len() < len { + return Err(DeserializeError::generic( + "could not find properly sized label at start of serialized object" + )); + } + if expected_label.as_bytes() != &slice[..first_nul] { + return Err(DeserializeError::label_mismatch(expected_label)); + } + Ok(len) +} + +/// Writes the given label to the buffer as a NUL terminated string. The label +/// given must not contain NUL, otherwise this will panic. Similarly, the label +/// must not be longer than 255 bytes, otherwise this will panic. +/// +/// Additional NUL bytes are written as necessary to ensure that the number of +/// bytes written is always a multiple of 4. +/// +/// Upon success, the total number of bytes written (including padding) is +/// returned. +pub(crate) fn write_label( + label: &str, + dst: &mut [u8], +) -> Result<usize, SerializeError> { + let nwrite = write_label_len(label); + if dst.len() < nwrite { + return Err(SerializeError::buffer_too_small("label")); + } + dst[..label.len()].copy_from_slice(label.as_bytes()); + for i in 0..(nwrite - label.len()) { + dst[label.len() + i] = 0; + } + assert_eq!(nwrite % 4, 0); + Ok(nwrite) +} + +/// Returns the total number of bytes (including padding) that would be written +/// for the given label. This panics if the given label contains a NUL byte or +/// is longer than 255 bytes. (The size restriction exists so that searching +/// for a label during deserialization can be done in small bounded space.) +pub(crate) fn write_label_len(label: &str) -> usize { + if label.len() > 255 { + panic!("label must not be longer than 255 bytes"); + } + if label.as_bytes().iter().position(|&b| b == 0).is_some() { + panic!("label must not contain NUL bytes"); + } + let label_len = label.len() + 1; // +1 for the NUL terminator + label_len + padding_len(label_len) +} + +/// Reads the endianness check from the beginning of the given slice and +/// confirms that the endianness of the serialized object matches the expected +/// endianness. If the slice is too small or if the endianness check fails, +/// this returns an error. +/// +/// Upon success, the total number of bytes read is returned. +pub(crate) fn read_endianness_check( + slice: &[u8], +) -> Result<usize, DeserializeError> { + let (n, nr) = try_read_u32(slice, "endianness check")?; + assert_eq!(nr, write_endianness_check_len()); + if n != 0xFEFF { + return Err(DeserializeError::endian_mismatch(0xFEFF, n)); + } + Ok(nr) +} + +/// Writes 0xFEFF as an integer using the given endianness. +/// +/// This is useful for writing into the header of a serialized object. It can +/// be read during deserialization as a sanity check to ensure the proper +/// endianness is used. +/// +/// Upon success, the total number of bytes written is returned. 
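// Standalone sketch of the endianness check: the serializer writes the
// 32-bit value 0xFEFF in a chosen endianness, and the deserializer reads it
// back with native endianness. If the bytes were produced for the other
// endianness, the value read back is not 0xFEFF and loading fails.

fn main() {
    let word: u32 = 0xFEFF;

    // Pretend we serialized for a little-endian target...
    let serialized = word.to_le_bytes();

    // ...and now deserialize using native endianness.
    let read_back = u32::from_ne_bytes(serialized);
    if read_back == 0xFEFF {
        println!("endianness matches; safe to reinterpret the rest");
    } else {
        // This is the condition that produces the "endianness mismatch"
        // deserialization error described above.
        println!("endianness mismatch: got {:#X}", read_back);
    }
}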
+pub(crate) fn write_endianness_check<E: Endian>(
+    dst: &mut [u8],
+) -> Result<usize, SerializeError> {
+    let nwrite = write_endianness_check_len();
+    if dst.len() < nwrite {
+        return Err(SerializeError::buffer_too_small("endianness check"));
+    }
+    E::write_u32(0xFEFF, dst);
+    Ok(nwrite)
+}
+
+/// Returns the number of bytes written by the endianness check.
+pub(crate) fn write_endianness_check_len() -> usize {
+    size_of::<u32>()
+}
+
+/// Reads a version number from the beginning of the given slice and confirms
+/// that it matches the expected version number given. If the slice is too
+/// small or if the version numbers aren't equivalent, this returns an error.
+///
+/// Upon success, the total number of bytes read is returned.
+///
+/// N.B. Currently, we require that the version number is exactly equivalent.
+/// In the future, if we bump the version number without a semver bump, then
+/// we'll need to relax this a bit and support older versions.
+pub(crate) fn read_version(
+    slice: &[u8],
+    expected_version: u32,
+) -> Result<usize, DeserializeError> {
+    let (n, nr) = try_read_u32(slice, "version")?;
+    assert_eq!(nr, write_version_len());
+    if n != expected_version {
+        return Err(DeserializeError::version_mismatch(expected_version, n));
+    }
+    Ok(nr)
+}
+
+/// Writes the given version number to the beginning of the given slice.
+///
+/// This is useful for writing into the header of a serialized object. It can
+/// be read during deserialization as a sanity check to ensure that the library
+/// code supports the format of the serialized object.
+///
+/// Upon success, the total number of bytes written is returned.
+pub(crate) fn write_version<E: Endian>(
+    version: u32,
+    dst: &mut [u8],
+) -> Result<usize, SerializeError> {
+    let nwrite = write_version_len();
+    if dst.len() < nwrite {
+        return Err(SerializeError::buffer_too_small("version number"));
+    }
+    E::write_u32(version, dst);
+    Ok(nwrite)
+}
+
+/// Returns the number of bytes written by writing the version number.
+pub(crate) fn write_version_len() -> usize {
+    size_of::<u32>()
+}
+
+/// Reads a pattern ID from the given slice. If the slice has insufficient
+/// length, then this panics. If the deserialized integer exceeds the pattern
+/// ID limit for the current target, then this returns an error.
+///
+/// Upon success, this also returns the number of bytes read.
+pub(crate) fn read_pattern_id(
+    slice: &[u8],
+    what: &'static str,
+) -> Result<(PatternID, usize), DeserializeError> {
+    let bytes: [u8; PatternID::SIZE] =
+        slice[..PatternID::SIZE].try_into().unwrap();
+    let pid = PatternID::from_ne_bytes(bytes)
+        .map_err(|err| DeserializeError::pattern_id_error(err, what))?;
+    Ok((pid, PatternID::SIZE))
+}
+
+/// Reads a pattern ID from the given slice. If the slice has insufficient
+/// length, then this panics. Otherwise, the deserialized integer is assumed
+/// to be a valid pattern ID.
+///
+/// This also returns the number of bytes read.
+pub(crate) fn read_pattern_id_unchecked(slice: &[u8]) -> (PatternID, usize) {
+    let pid = PatternID::from_ne_bytes_unchecked(
+        slice[..PatternID::SIZE].try_into().unwrap(),
+    );
+    (pid, PatternID::SIZE)
+}
+
+/// Write the given pattern ID to the beginning of the given slice of bytes
+/// using the specified endianness. The given slice must have length at least
+/// `PatternID::SIZE`, or else this panics. Upon success, the total number of
+/// bytes written is returned.
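+///
+/// As a sketch (using native endianness so that `read_pattern_id`, which
+/// reads native endian bytes, can round trip the value; `PatternID::ZERO` is
+/// just a convenient illustrative value):
+///
+/// ```ignore
+/// let mut buf = [0u8; PatternID::SIZE];
+/// let nwrite = write_pattern_id::<NE>(PatternID::ZERO, &mut buf);
+/// let (pid, nread) = read_pattern_id(&buf, "pattern ID")?;
+/// assert_eq!((PatternID::ZERO, nwrite), (pid, nread));
+/// ```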
+pub(crate) fn write_pattern_id<E: Endian>( + pid: PatternID, + dst: &mut [u8], +) -> usize { + E::write_u32(pid.as_u32(), dst); + PatternID::SIZE +} + +/// Attempts to read a state ID from the given slice. If the slice has an +/// insufficient number of bytes or if the state ID exceeds the limit for +/// the current target, then this returns an error. +/// +/// Upon success, this also returns the number of bytes read. +pub(crate) fn try_read_state_id( + slice: &[u8], + what: &'static str, +) -> Result<(StateID, usize), DeserializeError> { + if slice.len() < StateID::SIZE { + return Err(DeserializeError::buffer_too_small(what)); + } + read_state_id(slice, what) +} + +/// Reads a state ID from the given slice. If the slice has insufficient +/// length, then this panics. If the deserialized integer exceeds the state ID +/// limit for the current target, then this returns an error. +/// +/// Upon success, this also returns the number of bytes read. +pub(crate) fn read_state_id( + slice: &[u8], + what: &'static str, +) -> Result<(StateID, usize), DeserializeError> { + let bytes: [u8; StateID::SIZE] = + slice[..StateID::SIZE].try_into().unwrap(); + let sid = StateID::from_ne_bytes(bytes) + .map_err(|err| DeserializeError::state_id_error(err, what))?; + Ok((sid, StateID::SIZE)) +} + +/// Reads a state ID from the given slice. If the slice has insufficient +/// length, then this panics. Otherwise, the deserialized integer is assumed +/// to be a valid state ID. +/// +/// This also returns the number of bytes read. +pub(crate) fn read_state_id_unchecked(slice: &[u8]) -> (StateID, usize) { + let sid = StateID::from_ne_bytes_unchecked( + slice[..StateID::SIZE].try_into().unwrap(), + ); + (sid, StateID::SIZE) +} + +/// Write the given state ID to the beginning of the given slice of bytes +/// using the specified endianness. The given slice must have length at least +/// `StateID::SIZE`, or else this panics. Upon success, the total number of +/// bytes written is returned. +pub(crate) fn write_state_id<E: Endian>( + sid: StateID, + dst: &mut [u8], +) -> usize { + E::write_u32(sid.as_u32(), dst); + StateID::SIZE +} + +/// Try to read a u16 as a usize from the beginning of the given slice in +/// native endian format. If the slice has fewer than 2 bytes or if the +/// deserialized number cannot be represented by usize, then this returns an +/// error. The error message will include the `what` description of what is +/// being deserialized, for better error messages. `what` should be a noun in +/// singular form. +/// +/// Upon success, this also returns the number of bytes read. +pub(crate) fn try_read_u16_as_usize( + slice: &[u8], + what: &'static str, +) -> Result<(usize, usize), DeserializeError> { + try_read_u16(slice, what).and_then(|(n, nr)| { + usize::try_from(n) + .map(|n| (n, nr)) + .map_err(|_| DeserializeError::invalid_usize(what)) + }) +} + +/// Try to read a u32 as a usize from the beginning of the given slice in +/// native endian format. If the slice has fewer than 4 bytes or if the +/// deserialized number cannot be represented by usize, then this returns an +/// error. The error message will include the `what` description of what is +/// being deserialized, for better error messages. `what` should be a noun in +/// singular form. +/// +/// Upon success, this also returns the number of bytes read. 
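+///
+/// A hedged sketch of typical use while walking a serialized object (the
+/// `slice` cursor and the "state count" description are illustrative):
+///
+/// ```ignore
+/// let (count, nread) = try_read_u32_as_usize(slice, "state count")?;
+/// slice = &slice[nread..];
+/// ```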
+pub(crate) fn try_read_u32_as_usize( + slice: &[u8], + what: &'static str, +) -> Result<(usize, usize), DeserializeError> { + try_read_u32(slice, what).and_then(|(n, nr)| { + usize::try_from(n) + .map(|n| (n, nr)) + .map_err(|_| DeserializeError::invalid_usize(what)) + }) +} + +/// Try to read a u16 from the beginning of the given slice in native endian +/// format. If the slice has fewer than 2 bytes, then this returns an error. +/// The error message will include the `what` description of what is being +/// deserialized, for better error messages. `what` should be a noun in +/// singular form. +/// +/// Upon success, this also returns the number of bytes read. +pub(crate) fn try_read_u16( + slice: &[u8], + what: &'static str, +) -> Result<(u16, usize), DeserializeError> { + check_slice_len(slice, size_of::<u16>(), what)?; + Ok((read_u16(slice), size_of::<u16>())) +} + +/// Try to read a u32 from the beginning of the given slice in native endian +/// format. If the slice has fewer than 4 bytes, then this returns an error. +/// The error message will include the `what` description of what is being +/// deserialized, for better error messages. `what` should be a noun in +/// singular form. +/// +/// Upon success, this also returns the number of bytes read. +pub(crate) fn try_read_u32( + slice: &[u8], + what: &'static str, +) -> Result<(u32, usize), DeserializeError> { + check_slice_len(slice, size_of::<u32>(), what)?; + Ok((read_u32(slice), size_of::<u32>())) +} + +/// Try to read a u128 from the beginning of the given slice in native endian +/// format. If the slice has fewer than 16 bytes, then this returns an error. +/// The error message will include the `what` description of what is being +/// deserialized, for better error messages. `what` should be a noun in +/// singular form. +/// +/// Upon success, this also returns the number of bytes read. +pub(crate) fn try_read_u128( + slice: &[u8], + what: &'static str, +) -> Result<(u128, usize), DeserializeError> { + check_slice_len(slice, size_of::<u128>(), what)?; + Ok((read_u128(slice), size_of::<u128>())) +} + +/// Read a u16 from the beginning of the given slice in native endian format. +/// If the slice has fewer than 2 bytes, then this panics. +/// +/// Marked as inline to speed up sparse searching which decodes integers from +/// its automaton at search time. +#[cfg_attr(feature = "perf-inline", inline(always))] +pub(crate) fn read_u16(slice: &[u8]) -> u16 { + let bytes: [u8; 2] = slice[..size_of::<u16>()].try_into().unwrap(); + u16::from_ne_bytes(bytes) +} + +/// Read a u32 from the beginning of the given slice in native endian format. +/// If the slice has fewer than 4 bytes, then this panics. +/// +/// Marked as inline to speed up sparse searching which decodes integers from +/// its automaton at search time. +#[cfg_attr(feature = "perf-inline", inline(always))] +pub(crate) fn read_u32(slice: &[u8]) -> u32 { + let bytes: [u8; 4] = slice[..size_of::<u32>()].try_into().unwrap(); + u32::from_ne_bytes(bytes) +} + +/// Read a u128 from the beginning of the given slice in native endian format. +/// If the slice has fewer than 16 bytes, then this panics. +pub(crate) fn read_u128(slice: &[u8]) -> u128 { + let bytes: [u8; 16] = slice[..size_of::<u128>()].try_into().unwrap(); + u128::from_ne_bytes(bytes) +} + +/// Checks that the given slice has some minimal length. If it's smaller than +/// the bound given, then a "buffer too small" error is returned with `what` +/// describing what the buffer represents. 
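+///
+/// For example, a fixed-size read might be guarded like this (sketch only;
+/// the "alphabet length" description is illustrative):
+///
+/// ```ignore
+/// check_slice_len(slice, size_of::<u32>(), "alphabet length")?;
+/// let n = read_u32(slice);
+/// ```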
+pub(crate) fn check_slice_len<T>( + slice: &[T], + at_least_len: usize, + what: &'static str, +) -> Result<(), DeserializeError> { + if slice.len() < at_least_len { + return Err(DeserializeError::buffer_too_small(what)); + } + Ok(()) +} + +/// Multiply the given numbers, and on overflow, return an error that includes +/// 'what' in the error message. +/// +/// This is useful when doing arithmetic with untrusted data. +pub(crate) fn mul( + a: usize, + b: usize, + what: &'static str, +) -> Result<usize, DeserializeError> { + match a.checked_mul(b) { + Some(c) => Ok(c), + None => Err(DeserializeError::arithmetic_overflow(what)), + } +} + +/// Add the given numbers, and on overflow, return an error that includes +/// 'what' in the error message. +/// +/// This is useful when doing arithmetic with untrusted data. +pub(crate) fn add( + a: usize, + b: usize, + what: &'static str, +) -> Result<usize, DeserializeError> { + match a.checked_add(b) { + Some(c) => Ok(c), + None => Err(DeserializeError::arithmetic_overflow(what)), + } +} + +/// Shift `a` left by `b`, and on overflow, return an error that includes +/// 'what' in the error message. +/// +/// This is useful when doing arithmetic with untrusted data. +pub(crate) fn shl( + a: usize, + b: usize, + what: &'static str, +) -> Result<usize, DeserializeError> { + let amount = u32::try_from(b) + .map_err(|_| DeserializeError::arithmetic_overflow(what))?; + match a.checked_shl(amount) { + Some(c) => Ok(c), + None => Err(DeserializeError::arithmetic_overflow(what)), + } +} + +/// Returns the number of additional bytes required to add to the given length +/// in order to make the total length a multiple of 4. The return value is +/// always less than 4. +pub(crate) fn padding_len(non_padding_len: usize) -> usize { + (4 - (non_padding_len & 0b11)) & 0b11 +} + +/// A simple trait for writing code generic over endianness. +/// +/// This is similar to what byteorder provides, but we only need a very small +/// subset. +pub(crate) trait Endian { + /// Writes a u16 to the given destination buffer in a particular + /// endianness. If the destination buffer has a length smaller than 2, then + /// this panics. + fn write_u16(n: u16, dst: &mut [u8]); + + /// Writes a u32 to the given destination buffer in a particular + /// endianness. If the destination buffer has a length smaller than 4, then + /// this panics. + fn write_u32(n: u32, dst: &mut [u8]); + + /// Writes a u64 to the given destination buffer in a particular + /// endianness. If the destination buffer has a length smaller than 8, then + /// this panics. + fn write_u64(n: u64, dst: &mut [u8]); + + /// Writes a u128 to the given destination buffer in a particular + /// endianness. If the destination buffer has a length smaller than 16, + /// then this panics. + fn write_u128(n: u128, dst: &mut [u8]); +} + +/// Little endian writing. +pub(crate) enum LE {} +/// Big endian writing. 
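+///
+/// As a sketch, `LE`, `BE` and `NE` are uninhabited type-level selectors for
+/// the `Endian` trait (the 4-byte buffer below is illustrative):
+///
+/// ```ignore
+/// let mut dst = [0u8; 4];
+/// BE::write_u32(0xFEFF, &mut dst);
+/// assert_eq!(dst, [0x00, 0x00, 0xFE, 0xFF]);
+/// ```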
+pub(crate) enum BE {}
+
+#[cfg(target_endian = "little")]
+pub(crate) type NE = LE;
+#[cfg(target_endian = "big")]
+pub(crate) type NE = BE;
+
+impl Endian for LE {
+    fn write_u16(n: u16, dst: &mut [u8]) {
+        dst[..2].copy_from_slice(&n.to_le_bytes());
+    }
+
+    fn write_u32(n: u32, dst: &mut [u8]) {
+        dst[..4].copy_from_slice(&n.to_le_bytes());
+    }
+
+    fn write_u64(n: u64, dst: &mut [u8]) {
+        dst[..8].copy_from_slice(&n.to_le_bytes());
+    }
+
+    fn write_u128(n: u128, dst: &mut [u8]) {
+        dst[..16].copy_from_slice(&n.to_le_bytes());
+    }
+}
+
+impl Endian for BE {
+    fn write_u16(n: u16, dst: &mut [u8]) {
+        dst[..2].copy_from_slice(&n.to_be_bytes());
+    }
+
+    fn write_u32(n: u32, dst: &mut [u8]) {
+        dst[..4].copy_from_slice(&n.to_be_bytes());
+    }
+
+    fn write_u64(n: u64, dst: &mut [u8]) {
+        dst[..8].copy_from_slice(&n.to_be_bytes());
+    }
+
+    fn write_u128(n: u128, dst: &mut [u8]) {
+        dst[..16].copy_from_slice(&n.to_be_bytes());
+    }
+}
+
+#[cfg(all(test, feature = "alloc"))]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn labels() {
+        let mut buf = [0; 1024];
+
+        let nwrite = write_label("fooba", &mut buf).unwrap();
+        assert_eq!(nwrite, 8);
+        assert_eq!(&buf[..nwrite], b"fooba\x00\x00\x00");
+
+        let nread = read_label(&buf, "fooba").unwrap();
+        assert_eq!(nread, 8);
+    }
+
+    #[test]
+    #[should_panic]
+    fn bad_label_interior_nul() {
+        // interior NULs are not allowed
+        write_label("foo\x00bar", &mut [0; 1024]).unwrap();
+    }
+
+    #[test]
+    fn bad_label_almost_too_long() {
+        // ok
+        write_label(&"z".repeat(255), &mut [0; 1024]).unwrap();
+    }
+
+    #[test]
+    #[should_panic]
+    fn bad_label_too_long() {
+        // labels longer than 255 bytes are banned
+        write_label(&"z".repeat(256), &mut [0; 1024]).unwrap();
+    }
+
+    #[test]
+    fn padding() {
+        assert_eq!(0, padding_len(8));
+        assert_eq!(3, padding_len(9));
+        assert_eq!(2, padding_len(10));
+        assert_eq!(1, padding_len(11));
+        assert_eq!(0, padding_len(12));
+        assert_eq!(3, padding_len(13));
+        assert_eq!(2, padding_len(14));
+        assert_eq!(1, padding_len(15));
+        assert_eq!(0, padding_len(16));
+    }
+}