Merging upstream version 1.67.1+dfsg1.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:18:32 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:18:32 +0000
commit: 4547b622d8d29df964fa2914213088b148c498fc (patch)
tree: 9fc6b25f3c3add6b745be9a2400a6e96140046e9 /vendor/regex-automata/src/util
parent: Releasing progress-linux version 1.66.0+dfsg1-1~progress7.99u1. (diff)
download: rustc-4547b622d8d29df964fa2914213088b148c498fc.tar.xz
rustc-4547b622d8d29df964fa2914213088b148c498fc.zip
12 files changed, 5267 insertions, 0 deletions
diff --git a/vendor/regex-automata/src/util/alphabet.rs b/vendor/regex-automata/src/util/alphabet.rs
new file mode 100644
index 000000000..0bc1ece58
--- /dev/null
+++ b/vendor/regex-automata/src/util/alphabet.rs
@@ -0,0 +1,790 @@
+use core::convert::TryFrom;
+
+use crate::util::{
+    bytes::{DeserializeError, SerializeError},
+    DebugByte,
+};
+
+/// Unit represents a single unit of input for DFA based regex engines.
+///
+/// **NOTE:** It is not expected for consumers of this crate to need to use
+/// this type unless they are implementing their own DFA. And even then, it's
+/// not required: implementors may use other techniques to handle input.
+///
+/// Typically, a single unit of input for a DFA would be a single byte.
+/// However, for the DFAs in this crate, matches are delayed by a single byte
+/// in order to handle look-ahead assertions (`\b`, `$` and `\z`). Thus, once
+/// we have consumed the haystack, we must run the DFA through one additional
+/// transition using an input that indicates the haystack has ended.
+///
+/// Since there is no way to represent a sentinel with a `u8` since all
+/// possible values *may* be valid inputs to a DFA, this type explicitly adds
+/// room for a sentinel value.
+///
+/// The sentinel EOI value is always its own equivalence class and is
+/// ultimately represented by adding 1 to the maximum equivalence class value.
+/// So for example, the regex `^[a-z]+$` might be split into the following
+/// equivalence classes:
+///
+/// ```text
+/// 0 => [\x00-`]
+/// 1 => [a-z]
+/// 2 => [{-\xFF]
+/// 3 => [EOI]
+/// ```
+///
+/// Where EOI is the special sentinel value that is always in its own
+/// singleton equivalence class.
+#[derive(Clone, Copy, Eq, PartialEq, PartialOrd, Ord)]
+pub enum Unit {
+    U8(u8),
+    EOI(u16),
+}
+
+impl Unit {
+    /// Create a new input unit from a byte value.
+    ///
+    /// All possible byte values are legal. However, when creating an input
+    /// unit for a specific DFA, one should be careful to only construct input
+    /// units that are in that DFA's alphabet. Namely, one way to compact a
+    /// DFA's in-memory representation is to collapse its transitions to a set
+    /// of equivalence classes into a set of all possible byte values. If a
+    /// DFA uses equivalence classes instead of byte values, then the byte
+    /// given here should be the equivalence class.
+    pub fn u8(byte: u8) -> Unit {
+        Unit::U8(byte)
+    }
+
+    pub fn eoi(num_byte_equiv_classes: usize) -> Unit {
+        assert!(
+            num_byte_equiv_classes <= 256,
+            "max number of byte-based equivalent classes is 256, but got {}",
+            num_byte_equiv_classes,
+        );
+        Unit::EOI(u16::try_from(num_byte_equiv_classes).unwrap())
+    }
+
+    pub fn as_u8(self) -> Option<u8> {
+        match self {
+            Unit::U8(b) => Some(b),
+            Unit::EOI(_) => None,
+        }
+    }
+
+    #[cfg(feature = "alloc")]
+    pub fn as_eoi(self) -> Option<usize> {
+        match self {
+            Unit::U8(_) => None,
+            Unit::EOI(eoi) => Some(eoi as usize),
+        }
+    }
+
+    pub fn as_usize(self) -> usize {
+        match self {
+            Unit::U8(b) => b as usize,
+            Unit::EOI(eoi) => eoi as usize,
+        }
+    }
+
+    pub fn is_eoi(&self) -> bool {
+        match *self {
+            Unit::EOI(_) => true,
+            _ => false,
+        }
+    }
+
+    #[cfg(feature = "alloc")]
+    pub fn is_word_byte(&self) -> bool {
+        self.as_u8().map_or(false, crate::util::is_word_byte)
+    }
+}
+
+impl core::fmt::Debug for Unit {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        match *self {
+            Unit::U8(b) => write!(f, "{:?}", DebugByte(b)),
+            Unit::EOI(_) => write!(f, "EOI"),
+        }
+    }
+}
+
+/// A representation of byte oriented equivalence classes.
+///
+/// This is used in a DFA to reduce the size of the transition table. This can
+/// have a particularly large impact not only on the total size of a dense DFA,
+/// but also on compile times.
+#[derive(Clone, Copy)]
+pub struct ByteClasses([u8; 256]);
+
+impl ByteClasses {
+    /// Creates a new set of equivalence classes where all bytes are mapped to
+    /// the same class.
+    pub fn empty() -> ByteClasses {
+        ByteClasses([0; 256])
+    }
+
+    /// Creates a new set of equivalence classes where each byte belongs to
+    /// its own equivalence class.
+    #[cfg(feature = "alloc")]
+    pub fn singletons() -> ByteClasses {
+        let mut classes = ByteClasses::empty();
+        for i in 0..256 {
+            classes.set(i as u8, i as u8);
+        }
+        classes
+    }
+
+    /// Deserializes a byte class map from the given slice. If the slice is of
+    /// insufficient length or otherwise contains an impossible mapping, then
+    /// an error is returned. Upon success, the number of bytes read along with
+    /// the map are returned. The number of bytes read is always a multiple of
+    /// 8.
+    pub fn from_bytes(
+        slice: &[u8],
+    ) -> Result<(ByteClasses, usize), DeserializeError> {
+        if slice.len() < 256 {
+            return Err(DeserializeError::buffer_too_small("byte class map"));
+        }
+        let mut classes = ByteClasses::empty();
+        for (b, &class) in slice[..256].iter().enumerate() {
+            classes.set(b as u8, class);
+        }
+        for b in classes.iter() {
+            if b.as_usize() >= classes.alphabet_len() {
+                return Err(DeserializeError::generic(
+                    "found equivalence class greater than alphabet len",
+                ));
+            }
+        }
+        Ok((classes, 256))
+    }
+
+    /// Writes this byte class map to the given byte buffer. if the given
+    /// buffer is too small, then an error is returned. Upon success, the total
+    /// number of bytes written is returned. The number of bytes written is
+    /// guaranteed to be a multiple of 8.
+    pub fn write_to(
+        &self,
+        mut dst: &mut [u8],
+    ) -> Result<usize, SerializeError> {
+        let nwrite = self.write_to_len();
+        if dst.len() < nwrite {
+            return Err(SerializeError::buffer_too_small("byte class map"));
+        }
+        for b in 0..=255 {
+            dst[0] = self.get(b);
+            dst = &mut dst[1..];
+        }
+        Ok(nwrite)
+    }
+
+    /// Returns the total number of bytes written by `write_to`.
+    pub fn write_to_len(&self) -> usize {
+        256
+    }
+
+    /// Set the equivalence class for the given byte.
+    #[inline]
+    pub fn set(&mut self, byte: u8, class: u8) {
+        self.0[byte as usize] = class;
+    }
+
+    /// Get the equivalence class for the given byte.
+    #[inline]
+    pub fn get(&self, byte: u8) -> u8 {
+        self.0[byte as usize]
+    }
+
+    /// Get the equivalence class for the given byte while forcefully
+    /// eliding bounds checks.
+    #[inline]
+    pub unsafe fn get_unchecked(&self, byte: u8) -> u8 {
+        *self.0.get_unchecked(byte as usize)
+    }
+
+    /// Get the equivalence class for the given input unit and return the
+    /// class as a `usize`.
+    #[inline]
+    pub fn get_by_unit(&self, unit: Unit) -> usize {
+        match unit {
+            Unit::U8(b) => usize::try_from(self.get(b)).unwrap(),
+            Unit::EOI(b) => usize::try_from(b).unwrap(),
+        }
+    }
+
+    #[inline]
+    pub fn eoi(&self) -> Unit {
+        Unit::eoi(self.alphabet_len().checked_sub(1).unwrap())
+    }
+
+    /// Return the total number of elements in the alphabet represented by
+    /// these equivalence classes. Equivalently, this returns the total number
+    /// of equivalence classes.
+    #[inline]
+    pub fn alphabet_len(&self) -> usize {
+        // Add one since the number of equivalence classes is one bigger than
+        // the last one. But add another to account for the final EOI class
+        // that isn't explicitly represented.
+        self.0[255] as usize + 1 + 1
+    }
+
+    /// Returns the stride, as a base-2 exponent, required for these
+    /// equivalence classes.
+    ///
+    /// The stride is always the smallest power of 2 that is greater than or
+    /// equal to the alphabet length. This is done so that converting between
+    /// state IDs and indices can be done with shifts alone, which is much
+    /// faster than integer division.
+    #[cfg(feature = "alloc")]
+    pub fn stride2(&self) -> usize {
+        self.alphabet_len().next_power_of_two().trailing_zeros() as usize
+    }
+
+    /// Returns true if and only if every byte in this class maps to its own
+    /// equivalence class. Equivalently, there are 257 equivalence classes
+    /// and each class contains exactly one byte (plus the special EOI class).
+    #[inline]
+    pub fn is_singleton(&self) -> bool {
+        self.alphabet_len() == 257
+    }
+
+    /// Returns an iterator over all equivalence classes in this set.
+    pub fn iter(&self) -> ByteClassIter<'_> {
+        ByteClassIter { classes: self, i: 0 }
+    }
+
+    /// Returns an iterator over a sequence of representative bytes from each
+    /// equivalence class. Namely, this yields exactly N items, where N is
+    /// equivalent to the number of equivalence classes. Each item is an
+    /// arbitrary byte drawn from each equivalence class.
+    ///
+    /// This is useful when one is determinizing an NFA and the NFA's alphabet
+    /// hasn't been converted to equivalence classes yet. Picking an arbitrary
+    /// byte from each equivalence class then permits a full exploration of
+    /// the NFA instead of using every possible byte value.
+    #[cfg(feature = "alloc")]
+    pub fn representatives(&self) -> ByteClassRepresentatives<'_> {
+        ByteClassRepresentatives { classes: self, byte: 0, last_class: None }
+    }
+
+    /// Returns an iterator of the bytes in the given equivalence class.
+    pub fn elements(&self, class: Unit) -> ByteClassElements {
+        ByteClassElements { classes: self, class, byte: 0 }
+    }
+
+    /// Returns an iterator of byte ranges in the given equivalence class.
+    ///
+    /// That is, a sequence of contiguous ranges are returned. Typically, every
+    /// class maps to a single contiguous range.
+    fn element_ranges(&self, class: Unit) -> ByteClassElementRanges {
+        ByteClassElementRanges { elements: self.elements(class), range: None }
+    }
+}
+
+impl core::fmt::Debug for ByteClasses {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        if self.is_singleton() {
+            write!(f, "ByteClasses({{singletons}})")
+        } else {
+            write!(f, "ByteClasses(")?;
+            for (i, class) in self.iter().enumerate() {
+                if i > 0 {
+                    write!(f, ", ")?;
+                }
+                write!(f, "{:?} => [", class.as_usize())?;
+                for (start, end) in self.element_ranges(class) {
+                    if start == end {
+                        write!(f, "{:?}", start)?;
+                    } else {
+                        write!(f, "{:?}-{:?}", start, end)?;
+                    }
+                }
+                write!(f, "]")?;
+            }
+            write!(f, ")")
+        }
+    }
+}
+
+/// An iterator over each equivalence class.
+#[derive(Debug)]
+pub struct ByteClassIter<'a> {
+    classes: &'a ByteClasses,
+    i: usize,
+}
+
+impl<'a> Iterator for ByteClassIter<'a> {
+    type Item = Unit;
+
+    fn next(&mut self) -> Option<Unit> {
+        if self.i + 1 == self.classes.alphabet_len() {
+            self.i += 1;
+            Some(self.classes.eoi())
+        } else if self.i < self.classes.alphabet_len() {
+            let class = self.i as u8;
+            self.i += 1;
+            Some(Unit::u8(class))
+        } else {
+            None
+        }
+    }
+}
+
+/// An iterator over representative bytes from each equivalence class.
+#[cfg(feature = "alloc")]
+#[derive(Debug)]
+pub struct ByteClassRepresentatives<'a> {
+    classes: &'a ByteClasses,
+    byte: usize,
+    last_class: Option<u8>,
+}
+
+#[cfg(feature = "alloc")]
+impl<'a> Iterator for ByteClassRepresentatives<'a> {
+    type Item = Unit;
+
+    fn next(&mut self) -> Option<Unit> {
+        while self.byte < 256 {
+            let byte = self.byte as u8;
+            let class = self.classes.get(byte);
+            self.byte += 1;
+
+            if self.last_class != Some(class) {
+                self.last_class = Some(class);
+                return Some(Unit::u8(byte));
+            }
+        }
+        if self.byte == 256 {
+            self.byte += 1;
+            return Some(self.classes.eoi());
+        }
+        None
+    }
+}
+
+/// An iterator over all elements in an equivalence class.
+#[derive(Debug)]
+pub struct ByteClassElements<'a> {
+    classes: &'a ByteClasses,
+    class: Unit,
+    byte: usize,
+}
+
+impl<'a> Iterator for ByteClassElements<'a> {
+    type Item = Unit;
+
+    fn next(&mut self) -> Option<Unit> {
+        while self.byte < 256 {
+            let byte = self.byte as u8;
+            self.byte += 1;
+            if self.class.as_u8() == Some(self.classes.get(byte)) {
+                return Some(Unit::u8(byte));
+            }
+        }
+        if self.byte < 257 {
+            self.byte += 1;
+            if self.class.is_eoi() {
+                return Some(Unit::eoi(256));
+            }
+        }
+        None
+    }
+}
+
+/// An iterator over all elements in an equivalence class expressed as a
+/// sequence of contiguous ranges.
+#[derive(Debug)]
+pub struct ByteClassElementRanges<'a> {
+    elements: ByteClassElements<'a>,
+    range: Option<(Unit, Unit)>,
+}
+
+impl<'a> Iterator for ByteClassElementRanges<'a> {
+    type Item = (Unit, Unit);
+
+    fn next(&mut self) -> Option<(Unit, Unit)> {
+        loop {
+            let element = match self.elements.next() {
+                None => return self.range.take(),
+                Some(element) => element,
+            };
+            match self.range.take() {
+                None => {
+                    self.range = Some((element, element));
+                }
+                Some((start, end)) => {
+                    if end.as_usize() + 1 != element.as_usize()
+                        || element.is_eoi()
+                    {
+                        self.range = Some((element, element));
+                        return Some((start, end));
+                    }
+                    self.range = Some((start, element));
+                }
+            }
+        }
+    }
+}
+
+/// A byte class set keeps track of an *approximation* of equivalence classes
+/// of bytes during NFA construction. That is, every byte in an equivalence
+/// class cannot discriminate between a match and a non-match.
+///
+/// For example, in the regex `[ab]+`, the bytes `a` and `b` would be in the
+/// same equivalence class because it never matters whether an `a` or a `b` is
+/// seen, and no combination of `a`s and `b`s in the text can discriminate a
+/// match.
+///
+/// Note though that this does not compute the minimal set of equivalence
+/// classes. For example, in the regex `[ac]+`, both `a` and `c` are in the
+/// same equivalence class for the same reason that `a` and `b` are in the
+/// same equivalence class in the aforementioned regex. However, in this
+/// implementation, `a` and `c` are put into distinct equivalence classes. The
+/// reason for this is implementation complexity. In the future, we should
+/// endeavor to compute the minimal equivalence classes since they can have a
+/// rather large impact on the size of the DFA. (Doing this will likely require
+/// rethinking how equivalence classes are computed, including changing the
+/// representation here, which is only able to group contiguous bytes into the
+/// same equivalence class.)
+#[derive(Clone, Debug)]
+pub struct ByteClassSet(ByteSet);
+
+impl ByteClassSet {
+    /// Create a new set of byte classes where all bytes are part of the same
+    /// equivalence class.
+    #[cfg(feature = "alloc")]
+    pub fn empty() -> Self {
+        ByteClassSet(ByteSet::empty())
+    }
+
+    /// Indicate the the range of byte given (inclusive) can discriminate a
+    /// match between it and all other bytes outside of the range.
+    #[cfg(feature = "alloc")]
+    pub fn set_range(&mut self, start: u8, end: u8) {
+        debug_assert!(start <= end);
+        if start > 0 {
+            self.0.add(start - 1);
+        }
+        self.0.add(end);
+    }
+
+    /// Add the contiguous ranges in the set given to this byte class set.
+    #[cfg(feature = "alloc")]
+    pub fn add_set(&mut self, set: &ByteSet) {
+        for (start, end) in set.iter_ranges() {
+            self.set_range(start, end);
+        }
+    }
+
+    /// Convert this boolean set to a map that maps all byte values to their
+    /// corresponding equivalence class. The last mapping indicates the largest
+    /// equivalence class identifier (which is never bigger than 255).
+    #[cfg(feature = "alloc")]
+    pub fn byte_classes(&self) -> ByteClasses {
+        let mut classes = ByteClasses::empty();
+        let mut class = 0u8;
+        let mut b = 0u8;
+        loop {
+            classes.set(b, class);
+            if b == 255 {
+                break;
+            }
+            if self.0.contains(b) {
+                class = class.checked_add(1).unwrap();
+            }
+            b = b.checked_add(1).unwrap();
+        }
+        classes
+    }
+}
+
+/// A simple set of bytes that is reasonably cheap to copy and allocation free.
+#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
+pub struct ByteSet {
+    bits: BitSet,
+}
+
+/// The representation of a byte set. Split out so that we can define a
+/// convenient Debug impl for it while keeping "ByteSet" in the output.
+#[derive(Clone, Copy, Default, Eq, PartialEq)]
+struct BitSet([u128; 2]);
+
+impl ByteSet {
+    /// Create an empty set of bytes.
+    #[cfg(feature = "alloc")]
+    pub fn empty() -> ByteSet {
+        ByteSet { bits: BitSet([0; 2]) }
+    }
+
+    /// Add a byte to this set.
+    ///
+    /// If the given byte already belongs to this set, then this is a no-op.
+    #[cfg(feature = "alloc")]
+    pub fn add(&mut self, byte: u8) {
+        let bucket = byte / 128;
+        let bit = byte % 128;
+        self.bits.0[bucket as usize] |= 1 << bit;
+    }
+
+    /// Add an inclusive range of bytes.
+    #[cfg(feature = "alloc")]
+    pub fn add_all(&mut self, start: u8, end: u8) {
+        for b in start..=end {
+            self.add(b);
+        }
+    }
+
+    /// Remove a byte from this set.
+    ///
+    /// If the given byte is not in this set, then this is a no-op.
+    #[cfg(feature = "alloc")]
+    pub fn remove(&mut self, byte: u8) {
+        let bucket = byte / 128;
+        let bit = byte % 128;
+        self.bits.0[bucket as usize] &= !(1 << bit);
+    }
+
+    /// Remove an inclusive range of bytes.
+    #[cfg(feature = "alloc")]
+    pub fn remove_all(&mut self, start: u8, end: u8) {
+        for b in start..=end {
+            self.remove(b);
+        }
+    }
+
+    /// Return true if and only if the given byte is in this set.
+    pub fn contains(&self, byte: u8) -> bool {
+        let bucket = byte / 128;
+        let bit = byte % 128;
+        self.bits.0[bucket as usize] & (1 << bit) > 0
+    }
+
+    /// Return true if and only if the given inclusive range of bytes is in
+    /// this set.
+    #[cfg(feature = "alloc")]
+    pub fn contains_range(&self, start: u8, end: u8) -> bool {
+        (start..=end).all(|b| self.contains(b))
+    }
+
+    /// Returns an iterator over all bytes in this set.
+    #[cfg(feature = "alloc")]
+    pub fn iter(&self) -> ByteSetIter {
+        ByteSetIter { set: self, b: 0 }
+    }
+
+    /// Returns an iterator over all contiguous ranges of bytes in this set.
+    #[cfg(feature = "alloc")]
+    pub fn iter_ranges(&self) -> ByteSetRangeIter {
+        ByteSetRangeIter { set: self, b: 0 }
+    }
+
+    /// Return the number of bytes in this set.
+    #[cfg(feature = "alloc")]
+    pub fn len(&self) -> usize {
+        (self.bits.0[0].count_ones() + self.bits.0[1].count_ones()) as usize
+    }
+
+    /// Return true if and only if this set is empty.
+    #[cfg(feature = "alloc")]
+    pub fn is_empty(&self) -> bool {
+        self.bits.0 == [0, 0]
+    }
+}
+
+impl core::fmt::Debug for BitSet {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        let mut fmtd = f.debug_set();
+        for b in (0..256).map(|b| b as u8) {
+            if (ByteSet { bits: *self }).contains(b) {
+                fmtd.entry(&b);
+            }
+        }
+        fmtd.finish()
+    }
+}
+
+#[derive(Debug)]
+pub struct ByteSetIter<'a> {
+    set: &'a ByteSet,
+    b: usize,
+}
+
+impl<'a> Iterator for ByteSetIter<'a> {
+    type Item = u8;
+
+    fn next(&mut self) -> Option<u8> {
+        while self.b <= 255 {
+            let b = self.b as u8;
+            self.b += 1;
+            if self.set.contains(b) {
+                return Some(b);
+            }
+        }
+        None
+    }
+}
+
+#[derive(Debug)]
+pub struct ByteSetRangeIter<'a> {
+    set: &'a ByteSet,
+    b: usize,
+}
+
+impl<'a> Iterator for ByteSetRangeIter<'a> {
+    type Item = (u8, u8);
+
+    fn next(&mut self) -> Option<(u8, u8)> {
+        while self.b <= 255 {
+            let start = self.b as u8;
+            self.b += 1;
+            if !self.set.contains(start) {
+                continue;
+            }
+
+            let mut end = start;
+            while self.b <= 255 && self.set.contains(self.b as u8) {
+                end = self.b as u8;
+                self.b += 1;
+            }
+            return Some((start, end));
+        }
+        None
+    }
+}
+
+#[cfg(test)]
+#[cfg(feature = "alloc")]
+mod tests {
+    use alloc::{vec, vec::Vec};
+
+    use super::*;
+
+    #[test]
+    fn byte_classes() {
+        let mut set = ByteClassSet::empty();
+        set.set_range(b'a', b'z');
+
+        let classes = set.byte_classes();
+        assert_eq!(classes.get(0), 0);
+        assert_eq!(classes.get(1), 0);
+        assert_eq!(classes.get(2), 0);
+        assert_eq!(classes.get(b'a' - 1), 0);
+        assert_eq!(classes.get(b'a'), 1);
+        assert_eq!(classes.get(b'm'), 1);
+        assert_eq!(classes.get(b'z'), 1);
+        assert_eq!(classes.get(b'z' + 1), 2);
+        assert_eq!(classes.get(254), 2);
+        assert_eq!(classes.get(255), 2);
+
+        let mut set = ByteClassSet::empty();
+        set.set_range(0, 2);
+        set.set_range(4, 6);
+        let classes = set.byte_classes();
+        assert_eq!(classes.get(0), 0);
+        assert_eq!(classes.get(1), 0);
+        assert_eq!(classes.get(2), 0);
+        assert_eq!(classes.get(3), 1);
+        assert_eq!(classes.get(4), 2);
+        assert_eq!(classes.get(5), 2);
+        assert_eq!(classes.get(6), 2);
+        assert_eq!(classes.get(7), 3);
+        assert_eq!(classes.get(255), 3);
+    }
+
+    #[test]
+    fn full_byte_classes() {
+        let mut set = ByteClassSet::empty();
+        for i in 0..256u16 {
+            set.set_range(i as u8, i as u8);
+        }
+        assert_eq!(set.byte_classes().alphabet_len(), 257);
+    }
+
+    #[test]
+    fn elements_typical() {
+        let mut set = ByteClassSet::empty();
+        set.set_range(b'b', b'd');
+        set.set_range(b'g', b'm');
+        set.set_range(b'z', b'z');
+        let classes = set.byte_classes();
+        // class 0: \x00-a
+        // class 1: b-d
+        // class 2: e-f
+        // class 3: g-m
+        // class 4: n-y
+        // class 5: z-z
+        // class 6: \x7B-\xFF
+        // class 7: EOI
+        assert_eq!(classes.alphabet_len(), 8);
+
+        let elements = classes.elements(Unit::u8(0)).collect::<Vec<_>>();
+        assert_eq!(elements.len(), 98);
+        assert_eq!(elements[0], Unit::u8(b'\x00'));
+        assert_eq!(elements[97], Unit::u8(b'a'));
+
+        let elements = classes.elements(Unit::u8(1)).collect::<Vec<_>>();
+        assert_eq!(
+            elements,
+            vec![Unit::u8(b'b'), Unit::u8(b'c'), Unit::u8(b'd')],
+        );
+
+        let elements = classes.elements(Unit::u8(2)).collect::<Vec<_>>();
+        assert_eq!(elements, vec![Unit::u8(b'e'), Unit::u8(b'f')],);
+
+        let elements = classes.elements(Unit::u8(3)).collect::<Vec<_>>();
+        assert_eq!(
+            elements,
+            vec![
+                Unit::u8(b'g'),
+                Unit::u8(b'h'),
+                Unit::u8(b'i'),
+                Unit::u8(b'j'),
+                Unit::u8(b'k'),
+                Unit::u8(b'l'),
+                Unit::u8(b'm'),
+            ],
+        );
+
+        let elements = classes.elements(Unit::u8(4)).collect::<Vec<_>>();
+        assert_eq!(elements.len(), 12);
+        assert_eq!(elements[0], Unit::u8(b'n'));
+        assert_eq!(elements[11], Unit::u8(b'y'));
+
+        let elements = classes.elements(Unit::u8(5)).collect::<Vec<_>>();
+        assert_eq!(elements, vec![Unit::u8(b'z')]);
+
+        let elements = classes.elements(Unit::u8(6)).collect::<Vec<_>>();
+        assert_eq!(elements.len(), 133);
+        assert_eq!(elements[0], Unit::u8(b'\x7B'));
+        assert_eq!(elements[132], Unit::u8(b'\xFF'));
+
+        let elements = classes.elements(Unit::eoi(7)).collect::<Vec<_>>();
+        assert_eq!(elements, vec![Unit::eoi(256)]);
+    }
+
+    #[test]
+    fn elements_singletons() {
+        let classes = ByteClasses::singletons();
+        assert_eq!(classes.alphabet_len(), 257);
+
+        let elements = classes.elements(Unit::u8(b'a')).collect::<Vec<_>>();
+        assert_eq!(elements, vec![Unit::u8(b'a')]);
+
+        let elements = classes.elements(Unit::eoi(5)).collect::<Vec<_>>();
+        assert_eq!(elements, vec![Unit::eoi(256)]);
+    }
+
+    #[test]
+    fn elements_empty() {
+        let classes = ByteClasses::empty();
+        assert_eq!(classes.alphabet_len(), 2);
+
+        let elements = classes.elements(Unit::u8(0)).collect::<Vec<_>>();
+        assert_eq!(elements.len(), 256);
+        assert_eq!(elements[0], Unit::u8(b'\x00'));
+        assert_eq!(elements[255], Unit::u8(b'\xFF'));
+
+        let elements = classes.elements(Unit::eoi(1)).collect::<Vec<_>>();
+        assert_eq!(elements, vec![Unit::eoi(256)]);
+    }
+}
diff --git a/vendor/regex-automata/src/util/bytes.rs b/vendor/regex-automata/src/util/bytes.rs
new file mode 100644
index 000000000..5877bb149
--- /dev/null
+++ b/vendor/regex-automata/src/util/bytes.rs
@@ -0,0 +1,950 @@
+/*
+A collection of helper functions, types and traits for serializing automata.
+
+This crate defines its own bespoke serialization mechanism for some structures
+provided in the public API, namely, DFAs. A bespoke mechanism was developed
+primarily because structures like automata demand a specific binary format.
+Attempting to encode their rich structure in an existing serialization
+format is just not feasible. Moreover, the format for each structure is
+generally designed such that deserialization is cheap. More specifically, that
+deserialization can be done in constant time. (The idea being that you can
+embed it into your binary or mmap it, and then use it immediately.)
+
+In order to achieve this, most of the structures in this crate use an in-memory
+representation that very closely corresponds to its binary serialized form.
+This pervades and complicates everything, and in some cases, requires dealing
+with alignment and reasoning about safety.
+
+This technique does have major advantages. In particular, it permits doing
+the potentially costly work of compiling a finite state machine in an offline
+manner, and then loading it at runtime not only without having to re-compile
+the regex, but even without the code required to do the compilation. This, for
+example, permits one to use a pre-compiled DFA not only in environments without
+Rust's standard library, but also in environments without a heap.
+
+In the code below, whenever we insert some kind of padding, it's to enforce a
+4-byte alignment, unless otherwise noted. Namely, u32 is the only state ID type
+supported. (In a previous version of this library, DFAs were generic over the
+state ID representation.)
+
+Also, serialization generally requires the caller to specify endianness,
+where as deserialization always assumes native endianness (otherwise cheap
+deserialization would be impossible). This implies that serializing a structure
+generally requires serializing both its big-endian and little-endian variants,
+and then loading the correct one based on the target's endianness.
+*/
+
+use core::{
+    cmp,
+    convert::{TryFrom, TryInto},
+    mem::size_of,
+};
+
+#[cfg(feature = "alloc")]
+use alloc::{vec, vec::Vec};
+
+use crate::util::id::{PatternID, PatternIDError, StateID, StateIDError};
+
+/// An error that occurs when serializing an object from this crate.
+///
+/// Serialization, as used in this crate, universally refers to the process
+/// of transforming a structure (like a DFA) into a custom binary format
+/// represented by `&[u8]`. To this end, serialization is generally infallible.
+/// However, it can fail when caller provided buffer sizes are too small. When
+/// that occurs, a serialization error is reported.
+///
+/// A `SerializeError` provides no introspection capabilities. Its only
+/// supported operation is conversion to a human readable error message.
+///
+/// This error type implements the `std::error::Error` trait only when the
+/// `std` feature is enabled. Otherwise, this type is defined in all
+/// configurations.
+#[derive(Debug)]
+pub struct SerializeError {
+    /// The name of the thing that a buffer is too small for.
+    ///
+    /// Currently, the only kind of serialization error is one that is
+    /// committed by a caller: providing a destination buffer that is too
+    /// small to fit the serialized object. This makes sense conceptually,
+    /// since every valid inhabitant of a type should be serializable.
+    ///
+    /// This is somewhat exposed in the public API of this crate. For example,
+    /// the `to_bytes_{big,little}_endian` APIs return a `Vec<u8>` and are
+    /// guaranteed to never panic or error. This is only possible because the
+    /// implementation guarantees that it will allocate a `Vec<u8>` that is
+    /// big enough.
+    ///
+    /// In summary, if a new serialization error kind needs to be added, then
+    /// it will need careful consideration.
+    what: &'static str,
+}
+
+impl SerializeError {
+    pub(crate) fn buffer_too_small(what: &'static str) -> SerializeError {
+        SerializeError { what }
+    }
+}
+
+impl core::fmt::Display for SerializeError {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(f, "destination buffer is too small to write {}", self.what)
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for SerializeError {}
+
+/// An error that occurs when deserializing an object defined in this crate.
+///
+/// Serialization, as used in this crate, universally refers to the process
+/// of transforming a structure (like a DFA) into a custom binary format
+/// represented by `&[u8]`. Deserialization, then, refers to the process of
+/// cheaply converting this binary format back to the object's in-memory
+/// representation as defined in this crate. To the extent possible,
+/// deserialization will report this error whenever this process fails.
+///
+/// A `DeserializeError` provides no introspection capabilities. Its only
+/// supported operation is conversion to a human readable error message.
+///
+/// This error type implements the `std::error::Error` trait only when the
+/// `std` feature is enabled. Otherwise, this type is defined in all
+/// configurations.
+#[derive(Debug)]
+pub struct DeserializeError(DeserializeErrorKind);
+
+#[derive(Debug)]
+enum DeserializeErrorKind {
+    Generic { msg: &'static str },
+    BufferTooSmall { what: &'static str },
+    InvalidUsize { what: &'static str },
+    InvalidVarint { what: &'static str },
+    VersionMismatch { expected: u32, found: u32 },
+    EndianMismatch { expected: u32, found: u32 },
+    AlignmentMismatch { alignment: usize, address: usize },
+    LabelMismatch { expected: &'static str },
+    ArithmeticOverflow { what: &'static str },
+    PatternID { err: PatternIDError, what: &'static str },
+    StateID { err: StateIDError, what: &'static str },
+}
+
+impl DeserializeError {
+    pub(crate) fn generic(msg: &'static str) -> DeserializeError {
+        DeserializeError(DeserializeErrorKind::Generic { msg })
+    }
+
+    pub(crate) fn buffer_too_small(what: &'static str) -> DeserializeError {
+        DeserializeError(DeserializeErrorKind::BufferTooSmall { what })
+    }
+
+    pub(crate) fn invalid_usize(what: &'static str) -> DeserializeError {
+        DeserializeError(DeserializeErrorKind::InvalidUsize { what })
+    }
+
+    fn invalid_varint(what: &'static str) -> DeserializeError {
+        DeserializeError(DeserializeErrorKind::InvalidVarint { what })
+    }
+
+    fn version_mismatch(expected: u32, found: u32) -> DeserializeError {
+        DeserializeError(DeserializeErrorKind::VersionMismatch {
+            expected,
+            found,
+        })
+    }
+
+    fn endian_mismatch(expected: u32, found: u32) -> DeserializeError {
+        DeserializeError(DeserializeErrorKind::EndianMismatch {
+            expected,
+            found,
+        })
+    }
+
+    fn alignment_mismatch(
+        alignment: usize,
+        address: usize,
+    ) -> DeserializeError {
+        DeserializeError(DeserializeErrorKind::AlignmentMismatch {
+            alignment,
+            address,
+        })
+    }
+
+    fn label_mismatch(expected: &'static str) -> DeserializeError {
+        DeserializeError(DeserializeErrorKind::LabelMismatch { expected })
+    }
+
+    fn arithmetic_overflow(what: &'static str) -> DeserializeError {
+        DeserializeError(DeserializeErrorKind::ArithmeticOverflow { what })
+    }
+
+    pub(crate) fn pattern_id_error(
+        err: PatternIDError,
+        what: &'static str,
+    ) -> DeserializeError {
+        DeserializeError(DeserializeErrorKind::PatternID { err, what })
+    }
+
+    pub(crate) fn state_id_error(
+        err: StateIDError,
+        what: &'static str,
+    ) -> DeserializeError {
+        DeserializeError(DeserializeErrorKind::StateID { err, what })
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for DeserializeError {}
+
+impl core::fmt::Display for DeserializeError {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        use self::DeserializeErrorKind::*;
+
+        match self.0 {
+            Generic { msg } => write!(f, "{}", msg),
+            BufferTooSmall { what } => {
+                write!(f, "buffer is too small to read {}", what)
+            }
+            InvalidUsize { what } => {
+                write!(f, "{} is too big to fit in a usize", what)
+            }
+            InvalidVarint { what } => {
+                write!(f, "could not decode valid varint for {}", what)
+            }
+            VersionMismatch { expected, found } => write!(
+                f,
+                "unsupported version: \
+                 expected version {} but found version {}",
+                expected, found,
+            ),
+            EndianMismatch { expected, found } => write!(
+                f,
+                "endianness mismatch: expected 0x{:X} but got 0x{:X}. \
+                 (Are you trying to load an object serialized with a \
+                 different endianness?)",
+                expected, found,
+            ),
+            AlignmentMismatch { alignment, address } => write!(
+                f,
+                "alignment mismatch: slice starts at address \
+                 0x{:X}, which is not aligned to a {} byte boundary",
+                address, alignment,
+            ),
+            LabelMismatch { expected } => write!(
+                f,
+                "label mismatch: start of serialized object should \
+                 contain a NUL terminated {:?} label, but a different \
+                 label was found",
+                expected,
+            ),
+            ArithmeticOverflow { what } => {
+                write!(f, "arithmetic overflow for {}", what)
+            }
+            PatternID { ref err, what } => {
+                write!(f, "failed to read pattern ID for {}: {}", what, err)
+            }
+            StateID { ref err, what } => {
+                write!(f, "failed to read state ID for {}: {}", what, err)
+            }
+        }
+    }
+}
+
+/// Checks that the given slice has an alignment that matches `T`.
+///
+/// This is useful for checking that a slice has an appropriate alignment
+/// before casting it to a &[T]. Note though that alignment is not itself
+/// sufficient to perform the cast for any `T`.
+pub fn check_alignment<T>(slice: &[u8]) -> Result<(), DeserializeError> {
+    let alignment = core::mem::align_of::<T>();
+    let address = slice.as_ptr() as usize;
+    if address % alignment == 0 {
+        return Ok(());
+    }
+    Err(DeserializeError::alignment_mismatch(alignment, address))
+}
+
+/// Reads a possibly empty amount of padding, up to 7 bytes, from the beginning
+/// of the given slice. All padding bytes must be NUL bytes.
+///
+/// This is useful because it can be theoretically necessary to pad the
+/// beginning of a serialized object with NUL bytes to ensure that it starts
+/// at a correctly aligned address. These padding bytes should come immediately
+/// before the label.
+///
+/// This returns the number of bytes read from the given slice.
+pub fn skip_initial_padding(slice: &[u8]) -> usize {
+    let mut nread = 0;
+    while nread < 7 && nread < slice.len() && slice[nread] == 0 {
+        nread += 1;
+    }
+    nread
+}
+
+/// Allocate a byte buffer of the given size, along with some initial padding
+/// such that `buf[padding..]` has the same alignment as `T`, where the
+/// alignment of `T` must be at most `8`. In particular, callers should treat
+/// the first N bytes (second return value) as padding bytes that must not be
+/// overwritten. In all cases, the following identity holds:
+///
+/// ```ignore
+/// let (buf, padding) = alloc_aligned_buffer::<StateID>(SIZE);
+/// assert_eq!(SIZE, buf[padding..].len());
+/// ```
+///
+/// In practice, padding is often zero.
+///
+/// The requirement for `8` as a maximum here is somewhat arbitrary. In
+/// practice, we never need anything bigger in this crate, and so this function
+/// does some sanity asserts under the assumption of a max alignment of `8`.
+#[cfg(feature = "alloc")]
+pub fn alloc_aligned_buffer<T>(size: usize) -> (Vec<u8>, usize) {
+    // FIXME: This is a kludge because there's no easy way to allocate a
+    // Vec<u8> with an alignment guaranteed to be greater than 1. We could
+    // create a Vec<u32>, but this cannot be safely transmuted to a Vec<u8>
+    // without concern, since reallocing or dropping the Vec<u8> is UB
+    // (different alignment than the initial allocation). We could define a
+    // wrapper type to manage this for us, but it seems like more machinery
+    // than it's worth.
+    let mut buf = vec![0; size];
+    let align = core::mem::align_of::<T>();
+    let address = buf.as_ptr() as usize;
+    if address % align == 0 {
+        return (buf, 0);
+    }
+    // It's not quite clear how to robustly test this code, since the allocator
+    // in my environment appears to always return addresses aligned to at
+    // least 8 bytes, even when the alignment requirement is smaller. A feeble
+    // attempt at ensuring correctness is provided with asserts.
+    let padding = ((address & !0b111).checked_add(8).unwrap())
+        .checked_sub(address)
+        .unwrap();
+    assert!(padding <= 7, "padding of {} is bigger than 7", padding);
+    buf.extend(core::iter::repeat(0).take(padding));
+    assert_eq!(size + padding, buf.len());
+    assert_eq!(
+        0,
+        buf[padding..].as_ptr() as usize % align,
+        "expected end of initial padding to be aligned to {}",
+        align,
+    );
+    (buf, padding)
+}
+
+/// Reads a NUL terminated label starting at the beginning of the given slice.
+///
+/// If a NUL terminated label could not be found, then an error is returned.
+/// Similary, if a label is found but doesn't match the expected label, then
+/// an error is returned.
+///
+/// Upon success, the total number of bytes read (including padding bytes) is
+/// returned.
+pub fn read_label(
+    slice: &[u8],
+    expected_label: &'static str,
+) -> Result<usize, DeserializeError> {
+    // Set an upper bound on how many bytes we scan for a NUL. Since no label
+    // in this crate is longer than 256 bytes, if we can't find one within that
+    // range, then we have corrupted data.
+    let first_nul =
+        slice[..cmp::min(slice.len(), 256)].iter().position(|&b| b == 0);
+    let first_nul = match first_nul {
+        Some(first_nul) => first_nul,
+        None => {
+            return Err(DeserializeError::generic(
+                "could not find NUL terminated label \
+                 at start of serialized object",
+            ));
+        }
+    };
+    let len = first_nul + padding_len(first_nul);
+    if slice.len() < len {
+        return Err(DeserializeError::generic(
+            "could not find properly sized label at start of serialized object"
+        ));
+    }
+    if expected_label.as_bytes() != &slice[..first_nul] {
+        return Err(DeserializeError::label_mismatch(expected_label));
+    }
+    Ok(len)
+}
+
+/// Writes the given label to the buffer as a NUL terminated string. The label
+/// given must not contain NUL, otherwise this will panic. Similarly, the label
+/// must not be longer than 255 bytes, otherwise this will panic.
+///
+/// Additional NUL bytes are written as necessary to ensure that the number of
+/// bytes written is always a multiple of 4.
+///
+/// Upon success, the total number of bytes written (including padding) is
+/// returned.
+pub fn write_label(
+    label: &str,
+    dst: &mut [u8],
+) -> Result<usize, SerializeError> {
+    let nwrite = write_label_len(label);
+    if dst.len() < nwrite {
+        return Err(SerializeError::buffer_too_small("label"));
+    }
+    dst[..label.len()].copy_from_slice(label.as_bytes());
+    for i in 0..(nwrite - label.len()) {
+        dst[label.len() + i] = 0;
+    }
+    assert_eq!(nwrite % 4, 0);
+    Ok(nwrite)
+}
+
+/// Returns the total number of bytes (including padding) that would be written
+/// for the given label. This panics if the given label contains a NUL byte or
+/// is longer than 255 bytes. (The size restriction exists so that searching
+/// for a label during deserialization can be done in small bounded space.)
+pub fn write_label_len(label: &str) -> usize {
+    if label.len() > 255 {
+        panic!("label must not be longer than 255 bytes");
+    }
+    if label.as_bytes().iter().position(|&b| b == 0).is_some() {
+        panic!("label must not contain NUL bytes");
+    }
+    let label_len = label.len() + 1; // +1 for the NUL terminator
+    label_len + padding_len(label_len)
+}
+
+/// Reads the endianness check from the beginning of the given slice and
+/// confirms that the endianness of the serialized object matches the expected
+/// endianness. If the slice is too small or if the endianness check fails,
+/// this returns an error.
+///
+/// Upon success, the total number of bytes read is returned.
+pub fn read_endianness_check(slice: &[u8]) -> Result<usize, DeserializeError> {
+    let (n, nr) = try_read_u32(slice, "endianness check")?;
+    assert_eq!(nr, write_endianness_check_len());
+    if n != 0xFEFF {
+        return Err(DeserializeError::endian_mismatch(0xFEFF, n));
+    }
+    Ok(nr)
+}
+
+/// Writes 0xFEFF as an integer using the given endianness.
+///
+/// This is useful for writing into the header of a serialized object. It can
+/// be read during deserialization as a sanity check to ensure the proper
+/// endianness is used.
+///
+/// Upon success, the total number of bytes written is returned.
+pub fn write_endianness_check<E: Endian>(
+    dst: &mut [u8],
+) -> Result<usize, SerializeError> {
+    let nwrite = write_endianness_check_len();
+    if dst.len() < nwrite {
+        return Err(SerializeError::buffer_too_small("endianness check"));
+    }
+    E::write_u32(0xFEFF, dst);
+    Ok(nwrite)
+}
+
+/// Returns the number of bytes written by the endianness check.
+pub fn write_endianness_check_len() -> usize {
+    size_of::<u32>()
+}
+
+/// Reads a version number from the beginning of the given slice and confirms
+/// that is matches the expected version number given. If the slice is too
+/// small or if the version numbers aren't equivalent, this returns an error.
+///
+/// Upon success, the total number of bytes read is returned.
+///
+/// N.B. Currently, we require that the version number is exactly equivalent.
+/// In the future, if we bump the version number without a semver bump, then
+/// we'll need to relax this a bit and support older versions.
+pub fn read_version(
+    slice: &[u8],
+    expected_version: u32,
+) -> Result<usize, DeserializeError> {
+    let (n, nr) = try_read_u32(slice, "version")?;
+    assert_eq!(nr, write_version_len());
+    if n != expected_version {
+        return Err(DeserializeError::version_mismatch(expected_version, n));
+    }
+    Ok(nr)
+}
+
+/// Writes the given version number to the beginning of the given slice.
+///
+/// This is useful for writing into the header of a serialized object. It can
+/// be read during deserialization as a sanity check to ensure that the library
+/// code supports the format of the serialized object.
+///
+/// Upon success, the total number of bytes written is returned.
+pub fn write_version<E: Endian>(
+    version: u32,
+    dst: &mut [u8],
+) -> Result<usize, SerializeError> {
+    let nwrite = write_version_len();
+    if dst.len() < nwrite {
+        return Err(SerializeError::buffer_too_small("version number"));
+    }
+    E::write_u32(version, dst);
+    Ok(nwrite)
+}
+
+/// Returns the number of bytes written by writing the version number.
+pub fn write_version_len() -> usize {
+    size_of::<u32>()
+}
+
+/// Reads a pattern ID from the given slice. If the slice has insufficient
+/// length, then this panics. If the deserialized integer exceeds the pattern
+/// ID limit for the current target, then this returns an error.
+///
+/// Upon success, this also returns the number of bytes read.
+pub fn read_pattern_id(
+    slice: &[u8],
+    what: &'static str,
+) -> Result<(PatternID, usize), DeserializeError> {
+    let bytes: [u8; PatternID::SIZE] =
+        slice[..PatternID::SIZE].try_into().unwrap();
+    let pid = PatternID::from_ne_bytes(bytes)
+        .map_err(|err| DeserializeError::pattern_id_error(err, what))?;
+    Ok((pid, PatternID::SIZE))
+}
+
+/// Reads a pattern ID from the given slice. If the slice has insufficient
+/// length, then this panics. Otherwise, the deserialized integer is assumed
+/// to be a valid pattern ID.
+///
+/// This also returns the number of bytes read.
+pub fn read_pattern_id_unchecked(slice: &[u8]) -> (PatternID, usize) {
+    let pid = PatternID::from_ne_bytes_unchecked(
+        slice[..PatternID::SIZE].try_into().unwrap(),
+    );
+    (pid, PatternID::SIZE)
+}
+
+/// Write the given pattern ID to the beginning of the given slice of bytes
+/// using the specified endianness. The given slice must have length at least
+/// `PatternID::SIZE`, or else this panics. Upon success, the total number of
+/// bytes written is returned.
+pub fn write_pattern_id<E: Endian>(pid: PatternID, dst: &mut [u8]) -> usize {
+    E::write_u32(pid.as_u32(), dst);
+    PatternID::SIZE
+}
+
+/// Attempts to read a state ID from the given slice. If the slice has an
+/// insufficient number of bytes or if the state ID exceeds the limit for
+/// the current target, then this returns an error.
+///
+/// Upon success, this also returns the number of bytes read.
+pub fn try_read_state_id(
+    slice: &[u8],
+    what: &'static str,
+) -> Result<(StateID, usize), DeserializeError> {
+    if slice.len() < StateID::SIZE {
+        return Err(DeserializeError::buffer_too_small(what));
+    }
+    read_state_id(slice, what)
+}
+
+/// Reads a state ID from the given slice. If the slice has insufficient
+/// length, then this panics. If the deserialized integer exceeds the state ID
+/// limit for the current target, then this returns an error.
+///
+/// Upon success, this also returns the number of bytes read.
+pub fn read_state_id(
+    slice: &[u8],
+    what: &'static str,
+) -> Result<(StateID, usize), DeserializeError> {
+    let bytes: [u8; StateID::SIZE] =
+        slice[..StateID::SIZE].try_into().unwrap();
+    let sid = StateID::from_ne_bytes(bytes)
+        .map_err(|err| DeserializeError::state_id_error(err, what))?;
+    Ok((sid, StateID::SIZE))
+}
+
+/// Reads a state ID from the given slice. If the slice has insufficient
+/// length, then this panics. Otherwise, the deserialized integer is assumed
+/// to be a valid state ID.
+///
+/// This also returns the number of bytes read.
+pub fn read_state_id_unchecked(slice: &[u8]) -> (StateID, usize) {
+    let sid = StateID::from_ne_bytes_unchecked(
+        slice[..StateID::SIZE].try_into().unwrap(),
+    );
+    (sid, StateID::SIZE)
+}
+
+/// Write the given state ID to the beginning of the given slice of bytes
+/// using the specified endianness. The given slice must have length at least
+/// `StateID::SIZE`, or else this panics. Upon success, the total number of
+/// bytes written is returned.
+pub fn write_state_id<E: Endian>(sid: StateID, dst: &mut [u8]) -> usize {
+    E::write_u32(sid.as_u32(), dst);
+    StateID::SIZE
+}
+
+/// Try to read a u16 as a usize from the beginning of the given slice in
+/// native endian format. If the slice has fewer than 2 bytes or if the
+/// deserialized number cannot be represented by usize, then this returns an
+/// error. The error message will include the `what` description of what is
+/// being deserialized, for better error messages. `what` should be a noun in
+/// singular form.
+///
+/// Upon success, this also returns the number of bytes read.
+pub fn try_read_u16_as_usize(
+    slice: &[u8],
+    what: &'static str,
+) -> Result<(usize, usize), DeserializeError> {
+    try_read_u16(slice, what).and_then(|(n, nr)| {
+        usize::try_from(n)
+            .map(|n| (n, nr))
+            .map_err(|_| DeserializeError::invalid_usize(what))
+    })
+}
+
+/// Try to read a u32 as a usize from the beginning of the given slice in
+/// native endian format. If the slice has fewer than 4 bytes or if the
+/// deserialized number cannot be represented by usize, then this returns an
+/// error. The error message will include the `what` description of what is
+/// being deserialized, for better error messages. `what` should be a noun in
+/// singular form.
+///
+/// Upon success, this also returns the number of bytes read.
+pub fn try_read_u32_as_usize(
+    slice: &[u8],
+    what: &'static str,
+) -> Result<(usize, usize), DeserializeError> {
+    try_read_u32(slice, what).and_then(|(n, nr)| {
+        usize::try_from(n)
+            .map(|n| (n, nr))
+            .map_err(|_| DeserializeError::invalid_usize(what))
+    })
+}
+
+/// Try to read a u16 from the beginning of the given slice in native endian
+/// format. If the slice has fewer than 2 bytes, then this returns an error.
+/// The error message will include the `what` description of what is being
+/// deserialized, for better error messages. `what` should be a noun in
+/// singular form.
+///
+/// Upon success, this also returns the number of bytes read.
+pub fn try_read_u16(
+    slice: &[u8],
+    what: &'static str,
+) -> Result<(u16, usize), DeserializeError> {
+    if slice.len() < size_of::<u16>() {
+        return Err(DeserializeError::buffer_too_small(what));
+    }
+    Ok((read_u16(slice), size_of::<u16>()))
+}
+
+/// Try to read a u32 from the beginning of the given slice in native endian
+/// format. If the slice has fewer than 4 bytes, then this returns an error.
+/// The error message will include the `what` description of what is being
+/// deserialized, for better error messages. `what` should be a noun in
+/// singular form.
+///
+/// Upon success, this also returns the number of bytes read.
+pub fn try_read_u32(
+    slice: &[u8],
+    what: &'static str,
+) -> Result<(u32, usize), DeserializeError> {
+    if slice.len() < size_of::<u32>() {
+        return Err(DeserializeError::buffer_too_small(what));
+    }
+    Ok((read_u32(slice), size_of::<u32>()))
+}
+
+/// Read a u16 from the beginning of the given slice in native endian format.
+/// If the slice has fewer than 2 bytes, then this panics.
+///
+/// Marked as inline to speed up sparse searching which decodes integers from
+/// its automaton at search time.
+#[inline(always)]
+pub fn read_u16(slice: &[u8]) -> u16 {
+    let bytes: [u8; 2] = slice[..size_of::<u16>()].try_into().unwrap();
+    u16::from_ne_bytes(bytes)
+}
+
+/// Read a u32 from the beginning of the given slice in native endian format.
+/// If the slice has fewer than 4 bytes, then this panics.
+///
+/// Marked as inline to speed up sparse searching which decodes integers from
+/// its automaton at search time.
+#[inline(always)]
+pub fn read_u32(slice: &[u8]) -> u32 {
+    let bytes: [u8; 4] = slice[..size_of::<u32>()].try_into().unwrap();
+    u32::from_ne_bytes(bytes)
+}
+
+/// Read a u64 from the beginning of the given slice in native endian format.
+/// If the slice has fewer than 8 bytes, then this panics.
+///
+/// Marked as inline to speed up sparse searching which decodes integers from
+/// its automaton at search time.
+#[inline(always)]
+pub fn read_u64(slice: &[u8]) -> u64 {
+    let bytes: [u8; 8] = slice[..size_of::<u64>()].try_into().unwrap();
+    u64::from_ne_bytes(bytes)
+}
+
+/// Write a variable sized integer and return the total number of bytes
+/// written. If the slice was not big enough to contain the bytes, then this
+/// returns an error including the "what" description in it. This does no
+/// padding.
+///
+/// See: https://developers.google.com/protocol-buffers/docs/encoding#varints
+#[allow(dead_code)]
+pub fn write_varu64(
+    mut n: u64,
+    what: &'static str,
+    dst: &mut [u8],
+) -> Result<usize, SerializeError> {
+    let mut i = 0;
+    while n >= 0b1000_0000 {
+        if i >= dst.len() {
+            return Err(SerializeError::buffer_too_small(what));
+        }
+        dst[i] = (n as u8) | 0b1000_0000;
+        n >>= 7;
+        i += 1;
+    }
+    if i >= dst.len() {
+        return Err(SerializeError::buffer_too_small(what));
+    }
+    dst[i] = n as u8;
+    Ok(i + 1)
+}
+
+/// Returns the total number of bytes that would be writen to encode n as a
+/// variable sized integer.
+///
+/// See: https://developers.google.com/protocol-buffers/docs/encoding#varints
+#[allow(dead_code)]
+pub fn write_varu64_len(mut n: u64) -> usize {
+    let mut i = 0;
+    while n >= 0b1000_0000 {
+        n >>= 7;
+        i += 1;
+    }
+    i + 1
+}
+
+/// Like read_varu64, but attempts to cast the result to usize. If the integer
+/// cannot fit into a usize, then an error is returned.
+#[allow(dead_code)]
+pub fn read_varu64_as_usize(
+    slice: &[u8],
+    what: &'static str,
+) -> Result<(usize, usize), DeserializeError> {
+    let (n, nread) = read_varu64(slice, what)?;
+    let n = usize::try_from(n)
+        .map_err(|_| DeserializeError::invalid_usize(what))?;
+    Ok((n, nread))
+}
+
+/// Reads a variable sized integer from the beginning of slice, and returns the
+/// integer along with the total number of bytes read. If a valid variable
+/// sized integer could not be found, then an error is returned that includes
+/// the "what" description in it.
+///
+/// https://developers.google.com/protocol-buffers/docs/encoding#varints
+#[allow(dead_code)]
+pub fn read_varu64(
+    slice: &[u8],
+    what: &'static str,
+) -> Result<(u64, usize), DeserializeError> {
+    let mut n: u64 = 0;
+    let mut shift: u32 = 0;
+    // The biggest possible value is u64::MAX, which needs all 64 bits which
+    // requires 10 bytes (because 7 * 9 < 64). We use a limit to avoid reading
+    // an unnecessary number of bytes.
+    let limit = cmp::min(slice.len(), 10);
+    for (i, &b) in slice[..limit].iter().enumerate() {
+        if b < 0b1000_0000 {
+            return match (b as u64).checked_shl(shift) {
+                None => Err(DeserializeError::invalid_varint(what)),
+                Some(b) => Ok((n | b, i + 1)),
+            };
+        }
+        match ((b as u64) & 0b0111_1111).checked_shl(shift) {
+            None => return Err(DeserializeError::invalid_varint(what)),
+            Some(b) => n |= b,
+        }
+        shift += 7;
+    }
+    Err(DeserializeError::invalid_varint(what))
+}
+
+/// Checks that the given slice has some minimal length. If it's smaller than
+/// the bound given, then a "buffer too small" error is returned with `what`
+/// describing what the buffer represents.
+pub fn check_slice_len<T>(
+    slice: &[T],
+    at_least_len: usize,
+    what: &'static str,
+) -> Result<(), DeserializeError> {
+    if slice.len() < at_least_len {
+        return Err(DeserializeError::buffer_too_small(what));
+    }
+    Ok(())
+}
+
+/// Multiply the given numbers, and on overflow, return an error that includes
+/// 'what' in the error message.
+///
+/// This is useful when doing arithmetic with untrusted data.
+pub fn mul(
+    a: usize,
+    b: usize,
+    what: &'static str,
+) -> Result<usize, DeserializeError> {
+    match a.checked_mul(b) {
+        Some(c) => Ok(c),
+        None => Err(DeserializeError::arithmetic_overflow(what)),
+    }
+}
+
+/// Add the given numbers, and on overflow, return an error that includes
+/// 'what' in the error message.
+///
+/// This is useful when doing arithmetic with untrusted data.
+pub fn add(
+    a: usize,
+    b: usize,
+    what: &'static str,
+) -> Result<usize, DeserializeError> {
+    match a.checked_add(b) {
+        Some(c) => Ok(c),
+        None => Err(DeserializeError::arithmetic_overflow(what)),
+    }
+}
+
+/// Shift `a` left by `b`, and on overflow, return an error that includes
+/// 'what' in the error message.
+///
+/// This is useful when doing arithmetic with untrusted data.
+pub fn shl(
+    a: usize,
+    b: usize,
+    what: &'static str,
+) -> Result<usize, DeserializeError> {
+    let amount = u32::try_from(b)
+        .map_err(|_| DeserializeError::arithmetic_overflow(what))?;
+    match a.checked_shl(amount) {
+        Some(c) => Ok(c),
+        None => Err(DeserializeError::arithmetic_overflow(what)),
+    }
+}
+
+/// A simple trait for writing code generic over endianness.
+///
+/// This is similar to what byteorder provides, but we only need a very small
+/// subset.
+pub trait Endian {
+    /// Writes a u16 to the given destination buffer in a particular
+    /// endianness. If the destination buffer has a length smaller than 2, then
+    /// this panics.
+    fn write_u16(n: u16, dst: &mut [u8]);
+
+    /// Writes a u32 to the given destination buffer in a particular
+    /// endianness. If the destination buffer has a length smaller than 4, then
+    /// this panics.
+    fn write_u32(n: u32, dst: &mut [u8]);
+
+    /// Writes a u64 to the given destination buffer in a particular
+    /// endianness. If the destination buffer has a length smaller than 8, then
+    /// this panics.
+    fn write_u64(n: u64, dst: &mut [u8]);
+}
+
+/// Little endian writing.
+pub enum LE {}
+/// Big endian writing.
+pub enum BE {}
+
+#[cfg(target_endian = "little")]
+pub type NE = LE;
+#[cfg(target_endian = "big")]
+pub type NE = BE;
+
+impl Endian for LE {
+    fn write_u16(n: u16, dst: &mut [u8]) {
+        dst[..2].copy_from_slice(&n.to_le_bytes());
+    }
+
+    fn write_u32(n: u32, dst: &mut [u8]) {
+        dst[..4].copy_from_slice(&n.to_le_bytes());
+    }
+
+    fn write_u64(n: u64, dst: &mut [u8]) {
+        dst[..8].copy_from_slice(&n.to_le_bytes());
+    }
+}
+
+impl Endian for BE {
+    fn write_u16(n: u16, dst: &mut [u8]) {
+        dst[..2].copy_from_slice(&n.to_be_bytes());
+    }
+
+    fn write_u32(n: u32, dst: &mut [u8]) {
+        dst[..4].copy_from_slice(&n.to_be_bytes());
+    }
+
+    fn write_u64(n: u64, dst: &mut [u8]) {
+        dst[..8].copy_from_slice(&n.to_be_bytes());
+    }
+}
+
+/// Returns the number of additional bytes required to add to the given length
+/// in order to make the total length a multiple of 4. The return value is
+/// always less than 4.
+pub fn padding_len(non_padding_len: usize) -> usize {
+    (4 - (non_padding_len & 0b11)) & 0b11
+}
+
+#[cfg(all(test, feature = "alloc"))]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn labels() {
+        let mut buf = [0; 1024];
+
+        let nwrite = write_label("fooba", &mut buf).unwrap();
+        assert_eq!(nwrite, 8);
+        assert_eq!(&buf[..nwrite], b"fooba\x00\x00\x00");
+
+        let nread = read_label(&buf, "fooba").unwrap();
+        assert_eq!(nread, 8);
+    }
+
+    #[test]
+    #[should_panic]
+    fn bad_label_interior_nul() {
+        // interior NULs are not allowed
+        write_label("foo\x00bar", &mut [0; 1024]).unwrap();
+    }
+
+    #[test]
+    fn bad_label_almost_too_long() {
+        // ok
+        write_label(&"z".repeat(255), &mut [0; 1024]).unwrap();
+    }
+
+    #[test]
+    #[should_panic]
+    fn bad_label_too_long() {
+        // labels longer than 255 bytes are banned
+        write_label(&"z".repeat(256), &mut [0; 1024]).unwrap();
+    }
+
+    #[test]
+    fn padding() {
+        assert_eq!(0, padding_len(8));
+        assert_eq!(3, padding_len(9));
+        assert_eq!(2, padding_len(10));
+        assert_eq!(1, padding_len(11));
+        assert_eq!(0, padding_len(12));
+        assert_eq!(3, padding_len(13));
+        assert_eq!(2, padding_len(14));
+        assert_eq!(1, padding_len(15));
+        assert_eq!(0, padding_len(16));
+    }
+}
diff --git a/vendor/regex-automata/src/util/determinize/mod.rs b/vendor/regex-automata/src/util/determinize/mod.rs
new file mode 100644
index 000000000..b384de8e1
--- /dev/null
+++ b/vendor/regex-automata/src/util/determinize/mod.rs
@@ -0,0 +1,493 @@
+/*!
+This module contains types and routines for implementing determinization.
+
+In this crate, there are at least two places where we implement
+determinization: fully ahead-of-time compiled DFAs in the `dfa` module and
+lazily compiled DFAs in the `hybrid` module. The stuff in this module
+corresponds to the things that are in common between these implementations.
+
+There are three broad things that our implementations of determinization have
+in common, as defined by this module:
+
+* The classification of start states. That is, whether we're dealing with
+word boundaries, line boundaries, etc., is all the same. This also includes
+the look-behind assertions that are satisfied by each starting state
+classification.
+
+* The representation of DFA states as sets of NFA states, including
+convenience types for building these DFA states that are amenable to reusing
+allocations.
+
+* Routines for the "classical" parts of determinization: computing the
+epsilon closure, tracking match states (with corresponding pattern IDs, since
+we support multi-pattern finite automata) and, of course, computing the
+transition function between states for units of input.
+
+I did consider a couple of alternatives to this particular form of code reuse:
+
+1. Don't do any code reuse. The problem here is that we *really* want both
+forms of determinization to do exactly identical things when it comes to
+their handling of NFA states. While our tests generally ensure this, the code
+is tricky and large enough where not reusing code is a pretty big bummer.
+
+2. Implement all of determinization once and make it generic over fully
+compiled DFAs and lazily compiled DFAs. While I didn't actually try this
+approach, my instinct is that it would be more complex than is needed here.
+And the interface required would be pretty hairy. Instead, I think splitting
+it into logical sub-components works better.
+*/
+
+use alloc::vec::Vec;
+
+pub(crate) use self::state::{
+    State, StateBuilderEmpty, StateBuilderMatches, StateBuilderNFA,
+};
+
+use crate::{
+    nfa::thompson::{self, Look, LookSet},
+    util::{
+        alphabet,
+        id::StateID,
+        matchtypes::MatchKind,
+        sparse_set::{SparseSet, SparseSets},
+        start::Start,
+    },
+};
+
+mod state;
+
+/// Compute the set of all eachable NFA states, including the full epsilon
+/// closure, from a DFA state for a single unit of input. The set of reachable
+/// states is returned as a `StateBuilderNFA`. The `StateBuilderNFA` returned
+/// also includes any look-behind assertions satisfied by `unit`, in addition
+/// to whether it is a match state. For multi-pattern DFAs, the builder will
+/// also include the pattern IDs that match (in the order seen).
+///
+/// `nfa` must be able to resolve any NFA state in `state` and any NFA state
+/// reachable via the epsilon closure of any NFA state in `state`. `sparses`
+/// must have capacity equivalent to `nfa.len()`.
+///
+/// `match_kind` should correspond to the match semantics implemented by the
+/// DFA being built. Generally speaking, for leftmost-first match semantics,
+/// states that appear after the first NFA match state will not be included in
+/// the `StateBuilderNFA` returned since they are impossible to visit.
+///
+/// `sparses` is used as scratch space for NFA traversal. Other than their
+/// capacity requirements (detailed above), there are no requirements on what's
+/// contained within them (if anything). Similarly, what's inside of them once
+/// this routine returns is unspecified.
+///
+/// `stack` must have length 0. It is used as scratch space for depth first
+/// traversal. After returning, it is guaranteed that `stack` will have length
+/// 0.
+///
+/// `state` corresponds to the current DFA state on which one wants to compute
+/// the transition for the input `unit`.
+///
+/// `empty_builder` corresponds to the builder allocation to use to produce a
+/// complete `StateBuilderNFA` state. If the state is not needed (or is already
+/// cached), then it can be cleared and reused without needing to create a new
+/// `State`. The `StateBuilderNFA` state returned is final and ready to be
+/// turned into a `State` if necessary.
+pub(crate) fn next(
+    nfa: &thompson::NFA,
+    match_kind: MatchKind,
+    sparses: &mut SparseSets,
+    stack: &mut Vec<StateID>,
+    state: &State,
+    unit: alphabet::Unit,
+    empty_builder: StateBuilderEmpty,
+) -> StateBuilderNFA {
+    sparses.clear();
+
+    // Put the NFA state IDs into a sparse set in case we need to
+    // re-compute their epsilon closure.
+    //
+    // Doing this state shuffling is technically not necessary unless some
+    // kind of look-around is used in the DFA. Some ad hoc experiments
+    // suggested that avoiding this didn't lead to much of an improvement,
+    // but perhaps more rigorous experimentation should be done. And in
+    // particular, avoiding this check requires some light refactoring of
+    // the code below.
+    state.iter_nfa_state_ids(|nfa_id| {
+        sparses.set1.insert(nfa_id);
+    });
+
+    // Compute look-ahead assertions originating from the current state.
+    // Based on the input unit we're transitioning over, some additional
+    // set of assertions may be true. Thus, we re-compute this state's
+    // epsilon closure (but only if necessary).
+    if !state.look_need().is_empty() {
+        // Add look-ahead assertions that are now true based on the current
+        // input unit.
+        let mut look_have = state.look_have().clone();
+        match unit.as_u8() {
+            Some(b'\n') => {
+                look_have.insert(Look::EndLine);
+            }
+            Some(_) => {}
+            None => {
+                look_have.insert(Look::EndText);
+                look_have.insert(Look::EndLine);
+            }
+        }
+        if state.is_from_word() == unit.is_word_byte() {
+            look_have.insert(Look::WordBoundaryUnicodeNegate);
+            look_have.insert(Look::WordBoundaryAsciiNegate);
+        } else {
+            look_have.insert(Look::WordBoundaryUnicode);
+            look_have.insert(Look::WordBoundaryAscii);
+        }
+        // If we have new assertions satisfied that are among the set of
+        // assertions that exist in this state (that is, just because
+        // we added an EndLine assertion above doesn't mean there is an
+        // EndLine conditional epsilon transition in this state), then we
+        // re-compute this state's epsilon closure using the updated set of
+        // assertions.
+        if !look_have
+            .subtract(state.look_have())
+            .intersect(state.look_need())
+            .is_empty()
+        {
+            for nfa_id in &sparses.set1 {
+                epsilon_closure(
+                    nfa,
+                    nfa_id,
+                    look_have,
+                    stack,
+                    &mut sparses.set2,
+                );
+            }
+            sparses.swap();
+            sparses.set2.clear();
+        }
+    }
+
+    // Convert our empty builder into one that can record assertions and match
+    // pattern IDs.
+    let mut builder = empty_builder.into_matches();
+    // Set whether the StartLine look-behind assertion is true for this
+    // transition or not. The look-behind assertion for ASCII word boundaries
+    // is handled below.
+    if nfa.has_any_anchor() {
+        if unit.as_u8().map_or(false, |b| b == b'\n') {
+            // Why only handle StartLine here and not StartText? That's
+            // because StartText can only impact the starting state, which
+            // is speical cased in start state handling.
+            builder.look_have().insert(Look::StartLine);
+        }
+    }
+    for nfa_id in &sparses.set1 {
+        match *nfa.state(nfa_id) {
+            thompson::State::Union { .. }
+            | thompson::State::Fail
+            | thompson::State::Look { .. }
+            | thompson::State::Capture { .. } => {}
+            thompson::State::Match { id } => {
+                // Notice here that we are calling the NEW state a match
+                // state if the OLD state we are transitioning from
+                // contains an NFA match state. This is precisely how we
+                // delay all matches by one byte and also what therefore
+                // guarantees that starting states cannot be match states.
+                //
+                // If we didn't delay matches by one byte, then whether
+                // a DFA is a matching state or not would be determined
+                // by whether one of its own constituent NFA states
+                // was a match state. (And that would be done in
+                // 'add_nfa_states'.)
+                //
+                // Also, 'add_match_pattern_id' requires that callers never
+                // pass duplicative pattern IDs. We do in fact uphold that
+                // guarantee here, but it's subtle. In particular, a Thompson
+                // NFA guarantees that each pattern has exactly one match
+                // state. Moreover, since we're iterating over the NFA state
+                // IDs in a set, we are guarateed not to have any duplicative
+                // match states. Thus, it is impossible to add the same pattern
+                // ID more than once.
+                builder.add_match_pattern_id(id);
+                if !match_kind.continue_past_first_match() {
+                    break;
+                }
+            }
+            thompson::State::Range { range: ref r } => {
+                if r.matches_unit(unit) {
+                    epsilon_closure(
+                        nfa,
+                        r.next,
+                        *builder.look_have(),
+                        stack,
+                        &mut sparses.set2,
+                    );
+                }
+            }
+            thompson::State::Sparse(ref sparse) => {
+                if let Some(next) = sparse.matches_unit(unit) {
+                    epsilon_closure(
+                        nfa,
+                        next,
+                        *builder.look_have(),
+                        stack,
+                        &mut sparses.set2,
+                    );
+                }
+            }
+        }
+    }
+    // We only set the word byte if there's a word boundary look-around
+    // anywhere in this regex. Otherwise, there's no point in bloating the
+    // number of states if we don't have one.
+    //
+    // We also only set it when the state has a non-zero number of NFA states.
+    // Otherwise, we could wind up with states that *should* be DEAD states
+    // but are otherwise distinct from DEAD states because of this look-behind
+    // assertion being set. While this can't technically impact correctness *in
+    // theory*, it can create pathological DFAs that consume input until EOI or
+    // a quit byte is seen. Consuming until EOI isn't a correctness problem,
+    // but a (serious) perf problem. Hitting a quit byte, however, could be a
+    // correctness problem since it could cause search routines to report an
+    // error instead of a detected match once the quit state is entered. (The
+    // search routine could be made to be a bit smarter by reporting a match
+    // if one was detected once it enters a quit state (and indeed, the search
+    // routines in this crate do just that), but it seems better to prevent
+    // these things by construction if possible.)
+    if nfa.has_word_boundary()
+        && unit.is_word_byte()
+        && !sparses.set2.is_empty()
+    {
+        builder.set_is_from_word();
+    }
+    let mut builder_nfa = builder.into_nfa();
+    add_nfa_states(nfa, &sparses.set2, &mut builder_nfa);
+    builder_nfa
+}
+
+/// Compute the epsilon closure for the given NFA state. The epsilon closure
+/// consists of all NFA state IDs, including `start_nfa_id`, that can be
+/// reached from `start_nfa_id` without consuming any input. These state IDs
+/// are written to `set` in the order they are visited, but only if they are
+/// not already in `set`. `start_nfa_id` must be a valid state ID for the NFA
+/// given.
+///
+/// `look_have` consists of the satisfied assertions at the current
+/// position. For conditional look-around epsilon transitions, these are
+/// only followed if they are satisfied by `look_have`.
+///
+/// `stack` must have length 0. It is used as scratch space for depth first
+/// traversal. After returning, it is guaranteed that `stack` will have length
+/// 0.
+pub(crate) fn epsilon_closure(
+    nfa: &thompson::NFA,
+    start_nfa_id: StateID,
+    look_have: LookSet,
+    stack: &mut Vec<StateID>,
+    set: &mut SparseSet,
+) {
+    assert!(stack.is_empty());
+    // If this isn't an epsilon state, then the epsilon closure is always just
+    // itself, so there's no need to spin up the machinery below to handle it.
+    if !nfa.state(start_nfa_id).is_epsilon() {
+        set.insert(start_nfa_id);
+        return;
+    }
+
+    stack.push(start_nfa_id);
+    while let Some(mut id) = stack.pop() {
+        // In many cases, we can avoid stack operations when an NFA state only
+        // adds one new state to visit. In that case, we just set our ID to
+        // that state and mush on. We only use the stack when an NFA state
+        // introduces multiple new states to visit.
+        loop {
+            // Insert this NFA state, and if it's already in the set and thus
+            // already visited, then we can move on to the next one.
+            if !set.insert(id) {
+                break;
+            }
+            match *nfa.state(id) {
+                thompson::State::Range { .. }
+                | thompson::State::Sparse { .. }
+                | thompson::State::Fail
+                | thompson::State::Match { .. } => break,
+                thompson::State::Look { look, next } => {
+                    if !look_have.contains(look) {
+                        break;
+                    }
+                    id = next;
+                }
+                thompson::State::Union { ref alternates } => {
+                    id = match alternates.get(0) {
+                        None => break,
+                        Some(&id) => id,
+                    };
+                    // We need to process our alternates in order to preserve
+                    // match preferences, so put the earliest alternates closer
+                    // to the top of the stack.
+                    stack.extend(alternates[1..].iter().rev());
+                }
+                thompson::State::Capture { next, .. } => {
+                    id = next;
+                }
+            }
+        }
+    }
+}
+
+/// Add the NFA state IDs in the given `set` to the given DFA builder state.
+/// The order in which states are added corresponds to the order in which they
+/// were added to `set`.
+///
+/// The DFA builder state given should already have its complete set of match
+/// pattern IDs added (if any) and any look-behind assertions (StartLine,
+/// StartText and whether this state is being generated for a transition over a
+/// word byte when applicable) that are true immediately prior to transitioning
+/// into this state (via `builder.look_have()`). The match pattern IDs should
+/// correspond to matches that occured on the previous transition, since all
+/// matches are delayed by one byte. The things that should _not_ be set are
+/// look-ahead assertions (EndLine, EndText and whether the next byte is a
+/// word byte or not). The builder state should also not have anything in
+/// `look_need` set, as this routine will compute that for you.
+///
+/// The given NFA should be able to resolve all identifiers in `set` to a
+/// particular NFA state. Additionally, `set` must have capacity equivalent
+/// to `nfa.len()`.
+pub(crate) fn add_nfa_states(
+    nfa: &thompson::NFA,
+    set: &SparseSet,
+    builder: &mut StateBuilderNFA,
+) {
+    for nfa_id in set {
+        match *nfa.state(nfa_id) {
+            thompson::State::Range { .. } => {
+                builder.add_nfa_state_id(nfa_id);
+            }
+            thompson::State::Sparse { .. } => {
+                builder.add_nfa_state_id(nfa_id);
+            }
+            thompson::State::Look { look, .. } => {
+                builder.add_nfa_state_id(nfa_id);
+                builder.look_need().insert(look);
+            }
+            thompson::State::Union { .. }
+            | thompson::State::Capture { .. } => {
+                // Pure epsilon transitions don't need to be tracked
+                // as part of the DFA state. Tracking them is actually
+                // superfluous; they won't cause any harm other than making
+                // determinization slower.
+                //
+                // Why aren't these needed? Well, in an NFA, epsilon
+                // transitions are really just jumping points to other
+                // states. So once you hit an epsilon transition, the same
+                // set of resulting states always appears. Therefore,
+                // putting them in a DFA's set of ordered NFA states is
+                // strictly redundant.
+                //
+                // Look-around states are also epsilon transitions, but
+                // they are *conditional*. So their presence could be
+                // discriminatory, and thus, they are tracked above.
+                //
+                // But wait... why are epsilon states in our `set` in the
+                // first place? Why not just leave them out? They're in
+                // our `set` because it was generated by computing an
+                // epsilon closure, and we want to keep track of all states
+                // we visited to avoid re-visiting them. In exchange, we
+                // have to do this second iteration over our collected
+                // states to finalize our DFA state.
+                //
+                // Note that this optimization requires that we re-compute
+                // the epsilon closure to account for look-ahead in 'next'
+                // *only when necessary*. Namely, only when the set of
+                // look-around assertions changes and only when those
+                // changes are within the set of assertions that are
+                // needed in order to step through the closure correctly.
+                // Otherwise, if we re-do the epsilon closure needlessly,
+                // it could change based on the fact that we are omitting
+                // epsilon states here.
+            }
+            thompson::State::Fail => {
+                break;
+            }
+            thompson::State::Match { .. } => {
+                // Normally, the NFA match state doesn't actually need to
+                // be inside the DFA state. But since we delay matches by
+                // one byte, the matching DFA state corresponds to states
+                // that transition from the one we're building here. And
+                // the way we detect those cases is by looking for an NFA
+                // match state. See 'next' for how this is handled.
+                builder.add_nfa_state_id(nfa_id);
+            }
+        }
+    }
+    // If we know this state contains no look-around assertions, then
+    // there's no reason to track which look-around assertions were
+    // satisfied when this state was created.
+    if builder.look_need().is_empty() {
+        builder.look_have().clear();
+    }
+}
+
+/// Sets the appropriate look-behind assertions on the given state based on
+/// this starting configuration.
+pub(crate) fn set_lookbehind_from_start(
+    start: &Start,
+    builder: &mut StateBuilderMatches,
+) {
+    match *start {
+        Start::NonWordByte => {}
+        Start::WordByte => {
+            builder.set_is_from_word();
+        }
+        Start::Text => {
+            builder.look_have().insert(Look::StartText);
+            builder.look_have().insert(Look::StartLine);
+        }
+        Start::Line => {
+            builder.look_have().insert(Look::StartLine);
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::Start;
+
+    #[test]
+    #[should_panic]
+    fn start_fwd_bad_range() {
+        Start::from_position_fwd(&[], 0, 1);
+    }
+
+    #[test]
+    #[should_panic]
+    fn start_rev_bad_range() {
+        Start::from_position_rev(&[], 0, 1);
+    }
+
+    #[test]
+    fn start_fwd() {
+        let f = Start::from_position_fwd;
+
+        assert_eq!(Start::Text, f(&[], 0, 0));
+        assert_eq!(Start::Text, f(b"abc", 0, 3));
+        assert_eq!(Start::Text, f(b"\nabc", 0, 3));
+
+        assert_eq!(Start::Line, f(b"\nabc", 1, 3));
+
+        assert_eq!(Start::WordByte, f(b"abc", 1, 3));
+
+        assert_eq!(Start::NonWordByte, f(b" abc", 1, 3));
+    }
+
+    #[test]
+    fn start_rev() {
+        let f = Start::from_position_rev;
+
+        assert_eq!(Start::Text, f(&[], 0, 0));
+        assert_eq!(Start::Text, f(b"abc", 0, 3));
+        assert_eq!(Start::Text, f(b"abc\n", 0, 4));
+
+        assert_eq!(Start::Line, f(b"abc\nz", 0, 3));
+
+        assert_eq!(Start::WordByte, f(b"abc", 0, 2));
+
+        assert_eq!(Start::NonWordByte, f(b"abc ", 0, 3));
+    }
+}
diff --git a/vendor/regex-automata/src/util/determinize/state.rs b/vendor/regex-automata/src/util/determinize/state.rs
new file mode 100644
index 000000000..567e600d6
--- /dev/null
+++ b/vendor/regex-automata/src/util/determinize/state.rs
@@ -0,0 +1,873 @@
+/*!
+This module defines a DFA state representation and builders for constructing
+DFA states.
+
+This representation is specifically for use in implementations of NFA-to-DFA
+conversion via powerset construction. (Also called "determinization" in this
+crate.)
+
+The term "DFA state" is somewhat overloaded in this crate. In some cases, it
+refers to the set of transitions over an alphabet for a particular state. In
+other cases, it refers to a set of NFA states. The former is really about the
+final representation of a state in a DFA's transition table, where as the
+latter---what this module is focusedon---is closer to an intermediate form that
+is used to help eventually build the transition table.
+
+This module exports four types. All four types represent the same idea: an
+ordered set of NFA states. This ordered set represents the epsilon closure of a
+particular NFA state, where the "epsilon closure" is the set of NFA states that
+can be transitioned to without consuming any input. i.e., Follow all of theNFA
+state's epsilon transitions. In addition, this implementation of DFA states
+cares about two other things: the ordered set of pattern IDs corresponding
+to the patterns that match if the state is a match state, and the set of
+look-behind assertions that were true when the state was created.
+
+The first, `State`, is a frozen representation of a state that cannot be
+modified. It may be cheaply cloned without copying the state itself and can be
+accessed safely from multiple threads simultaneously. This type is useful for
+when one knows that the DFA state being constructed is distinct from any other
+previously constructed states. Namely, powerset construction, in practice,
+requires one to keep a cache of previously created DFA states. Otherwise,
+the number of DFA states created in memory balloons to an impractically
+large number. For this reason, equivalent states should endeavor to have an
+equivalent byte-level representation. (In general, "equivalency" here means,
+"equivalent assertions, pattern IDs and NFA state IDs." We do not require that
+full DFA minimization be implemented here. This form of equivalency is only
+surface deep and is more-or-less a practical necessity.)
+
+The other three types represent different phases in the construction of a
+DFA state. Internally, these three types (and `State`) all use the same
+byte-oriented representation. That means one can use any of the builder types
+to check whether the state it represents already exists or not. If it does,
+then there is no need to freeze it into a `State` (which requires an alloc and
+a copy). Here are the three types described succinctly:
+
+* `StateBuilderEmpty` represents a state with no pattern IDs, no assertions
+and no NFA states. Creating a `StateBuilderEmpty` performs no allocs. A
+`StateBuilderEmpty` can only be used to query its underlying memory capacity,
+or to convert into a builder for recording pattern IDs and/or assertions.
+* `StateBuilderMatches` represents a state with zero or more pattern IDs, zero
+or more satisfied assertions and zero NFA state IDs. A `StateBuilderMatches`
+can only be used for adding pattern IDs and recording assertions.
+* `StateBuilderNFA` represents a state with zero or more pattern IDs, zero or
+more satisfied assertions and zero or more NFA state IDs. A `StateBuilderNFA`
+can only be used for adding NFA state IDs and recording some assertions.
+
+The expected flow here is to use the above builders to construct a candidate
+DFA state to check if it already exists. If it does, then there's no need to
+freeze it into a `State`. It it doesn't exist, then `StateBuilderNFA::to_state`
+can be called to freeze the builder into an immutable `State`. In either
+case, `clear` should be called on the builder to turn it back into a
+`StateBuilderEmpty` that reuses the underyling memory.
+
+The main purpose for splitting the builder into these distinct types is to
+make it impossible to do things like adding a pattern ID after adding an NFA
+state ID. Namely, this makes it simpler to use a space-and-time efficient
+binary representation for the state. (The format is documented on the `Repr`
+type below.) If we just used one type for everything, it would be possible for
+callers to use an incorrect interleaving of calls and thus result in a corrupt
+representation. I chose to use more type machinery to make this impossible to
+do because 1) determinization is itself pretty complex and it wouldn't be too
+hard to foul this up and 2) there isn't too much machinery involve and it's
+well contained.
+
+As an optimization, sometimes states won't have certain things set. For
+example, if the underlying NFA has no word boundary assertions, then there is
+no reason to set a state's look-behind assertion as to whether it was generated
+from a word byte or not. Similarly, if a state has no NFA states corresponding
+to look-around assertions, then there is no reason to set `look_have` to a
+non-empty set. Finally, callers usually omit unconditional epsilon transitions
+when adding NFA state IDs since they aren't discriminatory.
+
+Finally, the binary representation used by these states is, thankfully, not
+serialized anywhere. So any kind of change can be made with reckless abandon,
+as long as everything in this module agrees.
+*/
+
+use core::{convert::TryFrom, mem};
+
+use alloc::{sync::Arc, vec::Vec};
+
+use crate::{
+    nfa::thompson::LookSet,
+    util::{
+        bytes::{self, Endian},
+        id::{PatternID, StateID},
+    },
+};
+
+/// A DFA state that, at its core, is represented by an ordered set of NFA
+/// states.
+///
+/// This type is intended to be used only in NFA-to-DFA conversion via powerset
+/// construction.
+///
+/// It may be cheaply cloned and accessed safely from mulitple threads
+/// simultaneously.
+#[derive(Clone, Eq, Hash, PartialEq, PartialOrd, Ord)]
+pub(crate) struct State(Arc<[u8]>);
+
+/// This Borrow impl permits us to lookup any state in a map by its byte
+/// representation. This is particularly convenient when one has a StateBuilder
+/// and we want to see if a correspondingly equivalent state already exists. If
+/// one does exist, then we can reuse the allocation required by StateBuilder
+/// without having to convert it into a State first.
+impl core::borrow::Borrow<[u8]> for State {
+    fn borrow(&self) -> &[u8] {
+        &*self.0
+    }
+}
+
+impl core::fmt::Debug for State {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        f.debug_tuple("State").field(&self.repr()).finish()
+    }
+}
+
+/// For docs on these routines, see the internal Repr and ReprVec types below.
+impl State {
+    pub(crate) fn dead() -> State {
+        StateBuilderEmpty::new().into_matches().into_nfa().to_state()
+    }
+
+    pub(crate) fn is_match(&self) -> bool {
+        self.repr().is_match()
+    }
+
+    pub(crate) fn is_from_word(&self) -> bool {
+        self.repr().is_from_word()
+    }
+
+    pub(crate) fn look_have(&self) -> LookSet {
+        self.repr().look_have()
+    }
+
+    pub(crate) fn look_need(&self) -> LookSet {
+        self.repr().look_need()
+    }
+
+    pub(crate) fn match_count(&self) -> usize {
+        self.repr().match_count()
+    }
+
+    pub(crate) fn match_pattern(&self, index: usize) -> PatternID {
+        self.repr().match_pattern(index)
+    }
+
+    pub(crate) fn match_pattern_ids(&self) -> Option<Vec<PatternID>> {
+        self.repr().match_pattern_ids()
+    }
+
+    pub(crate) fn iter_match_pattern_ids<F: FnMut(PatternID)>(&self, f: F) {
+        self.repr().iter_match_pattern_ids(f)
+    }
+
+    pub(crate) fn iter_nfa_state_ids<F: FnMut(StateID)>(&self, f: F) {
+        self.repr().iter_nfa_state_ids(f)
+    }
+
+    pub(crate) fn memory_usage(&self) -> usize {
+        self.0.len()
+    }
+
+    fn repr(&self) -> Repr<'_> {
+        Repr(&*self.0)
+    }
+}
+
+/// A state builder that represents an empty state.
+///
+/// This is a useful "initial condition" for state construction. It has no
+/// NFA state IDs, no assertions set and no pattern IDs. No allocations are
+/// made when new() is called. Its main use is for being converted into a
+/// builder that can capture assertions and pattern IDs.
+#[derive(Clone, Debug)]
+pub(crate) struct StateBuilderEmpty(Vec<u8>);
+
+/// For docs on these routines, see the internal Repr and ReprVec types below.
+impl StateBuilderEmpty {
+    pub(crate) fn new() -> StateBuilderEmpty {
+        StateBuilderEmpty(alloc::vec![])
+    }
+
+    pub(crate) fn into_matches(mut self) -> StateBuilderMatches {
+        self.0.extend_from_slice(&[0, 0, 0]);
+        StateBuilderMatches(self.0)
+    }
+
+    fn clear(&mut self) {
+        self.0.clear();
+    }
+
+    pub(crate) fn capacity(&self) -> usize {
+        self.0.capacity()
+    }
+}
+
+/// A state builder that collects assertions and pattern IDs.
+///
+/// When collecting pattern IDs is finished, this can be converted into a
+/// builder that collects NFA state IDs.
+#[derive(Clone)]
+pub(crate) struct StateBuilderMatches(Vec<u8>);
+
+impl core::fmt::Debug for StateBuilderMatches {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        f.debug_tuple("StateBuilderMatches").field(&self.repr()).finish()
+    }
+}
+
+/// For docs on these routines, see the internal Repr and ReprVec types below.
+impl StateBuilderMatches {
+    pub(crate) fn into_nfa(mut self) -> StateBuilderNFA {
+        self.repr_vec().close_match_pattern_ids();
+        StateBuilderNFA { repr: self.0, prev_nfa_state_id: StateID::ZERO }
+    }
+
+    pub(crate) fn clear(self) -> StateBuilderEmpty {
+        let mut builder = StateBuilderEmpty(self.0);
+        builder.clear();
+        builder
+    }
+
+    pub(crate) fn is_match(&self) -> bool {
+        self.repr().is_match()
+    }
+
+    pub(crate) fn is_from_word(&self) -> bool {
+        self.repr().is_from_word()
+    }
+
+    pub(crate) fn set_is_from_word(&mut self) {
+        self.repr_vec().set_is_from_word()
+    }
+
+    pub(crate) fn look_have(&mut self) -> &mut LookSet {
+        LookSet::from_repr_mut(&mut self.0[1])
+    }
+
+    pub(crate) fn look_need(&mut self) -> &mut LookSet {
+        LookSet::from_repr_mut(&mut self.0[2])
+    }
+
+    pub(crate) fn add_match_pattern_id(&mut self, pid: PatternID) {
+        self.repr_vec().add_match_pattern_id(pid)
+    }
+
+    fn repr(&self) -> Repr<'_> {
+        Repr(&self.0)
+    }
+
+    fn repr_vec(&mut self) -> ReprVec<'_> {
+        ReprVec(&mut self.0)
+    }
+}
+
+/// A state builder that collects some assertions and NFA state IDs.
+///
+/// When collecting NFA state IDs is finished, this can be used to build a
+/// `State` if necessary.
+///
+/// When dont with building a state (regardless of whether it got kept or not),
+/// it's usually a good idea to call `clear` to get an empty builder back so
+/// that it can be reused to build the next state.
+#[derive(Clone)]
+pub(crate) struct StateBuilderNFA {
+    repr: Vec<u8>,
+    prev_nfa_state_id: StateID,
+}
+
+impl core::fmt::Debug for StateBuilderNFA {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        f.debug_tuple("StateBuilderNFA").field(&self.repr()).finish()
+    }
+}
+
+/// For docs on these routines, see the internal Repr and ReprVec types below.
+impl StateBuilderNFA {
+    pub(crate) fn to_state(&self) -> State {
+        State(Arc::from(&*self.repr))
+    }
+
+    pub(crate) fn clear(self) -> StateBuilderEmpty {
+        let mut builder = StateBuilderEmpty(self.repr);
+        builder.clear();
+        builder
+    }
+
+    pub(crate) fn is_match(&self) -> bool {
+        self.repr().is_match()
+    }
+
+    pub(crate) fn is_from_word(&self) -> bool {
+        self.repr().is_from_word()
+    }
+
+    pub(crate) fn look_have(&mut self) -> &mut LookSet {
+        LookSet::from_repr_mut(&mut self.repr[1])
+    }
+
+    pub(crate) fn look_need(&mut self) -> &mut LookSet {
+        LookSet::from_repr_mut(&mut self.repr[2])
+    }
+
+    pub(crate) fn add_nfa_state_id(&mut self, sid: StateID) {
+        ReprVec(&mut self.repr)
+            .add_nfa_state_id(&mut self.prev_nfa_state_id, sid)
+    }
+
+    pub(crate) fn memory_usage(&self) -> usize {
+        self.repr.len()
+    }
+
+    pub(crate) fn as_bytes(&self) -> &[u8] {
+        &self.repr
+    }
+
+    fn repr(&self) -> Repr<'_> {
+        Repr(&self.repr)
+    }
+
+    fn repr_vec(&mut self) -> ReprVec<'_> {
+        ReprVec(&mut self.repr)
+    }
+}
+
+/// Repr is a read-only view into the representation of a DFA state.
+///
+/// Primarily, a Repr is how we achieve DRY: we implement decoding the format
+/// in one place, and then use a Repr to implement the various methods on the
+/// public state types.
+///
+/// The format is as follows:
+///
+/// The first three bytes correspond to bitsets.
+///
+/// Byte 0 is a bitset corresponding to miscellaneous flags associated with the
+/// state. Bit 0 is set to 1 if the state is a match state. Bit 1 is set to 1
+/// if the state has pattern IDs explicitly written to it. (This is a flag that
+/// is not meant to be set by determinization, but rather, is used as part of
+/// an internal space-saving optimization.) Bit 2 is set to 1 if the state was
+/// generated by a transition over a "word" byte. (Callers may not always set
+/// this. For example, if the NFA has no word boundary assertion, then needing
+/// to track whether a state came from a word byte or not is superfluous and
+/// wasteful.)
+///
+/// Byte 1 corresponds to the look-behind assertions that were satisfied by
+/// the transition that created this state. This generally only includes the
+/// StartLine and StartText assertions. (Look-ahead assertions are not tracked
+/// as part of states. Instead, these are applied by re-computing the epsilon
+/// closure of a state when computing the transition function. See `next` in
+/// the parent module.)
+///
+/// Byte 2 corresponds to the set of look-around assertions (including both
+/// look-behind and look-ahead) that appear somewhere in this state's set of
+/// NFA state IDs. This is used to determine whether this state's epsilon
+/// closure should be re-computed when computing the transition function.
+/// Namely, look-around assertions are "just" conditional epsilon transitions,
+/// so if there are new assertions available when computing the transition
+/// function, we should only re-compute the epsilon closure if those new
+/// assertions are relevant to this particular state.
+///
+/// Bytes 3..7 correspond to a 32-bit native-endian encoded integer
+/// corresponding to the number of patterns encoded in this state. If the state
+/// is not a match state (byte 0 bit 0 is 0) or if it's only pattern ID is
+/// PatternID::ZERO, then no integer is encoded at this position. Instead, byte
+/// offset 3 is the position at which the first NFA state ID is encoded.
+///
+/// For a match state with at least one non-ZERO pattern ID, the next bytes
+/// correspond to a sequence of 32-bit native endian encoded integers that
+/// represent each pattern ID, in order, that this match state represents.
+///
+/// After the pattern IDs (if any), NFA state IDs are delta encoded as
+/// varints.[1] The first NFA state ID is encoded as itself, and each
+/// subsequent NFA state ID is encoded as the difference between itself and the
+/// previous NFA state ID.
+///
+/// [1] - https://developers.google.com/protocol-buffers/docs/encoding#varints
+struct Repr<'a>(&'a [u8]);
+
+impl<'a> Repr<'a> {
+    /// Returns true if and only if this is a match state.
+    ///
+    /// If callers have added pattern IDs to this state, then callers MUST set
+    /// this state as a match state explicitly. However, as a special case,
+    /// states that are marked as match states but with no pattern IDs, then
+    /// the state is treated as if it had a single pattern ID equivalent to
+    /// PatternID::ZERO.
+    fn is_match(&self) -> bool {
+        self.0[0] & (1 << 0) > 0
+    }
+
+    /// Returns true if and only if this state has had at least one pattern
+    /// ID added to it.
+    ///
+    /// This is an internal-only flag that permits the representation to save
+    /// space in the common case of an NFA with one pattern in it. In that
+    /// case, a match state can only ever have exactly one pattern ID:
+    /// PatternID::ZERO. So there's no need to represent it.
+    fn has_pattern_ids(&self) -> bool {
+        self.0[0] & (1 << 1) > 0
+    }
+
+    /// Returns true if and only if this state is marked as having been created
+    /// from a transition over a word byte. This is useful for checking whether
+    /// a word boundary assertion is true or not, which requires look-behind
+    /// (whether the current state came from a word byte or not) and look-ahead
+    /// (whether the transition byte is a word byte or not).
+    ///
+    /// Since states with this set are distinct from states that don't have
+    /// this set (even if they are otherwise equivalent), callers should not
+    /// set this assertion unless the underlying NFA has at least one word
+    /// boundary assertion somewhere. Otherwise, a superfluous number of states
+    /// may be created.
+    fn is_from_word(&self) -> bool {
+        self.0[0] & (1 << 2) > 0
+    }
+
+    /// The set of look-behind assertions that were true in the transition that
+    /// created this state.
+    ///
+    /// Generally, this should be empty if 'look_need' is empty, since there is
+    /// no reason to track which look-behind assertions are true if the state
+    /// has no conditional epsilon transitions.
+    ///
+    /// Satisfied look-ahead assertions are not tracked in states. Instead,
+    /// these are re-computed on demand via epsilon closure when computing the
+    /// transition function.
+    fn look_have(&self) -> LookSet {
+        LookSet::from_repr(self.0[1])
+    }
+
+    /// The set of look-around (both behind and ahead) assertions that appear
+    /// at least once in this state's set of NFA states.
+    ///
+    /// This is used to determine whether the epsilon closure needs to be
+    /// re-computed when computing the transition function. Namely, if the
+    /// state has no conditional epsilon transitions, then there is no need
+    /// to re-compute the epsilon closure.
+    fn look_need(&self) -> LookSet {
+        LookSet::from_repr(self.0[2])
+    }
+
+    /// Returns the total number of match pattern IDs in this state.
+    ///
+    /// If this state is not a match state, then this always returns 0.
+    fn match_count(&self) -> usize {
+        if !self.is_match() {
+            return 0;
+        } else if !self.has_pattern_ids() {
+            1
+        } else {
+            self.encoded_pattern_count()
+        }
+    }
+
+    /// Returns the pattern ID for this match state at the given index.
+    ///
+    /// If the given index is greater than or equal to `match_count()` for this
+    /// state, then this could panic or return incorrect results.
+    fn match_pattern(&self, index: usize) -> PatternID {
+        if !self.has_pattern_ids() {
+            PatternID::ZERO
+        } else {
+            let offset = 7 + index * PatternID::SIZE;
+            // This is OK since we only ever serialize valid PatternIDs to
+            // states.
+            bytes::read_pattern_id_unchecked(&self.0[offset..]).0
+        }
+    }
+
+    /// Returns a copy of all match pattern IDs in this state. If this state
+    /// is not a match state, then this returns None.
+    fn match_pattern_ids(&self) -> Option<Vec<PatternID>> {
+        if !self.is_match() {
+            return None;
+        }
+        let mut pids = alloc::vec![];
+        self.iter_match_pattern_ids(|pid| pids.push(pid));
+        Some(pids)
+    }
+
+    /// Calls the given function on every pattern ID in this state.
+    fn iter_match_pattern_ids<F: FnMut(PatternID)>(&self, mut f: F) {
+        if !self.is_match() {
+            return;
+        }
+        // As an optimization for a very common case, when this is a match
+        // state for an NFA with only one pattern, we don't actually write the
+        // pattern ID to the state representation. Instead, we know it must
+        // be there since it is the only possible choice.
+        if !self.has_pattern_ids() {
+            f(PatternID::ZERO);
+            return;
+        }
+        let mut pids = &self.0[7..self.pattern_offset_end()];
+        while !pids.is_empty() {
+            let pid = bytes::read_u32(pids);
+            pids = &pids[PatternID::SIZE..];
+            // This is OK since we only ever serialize valid PatternIDs to
+            // states. And since pattern IDs can never exceed a usize, the
+            // unwrap is OK.
+            f(PatternID::new_unchecked(usize::try_from(pid).unwrap()));
+        }
+    }
+
+    /// Calls the given function on every NFA state ID in this state.
+    fn iter_nfa_state_ids<F: FnMut(StateID)>(&self, mut f: F) {
+        let mut sids = &self.0[self.pattern_offset_end()..];
+        let mut prev = 0i32;
+        while !sids.is_empty() {
+            let (delta, nr) = read_vari32(sids);
+            sids = &sids[nr..];
+            let sid = prev + delta;
+            prev = sid;
+            // This is OK since we only ever serialize valid StateIDs to
+            // states. And since state IDs can never exceed an isize, they must
+            // always be able to fit into a usize, and thus cast is OK.
+            f(StateID::new_unchecked(sid as usize))
+        }
+    }
+
+    /// Returns the offset into this state's representation where the pattern
+    /// IDs end and the NFA state IDs begin.
+    fn pattern_offset_end(&self) -> usize {
+        let encoded = self.encoded_pattern_count();
+        if encoded == 0 {
+            return 3;
+        }
+        // This arithmetic is OK since we were able to address this many bytes
+        // when writing to the state, thus, it must fit into a usize.
+        encoded.checked_mul(4).unwrap().checked_add(7).unwrap()
+    }
+
+    /// Returns the total number of *encoded* pattern IDs in this state.
+    ///
+    /// This may return 0 even when this is a match state, since the pattern
+    /// ID `PatternID::ZERO` is not encoded when it's the only pattern ID in
+    /// the match state (the overwhelming common case).
+    fn encoded_pattern_count(&self) -> usize {
+        if !self.has_pattern_ids() {
+            return 0;
+        }
+        // This unwrap is OK since the total number of patterns is always
+        // guaranteed to fit into a usize.
+        usize::try_from(bytes::read_u32(&self.0[3..7])).unwrap()
+    }
+}
+
+impl<'a> core::fmt::Debug for Repr<'a> {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        let mut nfa_ids = alloc::vec![];
+        self.iter_nfa_state_ids(|sid| nfa_ids.push(sid));
+        f.debug_struct("Repr")
+            .field("is_match", &self.is_match())
+            .field("is_from_word", &self.is_from_word())
+            .field("look_have", &self.look_have())
+            .field("look_need", &self.look_need())
+            .field("match_pattern_ids", &self.match_pattern_ids())
+            .field("nfa_state_ids", &nfa_ids)
+            .finish()
+    }
+}
+
+/// ReprVec is a write-only view into the representation of a DFA state.
+///
+/// See Repr for more details on the purpose of this type and also the format.
+///
+/// Note that not all possible combinations of methods may be called. This is
+/// precisely what the various StateBuilder types encapsulate: they only
+/// permit valid combinations via Rust's linear typing.
+struct ReprVec<'a>(&'a mut Vec<u8>);
+
+impl<'a> ReprVec<'a> {
+    /// Set this state as a match state.
+    ///
+    /// This should not be exposed explicitly outside of this module. It is
+    /// set automatically when a pattern ID is added.
+    fn set_is_match(&mut self) {
+        self.0[0] |= 1 << 0;
+    }
+
+    /// Set that this state has pattern IDs explicitly written to it.
+    ///
+    /// This should not be exposed explicitly outside of this module. This is
+    /// used internally as a space saving optimization. Namely, if the state
+    /// is a match state but does not have any pattern IDs written to it,
+    /// then it is automatically inferred to have a pattern ID of ZERO.
+    fn set_has_pattern_ids(&mut self) {
+        self.0[0] |= 1 << 1;
+    }
+
+    /// Set this state as being built from a transition over a word byte.
+    ///
+    /// Setting this is only necessary when one needs to deal with word
+    /// boundary assertions. Therefore, if the underlying NFA has no word
+    /// boundary assertions, callers should not set this.
+    fn set_is_from_word(&mut self) {
+        self.0[0] |= 1 << 2;
+    }
+
+    /// Return a mutable reference to the 'look_have' assertion set.
+    fn look_have_mut(&mut self) -> &mut LookSet {
+        LookSet::from_repr_mut(&mut self.0[1])
+    }
+
+    /// Return a mutable reference to the 'look_need' assertion set.
+    fn look_need_mut(&mut self) -> &mut LookSet {
+        LookSet::from_repr_mut(&mut self.0[2])
+    }
+
+    /// Add a pattern ID to this state. All match states must have at least
+    /// one pattern ID associated with it.
+    ///
+    /// Callers must never add duplicative pattern IDs.
+    ///
+    /// The order in which patterns are added must correspond to the order
+    /// in which patterns are reported as matches.
+    fn add_match_pattern_id(&mut self, pid: PatternID) {
+        // As a (somewhat small) space saving optimization, in the case where
+        // a matching state has exactly one pattern ID, PatternID::ZERO, we do
+        // not write either the pattern ID or the number of patterns encoded.
+        // Instead, all we do is set the 'is_match' bit on this state. Overall,
+        // this saves 8 bytes per match state for the overwhelming majority of
+        // match states.
+        //
+        // In order to know whether pattern IDs need to be explicitly read or
+        // not, we use another internal-only bit, 'has_pattern_ids', to
+        // indicate whether they have been explicitly written or not.
+        if !self.repr().has_pattern_ids() {
+            if pid == PatternID::ZERO {
+                self.set_is_match();
+                return;
+            }
+            // Make room for 'close_match_pattern_ids' to write the total
+            // number of pattern IDs written.
+            self.0.extend(core::iter::repeat(0).take(PatternID::SIZE));
+            self.set_has_pattern_ids();
+            // If this was already a match state, then the only way that's
+            // possible when the state doesn't have pattern IDs is if
+            // PatternID::ZERO was added by the caller previously. In this
+            // case, we are now adding a non-ZERO pattern ID after it, in
+            // which case, we want to make sure to represent ZERO explicitly
+            // now.
+            if self.repr().is_match() {
+                write_u32(self.0, 0)
+            } else {
+                // Otherwise, just make sure the 'is_match' bit is set.
+                self.set_is_match();
+            }
+        }
+        write_u32(self.0, pid.as_u32());
+    }
+
+    /// Indicate that no more pattern IDs will be added to this state.
+    ///
+    /// Once this is called, callers must not call it or 'add_match_pattern_id'
+    /// again.
+    ///
+    /// This should not be exposed explicitly outside of this module. It
+    /// should be called only when converting a StateBuilderMatches into a
+    /// StateBuilderNFA.
+    fn close_match_pattern_ids(&mut self) {
+        // If we never wrote any pattern IDs, then there's nothing to do here.
+        if !self.repr().has_pattern_ids() {
+            return;
+        }
+        let patsize = PatternID::SIZE;
+        let pattern_bytes = self.0.len() - 7;
+        // Every pattern ID uses 4 bytes, so number of bytes should be
+        // divisible by 4.
+        assert_eq!(pattern_bytes % patsize, 0);
+        // This unwrap is OK since we are guaranteed that the maximum number
+        // of possible patterns fits into a u32.
+        let count32 = u32::try_from(pattern_bytes / patsize).unwrap();
+        bytes::NE::write_u32(count32, &mut self.0[3..7]);
+    }
+
+    /// Add an NFA state ID to this state. The order in which NFA states are
+    /// added matters. It is the caller's responsibility to ensure that
+    /// duplicate NFA state IDs are not added.
+    fn add_nfa_state_id(&mut self, prev: &mut StateID, sid: StateID) {
+        let delta = sid.as_i32() - prev.as_i32();
+        write_vari32(self.0, delta);
+        *prev = sid;
+    }
+
+    /// Return a read-only view of this state's representation.
+    fn repr(&self) -> Repr<'_> {
+        Repr(self.0.as_slice())
+    }
+}
+
+/// Write a signed 32-bit integer using zig-zag encoding.
+///
+/// https://developers.google.com/protocol-buffers/docs/encoding#varints
+fn write_vari32(data: &mut Vec<u8>, n: i32) {
+    let mut un = (n as u32) << 1;
+    if n < 0 {
+        un = !un;
+    }
+    write_varu32(data, un)
+}
+
+/// Read a signed 32-bit integer using zig-zag encoding. Also, return the
+/// number of bytes read.
+///
+/// https://developers.google.com/protocol-buffers/docs/encoding#varints
+fn read_vari32(data: &[u8]) -> (i32, usize) {
+    let (un, i) = read_varu32(data);
+    let mut n = (un >> 1) as i32;
+    if un & 1 != 0 {
+        n = !n;
+    }
+    (n, i)
+}
+
+/// Write an unsigned 32-bit integer as a varint. In essence, `n` is written
+/// as a sequence of bytes where all bytes except for the last one have the
+/// most significant bit set. The least significant 7 bits correspond to the
+/// actual bits of `n`. So in the worst case, a varint uses 5 bytes, but in
+/// very common cases, it uses fewer than 4.
+///
+/// https://developers.google.com/protocol-buffers/docs/encoding#varints
+fn write_varu32(data: &mut Vec<u8>, mut n: u32) {
+    while n >= 0b1000_0000 {
+        data.push((n as u8) | 0b1000_0000);
+        n >>= 7;
+    }
+    data.push(n as u8);
+}
+
+/// Read an unsigned 32-bit varint. Also, return the number of bytes read.
+///
+/// https://developers.google.com/protocol-buffers/docs/encoding#varints
+fn read_varu32(data: &[u8]) -> (u32, usize) {
+    // N.B. We can assume correctness here since we know that all varuints are
+    // written with write_varu32. Hence, the 'as' uses and unchecked arithmetic
+    // is all okay.
+    let mut n: u32 = 0;
+    let mut shift: u32 = 0;
+    for (i, &b) in data.iter().enumerate() {
+        if b < 0b1000_0000 {
+            return (n | ((b as u32) << shift), i + 1);
+        }
+        n |= ((b as u32) & 0b0111_1111) << shift;
+        shift += 7;
+    }
+    (0, 0)
+}
+
+/// Push a native-endian encoded `n` on to `dst`.
+fn write_u32(dst: &mut Vec<u8>, n: u32) {
+    use crate::util::bytes::{Endian, NE};
+
+    let start = dst.len();
+    dst.extend(core::iter::repeat(0).take(mem::size_of::<u32>()));
+    NE::write_u32(n, &mut dst[start..]);
+}
+
+#[cfg(test)]
+mod tests {
+    use alloc::vec;
+
+    use quickcheck::quickcheck;
+
+    use super::*;
+
+    quickcheck! {
+        fn prop_state_read_write_nfa_state_ids(sids: Vec<StateID>) -> bool {
+            // Builders states do not permit duplicate IDs.
+            let sids = dedup_state_ids(sids);
+
+            let mut b = StateBuilderEmpty::new().into_matches().into_nfa();
+            for &sid in &sids {
+                b.add_nfa_state_id(sid);
+            }
+            let s = b.to_state();
+            let mut got = vec![];
+            s.iter_nfa_state_ids(|sid| got.push(sid));
+            got == sids
+        }
+
+        fn prop_state_read_write_pattern_ids(pids: Vec<PatternID>) -> bool {
+            // Builders states do not permit duplicate IDs.
+            let pids = dedup_pattern_ids(pids);
+
+            let mut b = StateBuilderEmpty::new().into_matches();
+            for &pid in &pids {
+                b.add_match_pattern_id(pid);
+            }
+            let s = b.into_nfa().to_state();
+            let mut got = vec![];
+            s.iter_match_pattern_ids(|pid| got.push(pid));
+            got == pids
+        }
+
+        fn prop_state_read_write_nfa_state_and_pattern_ids(
+            sids: Vec<StateID>,
+            pids: Vec<PatternID>
+        ) -> bool {
+            // Builders states do not permit duplicate IDs.
+            let sids = dedup_state_ids(sids);
+            let pids = dedup_pattern_ids(pids);
+
+            let mut b = StateBuilderEmpty::new().into_matches();
+            for &pid in &pids {
+                b.add_match_pattern_id(pid);
+            }
+
+            let mut b = b.into_nfa();
+            for &sid in &sids {
+                b.add_nfa_state_id(sid);
+            }
+
+            let s = b.to_state();
+            let mut got_pids = vec![];
+            s.iter_match_pattern_ids(|pid| got_pids.push(pid));
+            let mut got_sids = vec![];
+            s.iter_nfa_state_ids(|sid| got_sids.push(sid));
+            got_pids == pids && got_sids == sids
+        }
+
+        fn prop_read_write_varu32(n: u32) -> bool {
+            let mut buf = vec![];
+            write_varu32(&mut buf, n);
+            let (got, nread) = read_varu32(&buf);
+            nread == buf.len() && got == n
+        }
+
+        fn prop_read_write_vari32(n: i32) -> bool {
+            let mut buf = vec![];
+            write_vari32(&mut buf, n);
+            let (got, nread) = read_vari32(&buf);
+            nread == buf.len() && got == n
+        }
+    }
+
+    fn dedup_state_ids(sids: Vec<StateID>) -> Vec<StateID> {
+        let mut set = alloc::collections::BTreeSet::new();
+        let mut deduped = vec![];
+        for sid in sids {
+            if set.contains(&sid) {
+                continue;
+            }
+            set.insert(sid);
+            deduped.push(sid);
+        }
+        deduped
+    }
+
+    fn dedup_pattern_ids(pids: Vec<PatternID>) -> Vec<PatternID> {
+        let mut set = alloc::collections::BTreeSet::new();
+        let mut deduped = vec![];
+        for pid in pids {
+            if set.contains(&pid) {
+                continue;
+            }
+            set.insert(pid);
+            deduped.push(pid);
+        }
+        deduped
+    }
+}
diff --git a/vendor/regex-automata/src/util/id.rs b/vendor/regex-automata/src/util/id.rs
new file mode 100644
index 000000000..70bf0a93b
--- /dev/null
+++ b/vendor/regex-automata/src/util/id.rs
@@ -0,0 +1,608 @@
+/*!
+Type definitions for identifier types.
+
+A [`StateID`] represents the possible set of identifiers used in regex engine
+implementations in this crate. For example, they are used to identify both NFA
+and DFA states.
+
+A [`PatternID`] represents the possible set of identifiers for patterns. All
+regex engine implementations in this crate support searching for multiple
+patterns simultaneously. A `PatternID` is how each pattern is uniquely
+identified for a particular instance of a regex engine. Namely, a pattern is
+assigned an auto-incrementing integer, starting at `0`, based on the order of
+patterns supplied during the construction of the regex engine.
+
+These identifier types represent a way for this crate to make correctness
+guarantees around the possible set of values that a `StateID` or a `PatternID`
+might represent. Similarly, they also provide a way of constraining the size of
+these identifiers to reduce space usage while still guaranteeing that all such
+identifiers are repsentable by a `usize` for the current target.
+
+Moreover, the identifier types clamp the range of permissible values to a range
+that is typically smaller than its internal representation. (With the maximum
+value being, e.g., `StateID::MAX`.) Users of these types may not rely this
+clamping for the purpose of memory safety. Users may, however, rely on these
+invariants to avoid panics or other types of logic bugs.
+*/
+
+// Continuing from the above comment about correctness guarantees, an example
+// of a way in which we use the guarantees on these types is delta encoding.
+// Namely, we require that IDs can be at most 2^31 - 2, which means the
+// difference between any two IDs is always representable as an i32.
+
+use core::{
+    convert::{Infallible, TryFrom},
+    mem, ops,
+};
+
+#[cfg(feature = "alloc")]
+use alloc::vec::Vec;
+
+/// An identifier for a regex pattern.
+///
+/// The identifier for a pattern corresponds to its relative position among
+/// other patterns in a single finite state machine. Namely, when building
+/// a multi-pattern regex engine, one must supply a sequence of patterns to
+/// match. The position (starting at 0) of each pattern in that sequence
+/// represents its identifier. This identifier is in turn used to identify and
+/// report matches of that pattern in various APIs.
+///
+/// A pattern ID is guaranteed to be representable by a `usize`. Similarly,
+/// the number of patterns in any regex engine in this crate is guaranteed to
+/// be representable by a `usize`. This applies to regex engines that have
+/// been deserialized; a deserialization error will be returned if it contains
+/// pattern IDs that violate these requirements in your current environment.
+///
+/// For extra convenience in some cases, this type also guarantees that all
+/// IDs can fit into an `i32` and an `isize` without overflowing.
+///
+/// # Representation
+///
+/// This type is always represented internally by a `u32` and is marked as
+/// `repr(transparent)`. Thus, this type always has the same representation as
+/// a `u32`.
+///
+/// # Indexing
+///
+/// For convenience, callers may use a `PatternID` to index slices.
+///
+/// # Safety
+///
+/// While a `PatternID` is meant to guarantee that its value fits into `usize`
+/// (while using a possibly smaller representation than `usize` on some
+/// targets), callers must not rely on this property for safety. Callers may
+/// choose to rely on this property for correctness however.
+#[repr(transparent)]
+#[derive(
+    Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord,
+)]
+pub struct PatternID(u32);
+
+impl PatternID {
+    /// The maximum pattern ID value, represented as a `usize`.
+    #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
+    pub const MAX: PatternID =
+        PatternID::new_unchecked(core::i32::MAX as usize - 1);
+
+    /// The maximum pattern ID value, represented as a `usize`.
+    #[cfg(target_pointer_width = "16")]
+    pub const MAX: PatternID = PatternID::new_unchecked(core::isize::MAX - 1);
+
+    /// The total number of patterns that are allowed in any single regex
+    /// engine.
+    pub const LIMIT: usize = PatternID::MAX.as_usize() + 1;
+
+    /// The zero pattern ID value.
+    pub const ZERO: PatternID = PatternID::new_unchecked(0);
+
+    /// The number of bytes that a single `PatternID` uses in memory.
+    pub const SIZE: usize = core::mem::size_of::<PatternID>();
+
+    /// Create a new pattern ID.
+    ///
+    /// If the given identifier exceeds [`PatternID::MAX`], then this returns
+    /// an error.
+    #[inline]
+    pub fn new(id: usize) -> Result<PatternID, PatternIDError> {
+        PatternID::try_from(id)
+    }
+
+    /// Create a new pattern ID without checking whether the given value
+    /// exceeds [`PatternID::MAX`].
+    ///
+    /// While this is unchecked, providing an incorrect value must never
+    /// sacrifice memory safety, as documented above.
+    #[inline]
+    pub const fn new_unchecked(id: usize) -> PatternID {
+        PatternID(id as u32)
+    }
+
+    /// Like [`PatternID::new`], but panics if the given ID is not valid.
+    #[inline]
+    pub fn must(id: usize) -> PatternID {
+        PatternID::new(id).unwrap()
+    }
+
+    /// Return this pattern ID as a `usize`.
+    #[inline]
+    pub const fn as_usize(&self) -> usize {
+        self.0 as usize
+    }
+
+    /// Return the internal u32 of this pattern ID.
+    #[inline]
+    pub const fn as_u32(&self) -> u32 {
+        self.0
+    }
+
+    /// Return the internal u32 of this pattern ID represented as an i32.
+    ///
+    /// This is guaranteed to never overflow an `i32`.
+    #[inline]
+    pub const fn as_i32(&self) -> i32 {
+        self.0 as i32
+    }
+
+    /// Returns one more than this pattern ID as a usize.
+    ///
+    /// Since a pattern ID has constraints on its maximum value, adding `1` to
+    /// it will always fit in a `usize` (and a `u32`).
+    #[inline]
+    pub fn one_more(&self) -> usize {
+        self.as_usize().checked_add(1).unwrap()
+    }
+
+    /// Decode this pattern ID from the bytes given using the native endian
+    /// byte order for the current target.
+    ///
+    /// If the decoded integer is not representable as a pattern ID for the
+    /// current target, then this returns an error.
+    #[inline]
+    pub fn from_ne_bytes(bytes: [u8; 4]) -> Result<PatternID, PatternIDError> {
+        let id = u32::from_ne_bytes(bytes);
+        if id > PatternID::MAX.as_u32() {
+            return Err(PatternIDError { attempted: id as u64 });
+        }
+        Ok(PatternID::new_unchecked(id as usize))
+    }
+
+    /// Decode this pattern ID from the bytes given using the native endian
+    /// byte order for the current target.
+    ///
+    /// This is analogous to [`PatternID::new_unchecked`] in that is does not
+    /// check whether the decoded integer is representable as a pattern ID.
+    #[inline]
+    pub fn from_ne_bytes_unchecked(bytes: [u8; 4]) -> PatternID {
+        PatternID::new_unchecked(u32::from_ne_bytes(bytes) as usize)
+    }
+
+    /// Return the underlying pattern ID integer as raw bytes in native endian
+    /// format.
+    #[inline]
+    pub fn to_ne_bytes(&self) -> [u8; 4] {
+        self.0.to_ne_bytes()
+    }
+
+    /// Returns an iterator over all pattern IDs from 0 up to and not including
+    /// the given length.
+    ///
+    /// If the given length exceeds [`PatternID::LIMIT`], then this panics.
+    #[cfg(feature = "alloc")]
+    pub(crate) fn iter(len: usize) -> PatternIDIter {
+        PatternIDIter::new(len)
+    }
+}
+
+/// This error occurs when a pattern ID could not be constructed.
+///
+/// This occurs when given an integer exceeding the maximum pattern ID value.
+///
+/// When the `std` feature is enabled, this implements the `Error` trait.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct PatternIDError {
+    attempted: u64,
+}
+
+impl PatternIDError {
+    /// Returns the value that failed to constructed a pattern ID.
+    pub fn attempted(&self) -> u64 {
+        self.attempted
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for PatternIDError {}
+
+impl core::fmt::Display for PatternIDError {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(
+            f,
+            "failed to create PatternID from {:?}, which exceeds {:?}",
+            self.attempted(),
+            PatternID::MAX,
+        )
+    }
+}
+
+/// An identifier for a state in a regex engine.
+///
+/// A state ID is guaranteed to be representable by a `usize`. Similarly, the
+/// number of states in any regex engine in this crate is guaranteed to be
+/// representable by a `usize`. This applies to regex engines that have been
+/// deserialized; a deserialization error will be returned if it contains state
+/// IDs that violate these requirements in your current environment.
+///
+/// For extra convenience in some cases, this type also guarantees that all
+/// IDs can fit into an `i32` and an `isize` without overflowing.
+///
+/// # Representation
+///
+/// This type is always represented internally by a `u32` and is marked as
+/// `repr(transparent)`. Thus, this type always has the same representation as
+/// a `u32`.
+///
+/// # Indexing
+///
+/// For convenience, callers may use a `StateID` to index slices.
+///
+/// # Safety
+///
+/// While a `StateID` is meant to guarantee that its value fits into `usize`
+/// (while using a possibly smaller representation than `usize` on some
+/// targets), callers must not rely on this property for safety. Callers may
+/// choose to rely on this property for correctness however.
+#[repr(transparent)]
+#[derive(
+    Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord,
+)]
+pub struct StateID(u32);
+
+impl StateID {
+    /// The maximum state ID value.
+    #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
+    pub const MAX: StateID =
+        StateID::new_unchecked(core::i32::MAX as usize - 1);
+
+    /// The maximum state ID value.
+    #[cfg(target_pointer_width = "16")]
+    pub const MAX: StateID = StateID::new_unchecked(core::isize::MAX - 1);
+
+    /// The total number of states that are allowed in any single regex
+    /// engine, represented as a `usize`.
+    pub const LIMIT: usize = StateID::MAX.as_usize() + 1;
+
+    /// The zero state ID value.
+    pub const ZERO: StateID = StateID::new_unchecked(0);
+
+    /// The number of bytes that a single `StateID` uses in memory.
+    pub const SIZE: usize = core::mem::size_of::<StateID>();
+
+    /// Create a new state ID.
+    ///
+    /// If the given identifier exceeds [`StateID::MAX`], then this returns
+    /// an error.
+    #[inline]
+    pub fn new(id: usize) -> Result<StateID, StateIDError> {
+        StateID::try_from(id)
+    }
+
+    /// Create a new state ID without checking whether the given value
+    /// exceeds [`StateID::MAX`].
+    ///
+    /// While this is unchecked, providing an incorrect value must never
+    /// sacrifice memory safety, as documented above.
+    #[inline]
+    pub const fn new_unchecked(id: usize) -> StateID {
+        StateID(id as u32)
+    }
+
+    /// Like [`StateID::new`], but panics if the given ID is not valid.
+    #[inline]
+    pub fn must(id: usize) -> StateID {
+        StateID::new(id).unwrap()
+    }
+
+    /// Return this state ID as a `usize`.
+    #[inline]
+    pub const fn as_usize(&self) -> usize {
+        self.0 as usize
+    }
+
+    /// Return the internal u32 of this state ID.
+    #[inline]
+    pub const fn as_u32(&self) -> u32 {
+        self.0
+    }
+
+    /// Return the internal u32 of this pattern ID represented as an i32.
+    ///
+    /// This is guaranteed to never overflow an `i32`.
+    #[inline]
+    pub const fn as_i32(&self) -> i32 {
+        self.0 as i32
+    }
+
+    /// Returns one more than this state ID as a usize.
+    ///
+    /// Since a state ID has constraints on its maximum value, adding `1` to
+    /// it will always fit in a `usize` (and a `u32`).
+    #[inline]
+    pub fn one_more(&self) -> usize {
+        self.as_usize().checked_add(1).unwrap()
+    }
+
+    /// Decode this state ID from the bytes given using the native endian byte
+    /// order for the current target.
+    ///
+    /// If the decoded integer is not representable as a state ID for the
+    /// current target, then this returns an error.
+    #[inline]
+    pub fn from_ne_bytes(bytes: [u8; 4]) -> Result<StateID, StateIDError> {
+        let id = u32::from_ne_bytes(bytes);
+        if id > StateID::MAX.as_u32() {
+            return Err(StateIDError { attempted: id as u64 });
+        }
+        Ok(StateID::new_unchecked(id as usize))
+    }
+
+    /// Decode this state ID from the bytes given using the native endian
+    /// byte order for the current target.
+    ///
+    /// This is analogous to [`StateID::new_unchecked`] in that is does not
+    /// check whether the decoded integer is representable as a state ID.
+    #[inline]
+    pub fn from_ne_bytes_unchecked(bytes: [u8; 4]) -> StateID {
+        StateID::new_unchecked(u32::from_ne_bytes(bytes) as usize)
+    }
+
+    /// Return the underlying state ID integer as raw bytes in native endian
+    /// format.
+    #[inline]
+    pub fn to_ne_bytes(&self) -> [u8; 4] {
+        self.0.to_ne_bytes()
+    }
+
+    /// Returns an iterator over all state IDs from 0 up to and not including
+    /// the given length.
+    ///
+    /// If the given length exceeds [`StateID::LIMIT`], then this panics.
+    #[cfg(feature = "alloc")]
+    pub(crate) fn iter(len: usize) -> StateIDIter {
+        StateIDIter::new(len)
+    }
+}
+
+/// This error occurs when a state ID could not be constructed.
+///
+/// This occurs when given an integer exceeding the maximum state ID value.
+///
+/// When the `std` feature is enabled, this implements the `Error` trait.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct StateIDError {
+    attempted: u64,
+}
+
+impl StateIDError {
+    /// Returns the value that failed to constructed a state ID.
+    pub fn attempted(&self) -> u64 {
+        self.attempted
+    }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for StateIDError {}
+
+impl core::fmt::Display for StateIDError {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(
+            f,
+            "failed to create StateID from {:?}, which exceeds {:?}",
+            self.attempted(),
+            StateID::MAX,
+        )
+    }
+}
+
+/// A macro for defining exactly identical (modulo names) impls for ID types.
+macro_rules! impls {
+    ($ty:ident, $tyerr:ident, $tyiter:ident) => {
+        #[derive(Clone, Debug)]
+        pub(crate) struct $tyiter {
+            rng: ops::Range<usize>,
+        }
+
+        impl $tyiter {
+            #[cfg(feature = "alloc")]
+            fn new(len: usize) -> $tyiter {
+                assert!(
+                    len <= $ty::LIMIT,
+                    "cannot create iterator with IDs when number of \
+                     elements exceed {:?}",
+                    $ty::LIMIT,
+                );
+                $tyiter { rng: 0..len }
+            }
+        }
+
+        impl Iterator for $tyiter {
+            type Item = $ty;
+
+            fn next(&mut self) -> Option<$ty> {
+                if self.rng.start >= self.rng.end {
+                    return None;
+                }
+                let next_id = self.rng.start + 1;
+                let id = mem::replace(&mut self.rng.start, next_id);
+                // new_unchecked is OK since we asserted that the number of
+                // elements in this iterator will fit in an ID at construction.
+                Some($ty::new_unchecked(id))
+            }
+        }
+
+        impl<T> core::ops::Index<$ty> for [T] {
+            type Output = T;
+
+            #[inline]
+            fn index(&self, index: $ty) -> &T {
+                &self[index.as_usize()]
+            }
+        }
+
+        impl<T> core::ops::IndexMut<$ty> for [T] {
+            #[inline]
+            fn index_mut(&mut self, index: $ty) -> &mut T {
+                &mut self[index.as_usize()]
+            }
+        }
+
+        #[cfg(feature = "alloc")]
+        impl<T> core::ops::Index<$ty> for Vec<T> {
+            type Output = T;
+
+            #[inline]
+            fn index(&self, index: $ty) -> &T {
+                &self[index.as_usize()]
+            }
+        }
+
+        #[cfg(feature = "alloc")]
+        impl<T> core::ops::IndexMut<$ty> for Vec<T> {
+            #[inline]
+            fn index_mut(&mut self, index: $ty) -> &mut T {
+                &mut self[index.as_usize()]
+            }
+        }
+
+        impl TryFrom<usize> for $ty {
+            type Error = $tyerr;
+
+            fn try_from(id: usize) -> Result<$ty, $tyerr> {
+                if id > $ty::MAX.as_usize() {
+                    return Err($tyerr { attempted: id as u64 });
+                }
+                Ok($ty::new_unchecked(id))
+            }
+        }
+
+        impl TryFrom<u8> for $ty {
+            type Error = Infallible;
+
+            fn try_from(id: u8) -> Result<$ty, Infallible> {
+                Ok($ty::new_unchecked(id as usize))
+            }
+        }
+
+        impl TryFrom<u16> for $ty {
+            type Error = $tyerr;
+
+            fn try_from(id: u16) -> Result<$ty, $tyerr> {
+                if id as u32 > $ty::MAX.as_u32() {
+                    return Err($tyerr { attempted: id as u64 });
+                }
+                Ok($ty::new_unchecked(id as usize))
+            }
+        }
+
+        impl TryFrom<u32> for $ty {
+            type Error = $tyerr;
+
+            fn try_from(id: u32) -> Result<$ty, $tyerr> {
+                if id > $ty::MAX.as_u32() {
+                    return Err($tyerr { attempted: id as u64 });
+                }
+                Ok($ty::new_unchecked(id as usize))
+            }
+        }
+
+        impl TryFrom<u64> for $ty {
+            type Error = $tyerr;
+
+            fn try_from(id: u64) -> Result<$ty, $tyerr> {
+                if id > $ty::MAX.as_u32() as u64 {
+                    return Err($tyerr { attempted: id });
+                }
+                Ok($ty::new_unchecked(id as usize))
+            }
+        }
+
+        #[cfg(test)]
+        impl quickcheck::Arbitrary for $ty {
+            fn arbitrary(gen: &mut quickcheck::Gen) -> $ty {
+                use core::cmp::max;
+
+                let id = max(i32::MIN + 1, i32::arbitrary(gen)).abs();
+                if id > $ty::MAX.as_i32() {
+                    $ty::MAX
+                } else {
+                    $ty::new(usize::try_from(id).unwrap()).unwrap()
+                }
+            }
+        }
+    };
+}
+
+impls!(PatternID, PatternIDError, PatternIDIter);
+impls!(StateID, StateIDError, StateIDIter);
+
+/// A utility trait that defines a couple of adapters for making it convenient
+/// to access indices as ID types. We require ExactSizeIterator so that
+/// iterator construction can do a single check to make sure the index of each
+/// element is representable by its ID type.
+#[cfg(feature = "alloc")]
+pub(crate) trait IteratorIDExt: Iterator {
+    fn with_pattern_ids(self) -> WithPatternIDIter<Self>
+    where
+        Self: Sized + ExactSizeIterator,
+    {
+        WithPatternIDIter::new(self)
+    }
+
+    fn with_state_ids(self) -> WithStateIDIter<Self>
+    where
+        Self: Sized + ExactSizeIterator,
+    {
+        WithStateIDIter::new(self)
+    }
+}
+
+#[cfg(feature = "alloc")]
+impl<I: Iterator> IteratorIDExt for I {}
+
+#[cfg(feature = "alloc")]
+macro_rules! iditer {
+    ($ty:ident, $iterty:ident, $withiterty:ident) => {
+        /// An iterator adapter that is like std::iter::Enumerate, but attaches
+        /// IDs. It requires ExactSizeIterator. At construction, it ensures
+        /// that the index of each element in the iterator is representable in
+        /// the corresponding ID type.
+        #[derive(Clone, Debug)]
+        pub(crate) struct $withiterty<I> {
+            it: I,
+            ids: $iterty,
+        }
+
+        impl<I: Iterator + ExactSizeIterator> $withiterty<I> {
+            fn new(it: I) -> $withiterty<I> {
+                let ids = $ty::iter(it.len());
+                $withiterty { it, ids }
+            }
+        }
+
+        impl<I: Iterator + ExactSizeIterator> Iterator for $withiterty<I> {
+            type Item = ($ty, I::Item);
+
+            fn next(&mut self) -> Option<($ty, I::Item)> {
+                let item = self.it.next()?;
+                // Number of elements in this iterator must match, according
+                // to contract of ExactSizeIterator.
+                let id = self.ids.next().unwrap();
+                Some((id, item))
+            }
+        }
+    };
+}
+
+#[cfg(feature = "alloc")]
+iditer!(PatternID, PatternIDIter, WithPatternIDIter);
+#[cfg(feature = "alloc")]
+iditer!(StateID, StateIDIter, WithStateIDIter);
diff --git a/vendor/regex-automata/src/util/lazy.rs b/vendor/regex-automata/src/util/lazy.rs
new file mode 100644
index 000000000..d8cac6ef4
--- /dev/null
+++ b/vendor/regex-automata/src/util/lazy.rs
@@ -0,0 +1,31 @@
+use core::{
+    cell::Cell,
+    ptr,
+    sync::atomic::{AtomicPtr, Ordering},
+};
+
+use alloc::{boxed::Box, vec::Vec};
+
+#[inline(always)]
+pub(crate) fn get_or_init<T: Send + Sync + 'static>(
+    location: &'static AtomicPtr<T>,
+    init: impl FnOnce() -> T,
+) -> &'static T {
+    let mut ptr = location.load(Ordering::Acquire);
+    if ptr.is_null() {
+        let new_dfa = Box::new(init());
+        ptr = Box::into_raw(new_dfa);
+        let result = location.compare_exchange(
+            ptr::null_mut(),
+            ptr,
+            Ordering::AcqRel,
+            Ordering::Acquire,
+        );
+        if let Err(old) = result {
+            let redundant = unsafe { Box::from_raw(ptr) };
+            drop(redundant);
+            ptr = old;
+        }
+    }
+    unsafe { &*ptr }
+}
diff --git a/vendor/regex-automata/src/util/matchtypes.rs b/vendor/regex-automata/src/util/matchtypes.rs
new file mode 100644
index 000000000..de0fa65bf
--- /dev/null
+++ b/vendor/regex-automata/src/util/matchtypes.rs
@@ -0,0 +1,356 @@
+use crate::util::id::PatternID;
+
+/// The kind of match semantics to use for a DFA.
+///
+/// The default match kind is `LeftmostFirst`.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum MatchKind {
+    /// Report all possible matches.
+    All,
+    /// Report only the leftmost matches. When multiple leftmost matches exist,
+    /// report the match corresponding to the part of the regex that appears
+    /// first in the syntax.
+    LeftmostFirst,
+    /// Hints that destructuring should not be exhaustive.
+    ///
+    /// This enum may grow additional variants, so this makes sure clients
+    /// don't count on exhaustive matching. (Otherwise, adding a new variant
+    /// could break existing code.)
+    #[doc(hidden)]
+    __Nonexhaustive,
+    // There is prior art in RE2 that shows that we should be able to add
+    // LeftmostLongest too. The tricky part of it is supporting ungreedy
+    // repetitions. Instead of treating all NFA states as having equivalent
+    // priority (as in 'All') or treating all NFA states as having distinct
+    // priority based on order (as in 'LeftmostFirst'), we instead group NFA
+    // states into sets, and treat members of each set as having equivalent
+    // priority, but having greater priority than all following members
+    // of different sets.
+    //
+    // However, it's not clear whether it's really worth adding this. After
+    // all, leftmost-longest can be emulated when using literals by using
+    // leftmost-first and sorting the literals by length in descending order.
+    // However, this won't work for arbitrary regexes. e.g., `\w|\w\w` will
+    // always match `a` in `ab` when using leftmost-first, but leftmost-longest
+    // would match `ab`.
+}
+
+impl MatchKind {
+    #[cfg(feature = "alloc")]
+    pub(crate) fn continue_past_first_match(&self) -> bool {
+        *self == MatchKind::All
+    }
+}
+
+impl Default for MatchKind {
+    fn default() -> MatchKind {
+        MatchKind::LeftmostFirst
+    }
+}
+
+/// A representation of a match reported by a regex engine.
+///
+/// A match records the start and end offsets of the match in the haystack.
+///
+/// Every match guarantees that `start <= end`.
+#[derive(Clone, Debug, Eq, Hash, PartialEq)]
+pub struct Match {
+    /// The start offset of the match, inclusive.
+    start: usize,
+    /// The end offset of the match, exclusive.
+    end: usize,
+}
+
+impl Match {
+    /// Create a new match from a byte offset span.
+    ///
+    /// # Panics
+    ///
+    /// This panics if `end < start`.
+    #[inline]
+    pub fn new(start: usize, end: usize) -> Match {
+        assert!(start <= end);
+        Match { start, end }
+    }
+
+    /// The starting position of the match.
+    #[inline]
+    pub fn start(&self) -> usize {
+        self.start
+    }
+
+    /// The ending position of the match.
+    #[inline]
+    pub fn end(&self) -> usize {
+        self.end
+    }
+
+    /// Returns the match location as a range.
+    #[inline]
+    pub fn range(&self) -> core::ops::Range<usize> {
+        self.start..self.end
+    }
+
+    /// Returns true if and only if this match is empty. That is, when
+    /// `start() == end()`.
+    ///
+    /// An empty match can only be returned when the empty string was among
+    /// the patterns used to build the Aho-Corasick automaton.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.start == self.end
+    }
+}
+
+/// A representation of a match reported by a DFA.
+///
+/// This is called a "half" match because it only includes the end location
+/// (or start location for a reverse match) of a match. This corresponds to the
+/// information that a single DFA scan can report. Getting the other half of
+/// the match requires a second scan with a reversed DFA.
+///
+/// A half match also includes the pattern that matched. The pattern is
+/// identified by an ID, which corresponds to its position (starting from `0`)
+/// relative to other patterns used to construct the corresponding DFA. If only
+/// a single pattern is provided to the DFA, then all matches are guaranteed to
+/// have a pattern ID of `0`.
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
+pub struct HalfMatch {
+    /// The pattern ID.
+    pub(crate) pattern: PatternID,
+    /// The offset of the match.
+    ///
+    /// For forward searches, the offset is exclusive. For reverse searches,
+    /// the offset is inclusive.
+    pub(crate) offset: usize,
+}
+
+impl HalfMatch {
+    /// Create a new half match from a pattern ID and a byte offset.
+    #[inline]
+    pub fn new(pattern: PatternID, offset: usize) -> HalfMatch {
+        HalfMatch { pattern, offset }
+    }
+
+    /// Create a new half match from a pattern ID and a byte offset.
+    ///
+    /// This is like [`HalfMatch::new`], but accepts a `usize` instead of a
+    /// [`PatternID`]. This panics if the given `usize` is not representable
+    /// as a `PatternID`.
+    #[inline]
+    pub fn must(pattern: usize, offset: usize) -> HalfMatch {
+        HalfMatch::new(PatternID::new(pattern).unwrap(), offset)
+    }
+
+    /// Returns the ID of the pattern that matched.
+    ///
+    /// The ID of a pattern is derived from the position in which it was
+    /// originally inserted into the corresponding DFA. The first pattern has
+    /// identifier `0`, and each subsequent pattern is `1`, `2` and so on.
+    #[inline]
+    pub fn pattern(&self) -> PatternID {
+        self.pattern
+    }
+
+    /// The position of the match.
+    ///
+    /// If this match was produced by a forward search, then the offset is
+    /// exclusive. If this match was produced by a reverse search, then the
+    /// offset is inclusive.
+    #[inline]
+    pub fn offset(&self) -> usize {
+        self.offset
+    }
+}
+
+/// A representation of a multi match reported by a regex engine.
+///
+/// A multi match has two essential pieces of information: the identifier of
+/// the pattern that matched, along with the start and end offsets of the match
+/// in the haystack.
+///
+/// The pattern is identified by an ID, which corresponds to its position
+/// (starting from `0`) relative to other patterns used to construct the
+/// corresponding regex engine. If only a single pattern is provided, then all
+/// multi matches are guaranteed to have a pattern ID of `0`.
+///
+/// Every multi match guarantees that `start <= end`.
+#[derive(Clone, Debug, Eq, Hash, PartialEq)]
+pub struct MultiMatch {
+    /// The pattern ID.
+    pattern: PatternID,
+    /// The start offset of the match, inclusive.
+    start: usize,
+    /// The end offset of the match, exclusive.
+    end: usize,
+}
+
+impl MultiMatch {
+    /// Create a new match from a pattern ID and a byte offset span.
+    ///
+    /// # Panics
+    ///
+    /// This panics if `end < start`.
+    #[inline]
+    pub fn new(pattern: PatternID, start: usize, end: usize) -> MultiMatch {
+        assert!(start <= end);
+        MultiMatch { pattern, start, end }
+    }
+
+    /// Create a new match from a pattern ID and a byte offset span.
+    ///
+    /// This is like [`MultiMatch::new`], but accepts a `usize` instead of a
+    /// [`PatternID`]. This panics if the given `usize` is not representable
+    /// as a `PatternID`.
+    ///
+    /// # Panics
+    ///
+    /// This panics if `end < start` or if `pattern > PatternID::MAX`.
+    #[inline]
+    pub fn must(pattern: usize, start: usize, end: usize) -> MultiMatch {
+        MultiMatch::new(PatternID::new(pattern).unwrap(), start, end)
+    }
+
+    /// Returns the ID of the pattern that matched.
+    ///
+    /// The ID of a pattern is derived from the position in which it was
+    /// originally inserted into the corresponding regex engine. The first
+    /// pattern has identifier `0`, and each subsequent pattern is `1`, `2` and
+    /// so on.
+    #[inline]
+    pub fn pattern(&self) -> PatternID {
+        self.pattern
+    }
+
+    /// The starting position of the match.
+    #[inline]
+    pub fn start(&self) -> usize {
+        self.start
+    }
+
+    /// The ending position of the match.
+    #[inline]
+    pub fn end(&self) -> usize {
+        self.end
+    }
+
+    /// Returns the match location as a range.
+    #[inline]
+    pub fn range(&self) -> core::ops::Range<usize> {
+        self.start..self.end
+    }
+
+    /// Returns true if and only if this match is empty. That is, when
+    /// `start() == end()`.
+    ///
+    /// An empty match can only be returned when the empty string was among
+    /// the patterns used to build the Aho-Corasick automaton.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.start == self.end
+    }
+}
+
+/// An error type indicating that a search stopped prematurely without finding
+/// a match.
+///
+/// This error type implies that one cannot assume that no matches occur, since
+/// the search stopped before completing.
+///
+/// Normally, when one searches for something, the response is either an
+/// affirmative "it was found at this location" or a negative "not found at
+/// all." However, in some cases, a regex engine can be configured to stop its
+/// search before concluding whether a match exists or not. When this happens,
+/// it may be important for the caller to know why the regex engine gave up and
+/// where in the input it gave up at. This error type exposes the 'why' and the
+/// 'where.'
+///
+/// For example, the DFAs provided by this library generally cannot correctly
+/// implement Unicode word boundaries. Instead, they provide an option to
+/// eagerly support them on ASCII text (since Unicode word boundaries are
+/// equivalent to ASCII word boundaries when searching ASCII text), but will
+/// "give up" if a non-ASCII byte is seen. In such cases, one is usually
+/// required to either report the failure to the caller (unergonomic) or
+/// otherwise fall back to some other regex engine (ergonomic, but potentially
+/// costly).
+///
+/// More generally, some regex engines offer the ability for callers to specify
+/// certain bytes that will trigger the regex engine to automatically quit if
+/// they are seen.
+///
+/// Still yet, there may be other reasons for a failed match. For example,
+/// the hybrid DFA provided by this crate can be configured to give up if it
+/// believes that it is not efficient. This in turn permits callers to choose a
+/// different regex engine.
+///
+/// # Advice
+///
+/// While this form of error reporting adds complexity, it is generally
+/// possible for callers to configure regex engines to never give up a search,
+/// and thus never return an error. Indeed, the default configuration for every
+/// regex engine in this crate is such that they will never stop searching
+/// early. Therefore, the only way to get a match error is if the regex engine
+/// is explicitly configured to do so. Options that enable this behavior
+/// document the new error conditions they imply.
+///
+/// Regex engines for which no errors are possible for any configuration will
+/// return the normal `Option<Match>` and not use this error type at all.
+///
+/// For example, regex engines in the `dfa` sub-module will only report
+/// `MatchError::Quit` if instructed by either
+/// [enabling Unicode word boundaries](crate::dfa::dense::Config::unicode_word_boundary)
+/// or by
+/// [explicitly specifying one or more quit bytes](crate::dfa::dense::Config::quit).
+#[derive(Clone, Debug, Eq, Hash, PartialEq)]
+pub enum MatchError {
+    // Note that the first version of this type was called `SearchError` and it
+    // included a third `None` variant to indicate that the search completed
+    // and no match was found. However, this was problematic for iterator
+    // APIs where the `None` sentinel for stopping iteration corresponds
+    // precisely to the "match not found" case. The fact that the `None`
+    // variant was buried inside this type was in turn quite awkward. So
+    // instead, I removed the `None` variant, renamed the type and used
+    // `Result<Option<Match>, MatchError>` in non-iterator APIs instead of the
+    // conceptually simpler `Result<Match, MatchError>`. However, we "regain"
+    // ergonomics by only putting the more complex API in the `try_` variants
+    // ("fallible") of search methods. The infallible APIs will instead just
+    // return `Option<Match>` and panic on error.
+    /// The search saw a "quit" byte at which it was instructed to stop
+    /// searching.
+    Quit {
+        /// The "quit" byte that was observed that caused the search to stop.
+        byte: u8,
+        /// The offset at which the quit byte was observed.
+        offset: usize,
+    },
+    /// The search, based on heuristics, determined that it would be better
+    /// to stop, typically to provide the caller an opportunity to use an
+    /// alternative regex engine.
+    ///
+    /// Currently, the only way for this to occur is via the lazy DFA and
+    /// only when it is configured to do so (it will not return this error by
+    /// default).
+    GaveUp {
+        /// The offset at which the search stopped. This corresponds to the
+        /// position immediately following the last byte scanned.
+        offset: usize,
+    },
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for MatchError {}
+
+impl core::fmt::Display for MatchError {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        match *self {
+            MatchError::Quit { byte, offset } => write!(
+                f,
+                "quit search after observing byte \\x{:02X} at offset {}",
+                byte, offset,
+            ),
+            MatchError::GaveUp { offset } => {
+                write!(f, "gave up searching at offset {}", offset)
+            }
+        }
+    }
+}
diff --git a/vendor/regex-automata/src/util/mod.rs b/vendor/regex-automata/src/util/mod.rs
new file mode 100644
index 000000000..798507da2
--- /dev/null
+++ b/vendor/regex-automata/src/util/mod.rs
@@ -0,0 +1,275 @@
+/*!
+TODO
+*/
+
+use core::{ascii, fmt, str};
+
+#[cfg(feature = "alloc")]
+use alloc::vec::Vec;
+
+pub mod alphabet;
+pub(crate) mod bytes;
+#[cfg(feature = "alloc")]
+pub(crate) mod determinize;
+pub mod id;
+#[cfg(feature = "alloc")]
+pub(crate) mod lazy;
+pub(crate) mod matchtypes;
+pub mod prefilter;
+#[cfg(feature = "alloc")]
+pub(crate) mod sparse_set;
+pub(crate) mod start;
+#[cfg(feature = "alloc")]
+pub(crate) mod syntax;
+
+/// The offset, in bytes, that a match is delayed by in the DFAs generated by
+/// this crate. (This includes lazy DFAs.)
+///
+/// The purpose of this delay is to support look-ahead such as \b (ASCII-only)
+/// and $. In particular, both of these operators may require the
+/// identification of the end of input in order to confirm a match. Not only
+/// does this mean that all matches must therefore be delayed by a single byte,
+/// but that a special EOI value is added to the alphabet of all DFAs. (Which
+/// means that even though the alphabet of a DFA is typically all byte values,
+/// the actual maximum alphabet size is 257 due to the extra EOI value.)
+///
+/// Since we delay matches by only 1 byte, this can't fully support a
+/// Unicode-aware \b operator, which requires multi-byte look-ahead. Indeed,
+/// DFAs in this crate do not support it. (It's not as simple as just
+/// increasing the match offset to do it---otherwise we would---but building
+/// the full Unicode-aware word boundary detection into an automaton is quite
+/// tricky.)
+pub(crate) const MATCH_OFFSET: usize = 1;
+
+/// A type that wraps a single byte with a convenient fmt::Debug impl that
+/// escapes the byte.
+pub(crate) struct DebugByte(pub u8);
+
+impl fmt::Debug for DebugByte {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        // 10 bytes is enough to cover any output from ascii::escape_default.
+        let mut bytes = [0u8; 10];
+        let mut len = 0;
+        for (i, mut b) in ascii::escape_default(self.0).enumerate() {
+            // capitalize \xab to \xAB
+            if i >= 2 && b'a' <= b && b <= b'f' {
+                b -= 32;
+            }
+            bytes[len] = b;
+            len += 1;
+        }
+        write!(f, "{}", str::from_utf8(&bytes[..len]).unwrap())
+    }
+}
+
+/// Returns the smallest possible index of the next valid UTF-8 sequence
+/// starting after `i`.
+///
+/// For all inputs, including invalid UTF-8 and any value of `i`, the return
+/// value is guaranteed to be greater than `i`.
+///
+/// Generally speaking, this should only be called on `text` when it is
+/// permitted to assume that it is valid UTF-8 and where either `i >=
+/// text.len()` or where `text[i]` is a leading byte of a UTF-8 sequence.
+#[inline(always)]
+pub(crate) fn next_utf8(text: &[u8], i: usize) -> usize {
+    let b = match text.get(i) {
+        None => return i.checked_add(1).unwrap(),
+        Some(&b) => b,
+    };
+    // For cases where we see an invalid UTF-8 byte, there isn't much we can do
+    // other than just start at the next byte.
+    let inc = utf8_len(b).unwrap_or(1);
+    i.checked_add(inc).unwrap()
+}
+
+/// Returns true if and only if the given byte is considered a word character.
+/// This only applies to ASCII.
+///
+/// This was copied from regex-syntax so that we can use it to determine the
+/// starting DFA state while searching without depending on regex-syntax. The
+/// definition is never going to change, so there's no maintenance/bit-rot
+/// hazard here.
+#[inline(always)]
+pub(crate) fn is_word_byte(b: u8) -> bool {
+    match b {
+        b'_' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' => true,
+        _ => false,
+    }
+}
+
+/// Decodes the next UTF-8 encoded codepoint from the given byte slice.
+///
+/// If no valid encoding of a codepoint exists at the beginning of the given
+/// byte slice, then the first byte is returned instead.
+///
+/// This returns `None` if and only if `bytes` is empty.
+#[inline(always)]
+pub(crate) fn decode_utf8(bytes: &[u8]) -> Option<Result<char, u8>> {
+    if bytes.is_empty() {
+        return None;
+    }
+    let len = match utf8_len(bytes[0]) {
+        None => return Some(Err(bytes[0])),
+        Some(len) if len > bytes.len() => return Some(Err(bytes[0])),
+        Some(1) => return Some(Ok(bytes[0] as char)),
+        Some(len) => len,
+    };
+    match str::from_utf8(&bytes[..len]) {
+        Ok(s) => Some(Ok(s.chars().next().unwrap())),
+        Err(_) => Some(Err(bytes[0])),
+    }
+}
+
+/// Decodes the last UTF-8 encoded codepoint from the given byte slice.
+///
+/// If no valid encoding of a codepoint exists at the end of the given byte
+/// slice, then the last byte is returned instead.
+///
+/// This returns `None` if and only if `bytes` is empty.
+#[inline(always)]
+pub(crate) fn decode_last_utf8(bytes: &[u8]) -> Option<Result<char, u8>> {
+    if bytes.is_empty() {
+        return None;
+    }
+    let mut start = bytes.len() - 1;
+    let limit = bytes.len().saturating_sub(4);
+    while start > limit && !is_leading_or_invalid_utf8_byte(bytes[start]) {
+        start -= 1;
+    }
+    match decode_utf8(&bytes[start..]) {
+        None => None,
+        Some(Ok(ch)) => Some(Ok(ch)),
+        Some(Err(_)) => Some(Err(bytes[bytes.len() - 1])),
+    }
+}
+
+/// Given a UTF-8 leading byte, this returns the total number of code units
+/// in the following encoded codepoint.
+///
+/// If the given byte is not a valid UTF-8 leading byte, then this returns
+/// `None`.
+#[inline(always)]
+fn utf8_len(byte: u8) -> Option<usize> {
+    if byte <= 0x7F {
+        return Some(1);
+    } else if byte & 0b1100_0000 == 0b1000_0000 {
+        return None;
+    } else if byte <= 0b1101_1111 {
+        Some(2)
+    } else if byte <= 0b1110_1111 {
+        Some(3)
+    } else if byte <= 0b1111_0111 {
+        Some(4)
+    } else {
+        None
+    }
+}
+
+/// Returns true if and only if the given byte is either a valid leading UTF-8
+/// byte, or is otherwise an invalid byte that can never appear anywhere in a
+/// valid UTF-8 sequence.
+#[inline(always)]
+fn is_leading_or_invalid_utf8_byte(b: u8) -> bool {
+    // In the ASCII case, the most significant bit is never set. The leading
+    // byte of a 2/3/4-byte sequence always has the top two most significant
+    // bits set. For bytes that can never appear anywhere in valid UTF-8, this
+    // also returns true, since every such byte has its two most significant
+    // bits set:
+    //
+    //     \xC0 :: 11000000
+    //     \xC1 :: 11000001
+    //     \xF5 :: 11110101
+    //     \xF6 :: 11110110
+    //     \xF7 :: 11110111
+    //     \xF8 :: 11111000
+    //     \xF9 :: 11111001
+    //     \xFA :: 11111010
+    //     \xFB :: 11111011
+    //     \xFC :: 11111100
+    //     \xFD :: 11111101
+    //     \xFE :: 11111110
+    //     \xFF :: 11111111
+    (b & 0b1100_0000) != 0b1000_0000
+}
+
+#[cfg(feature = "alloc")]
+#[inline(always)]
+pub(crate) fn is_word_char_fwd(bytes: &[u8], mut at: usize) -> bool {
+    use core::{ptr, sync::atomic::AtomicPtr};
+
+    use crate::{
+        dfa::{
+            dense::{self, DFA},
+            Automaton,
+        },
+        util::lazy,
+    };
+
+    static WORD: AtomicPtr<DFA<Vec<u32>>> = AtomicPtr::new(ptr::null_mut());
+
+    let dfa = lazy::get_or_init(&WORD, || {
+        // TODO: Should we use a lazy DFA here instead? It does complicate
+        // things somewhat, since we then need a mutable cache, which probably
+        // means a thread local.
+        dense::Builder::new()
+            .configure(dense::Config::new().anchored(true))
+            .build(r"\w")
+            .unwrap()
+    });
+    // This is OK since '\w' contains no look-around.
+    let mut sid = dfa.universal_start_state();
+    while at < bytes.len() {
+        let byte = bytes[at];
+        sid = dfa.next_state(sid, byte);
+        at += 1;
+        if dfa.is_special_state(sid) {
+            if dfa.is_match_state(sid) {
+                return true;
+            } else if dfa.is_dead_state(sid) {
+                return false;
+            }
+        }
+    }
+    dfa.is_match_state(dfa.next_eoi_state(sid))
+}
+
+#[cfg(feature = "alloc")]
+#[inline(always)]
+pub(crate) fn is_word_char_rev(bytes: &[u8], mut at: usize) -> bool {
+    use core::{ptr, sync::atomic::AtomicPtr};
+
+    use crate::{
+        dfa::{
+            dense::{self, DFA},
+            Automaton,
+        },
+        nfa::thompson::NFA,
+    };
+
+    static WORD: AtomicPtr<DFA<Vec<u32>>> = AtomicPtr::new(ptr::null_mut());
+
+    let dfa = lazy::get_or_init(&WORD, || {
+        dense::Builder::new()
+            .configure(dense::Config::new().anchored(true))
+            .thompson(NFA::config().reverse(true).shrink(true))
+            .build(r"\w")
+            .unwrap()
+    });
+
+    // This is OK since '\w' contains no look-around.
+    let mut sid = dfa.universal_start_state();
+    while at > 0 {
+        at -= 1;
+        let byte = bytes[at];
+        sid = dfa.next_state(sid, byte);
+        if dfa.is_special_state(sid) {
+            if dfa.is_match_state(sid) {
+                return true;
+            } else if dfa.is_dead_state(sid) {
+                return false;
+            }
+        }
+    }
+    dfa.is_match_state(dfa.next_eoi_state(sid))
+}
diff --git a/vendor/regex-automata/src/util/prefilter.rs b/vendor/regex-automata/src/util/prefilter.rs
new file mode 100644
index 000000000..5fe151524
--- /dev/null
+++ b/vendor/regex-automata/src/util/prefilter.rs
@@ -0,0 +1,281 @@
+use crate::Match;
+
+/// A candidate is the result of running a prefilter on a haystack at a
+/// particular position. The result is one of no match, a confirmed match or
+/// a possible match.
+///
+/// When no match is returned, the prefilter is guaranteeing that no possible
+/// match can be found in the haystack, and the caller may trust this. That is,
+/// all correct prefilters must never report false negatives.
+///
+/// In some cases, a prefilter can confirm a match very quickly, in which case,
+/// the caller may use this to stop what it's doing and report the match. In
+/// this case, prefilter implementations must never report a false positive.
+/// In other cases, the prefilter can only report a potential match, in which
+/// case the callers must attempt to confirm the match. In this case, prefilter
+/// implementations are permitted to return false positives.
+#[derive(Clone, Debug)]
+pub enum Candidate {
+    /// The prefilter reports that no match is possible. Prefilter
+    /// implementations will never report false negatives.
+    None,
+    /// The prefilter reports that a match has been confirmed at the provided
+    /// byte offsets. When this variant is reported, the prefilter is
+    /// guaranteeing a match. No false positives are permitted.
+    Match(Match),
+    /// The prefilter reports that a match *may* start at the given position.
+    /// When this variant is reported, it may correspond to a false positive.
+    PossibleStartOfMatch(usize),
+}
+
+impl Candidate {
+    /// Convert this candidate into an option. This is useful when callers do
+    /// not distinguish between true positives and false positives (i.e., the
+    /// caller must always confirm the match in order to update some other
+    /// state).
+    ///
+    /// The byte offset in the option returned corresponds to the starting
+    /// position of the possible match.
+    pub fn into_option(self) -> Option<usize> {
+        match self {
+            Candidate::None => None,
+            Candidate::Match(ref m) => Some(m.start()),
+            Candidate::PossibleStartOfMatch(start) => Some(start),
+        }
+    }
+}
+
+/// A prefilter describes the behavior of fast literal scanners for quickly
+/// skipping past bytes in the haystack that we know cannot possibly
+/// participate in a match.
+pub trait Prefilter: core::fmt::Debug {
+    /// Returns the next possible match candidate. This may yield false
+    /// positives, so callers must confirm a match starting at the position
+    /// returned. This, however, must never produce false negatives. That is,
+    /// this must, at minimum, return the starting position of the next match
+    /// in the given haystack after or at the given position.
+    fn next_candidate(
+        &self,
+        state: &mut State,
+        haystack: &[u8],
+        at: usize,
+    ) -> Candidate;
+
+    /// Returns the approximate total amount of heap used by this prefilter, in
+    /// units of bytes.
+    fn heap_bytes(&self) -> usize;
+
+    /// Returns true if and only if this prefilter may return false positives
+    /// via the `Candidate::PossibleStartOfMatch` variant. This is most useful
+    /// when false positives are not posssible (in which case, implementations
+    /// should return false), which may allow completely avoiding heavier regex
+    /// machinery when the prefilter can quickly confirm its own matches.
+    ///
+    /// By default, this returns true, which is conservative; it is always
+    /// correct to return `true`. Returning `false` here and reporting a false
+    /// positive will result in incorrect searches.
+    fn reports_false_positives(&self) -> bool {
+        true
+    }
+}
+
+impl<'a, P: Prefilter + ?Sized> Prefilter for &'a P {
+    #[inline]
+    fn next_candidate(
+        &self,
+        state: &mut State,
+        haystack: &[u8],
+        at: usize,
+    ) -> Candidate {
+        (**self).next_candidate(state, haystack, at)
+    }
+
+    fn heap_bytes(&self) -> usize {
+        (**self).heap_bytes()
+    }
+
+    fn reports_false_positives(&self) -> bool {
+        (**self).reports_false_positives()
+    }
+}
+
+#[derive(Clone)]
+pub struct Scanner<'p> {
+    prefilter: &'p dyn Prefilter,
+    state: State,
+}
+
+impl<'p> Scanner<'p> {
+    pub fn new(prefilter: &'p dyn Prefilter) -> Scanner<'p> {
+        Scanner { prefilter, state: State::new() }
+    }
+
+    pub(crate) fn is_effective(&mut self, at: usize) -> bool {
+        self.state.is_effective(at)
+    }
+
+    pub(crate) fn reports_false_positives(&self) -> bool {
+        self.prefilter.reports_false_positives()
+    }
+
+    pub(crate) fn next_candidate(
+        &mut self,
+        bytes: &[u8],
+        at: usize,
+    ) -> Candidate {
+        let cand = self.prefilter.next_candidate(&mut self.state, bytes, at);
+        match cand {
+            Candidate::None => {
+                self.state.update_skipped_bytes(bytes.len() - at);
+            }
+            Candidate::Match(ref m) => {
+                self.state.update_skipped_bytes(m.start() - at);
+            }
+            Candidate::PossibleStartOfMatch(i) => {
+                self.state.update_skipped_bytes(i - at);
+            }
+        }
+        cand
+    }
+}
+
+impl<'p> core::fmt::Debug for Scanner<'p> {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        f.debug_struct("Scanner").field("state", &self.state).finish()
+    }
+}
+
+/// State tracks state associated with the effectiveness of a
+/// prefilter. It is used to track how many bytes, on average, are skipped by
+/// the prefilter. If this average dips below a certain threshold over time,
+/// then the state renders the prefilter inert and stops using it.
+///
+/// A prefilter state should be created for each search. (Where creating an
+/// iterator via, e.g., `find_iter`, is treated as a single search.)
+#[derive(Clone, Debug)]
+pub struct State {
+    /// The number of skips that has been executed.
+    skips: usize,
+    /// The total number of bytes that have been skipped.
+    skipped: usize,
+    /// Once this heuristic has been deemed permanently ineffective, it will be
+    /// inert throughout the rest of its lifetime. This serves as a cheap way
+    /// to check inertness.
+    inert: bool,
+    /// The last (absolute) position at which a prefilter scanned to.
+    /// Prefilters can use this position to determine whether to re-scan or
+    /// not.
+    ///
+    /// Unlike other things that impact effectiveness, this is a fleeting
+    /// condition. That is, a prefilter can be considered ineffective if it is
+    /// at a position before `last_scan_at`, but can become effective again
+    /// once the search moves past `last_scan_at`.
+    ///
+    /// The utility of this is to both avoid additional overhead from calling
+    /// the prefilter and to avoid quadratic behavior. This ensures that a
+    /// prefilter will scan any particular byte at most once. (Note that some
+    /// prefilters, like the start-byte prefilter, do not need to use this
+    /// field at all, since it only looks for starting bytes.)
+    last_scan_at: usize,
+}
+
+impl State {
+    /// The minimum number of skip attempts to try before considering whether
+    /// a prefilter is effective or not.
+    const MIN_SKIPS: usize = 40;
+
+    /// The minimum amount of bytes that skipping must average.
+    ///
+    /// That is, after MIN_SKIPS have occurred, if the average number of bytes
+    /// skipped ever falls below MIN_AVG_SKIP, then the prefilter will be
+    /// rendered inert.
+    const MIN_AVG_SKIP: usize = 16;
+
+    /// Create a fresh prefilter state.
+    pub fn new() -> State {
+        State { skips: 0, skipped: 0, inert: false, last_scan_at: 0 }
+    }
+
+    /// Updates the position at which the last scan stopped. This may be
+    /// greater than the position of the last candidate reported. For example,
+    /// searching for the byte `z` in `abczdef` for the pattern `abcz` will
+    /// report a candidate at position `0`, but the end of its last scan will
+    /// be at position `3`.
+    ///
+    /// This position factors into the effectiveness of this prefilter. If the
+    /// current position is less than the last position at which a scan ended,
+    /// then the prefilter should not be re-run until the search moves past
+    /// that position.
+    ///
+    /// It is always correct to never update the last scan position. In fact,
+    /// it is also always correct to set the last scan position to an arbitrary
+    /// value. The key is setting it to a position in the future at which it
+    /// makes sense to restart the prefilter.
+    pub fn update_last_scan(&mut self, at: usize) {
+        if at > self.last_scan_at {
+            self.last_scan_at = at;
+        }
+    }
+
+    /// Return true if and only if this state indicates that a prefilter is
+    /// still effective. If the prefilter is not effective, then this state
+    /// is rendered "inert." At which point, all subsequent calls to
+    /// `is_effective` on this state will return `false`.
+    ///
+    /// `at` should correspond to the current starting position of the search.
+    ///
+    /// Callers typically do not need to use this, as it represents the
+    /// default implementation of
+    /// [`Prefilter::is_effective`](trait.Prefilter.html#tymethod.is_effective).
+    fn is_effective(&mut self, at: usize) -> bool {
+        if self.inert {
+            return false;
+        }
+        if at < self.last_scan_at {
+            return false;
+        }
+        if self.skips < State::MIN_SKIPS {
+            return true;
+        }
+
+        if self.skipped >= State::MIN_AVG_SKIP * self.skips {
+            return true;
+        }
+
+        // We're inert.
+        self.inert = true;
+        false
+    }
+
+    /// Update this state with the number of bytes skipped on the last
+    /// invocation of the prefilter.
+    fn update_skipped_bytes(&mut self, skipped: usize) {
+        self.skips += 1;
+        self.skipped += skipped;
+    }
+}
+
+/// A `Prefilter` implementation that reports a possible match at every
+/// position.
+///
+/// This should generally not be used as an actual prefilter. It is only
+/// useful when one needs to represent the absence of a prefilter in a generic
+/// context. For example, a [`dfa::regex::Regex`](crate::dfa::regex::Regex)
+/// uses this prefilter by default to indicate that no prefilter should be
+/// used.
+///
+/// A `None` prefilter value cannot be constructed.
+#[derive(Clone, Debug)]
+pub struct None {
+    _priv: (),
+}
+
+impl Prefilter for None {
+    fn next_candidate(&self, _: &mut State, _: &[u8], at: usize) -> Candidate {
+        Candidate::PossibleStartOfMatch(at)
+    }
+
+    fn heap_bytes(&self) -> usize {
+        0
+    }
+}
diff --git a/vendor/regex-automata/src/util/sparse_set.rs b/vendor/regex-automata/src/util/sparse_set.rs
new file mode 100644
index 000000000..bf59e4469
--- /dev/null
+++ b/vendor/regex-automata/src/util/sparse_set.rs
@@ -0,0 +1,229 @@
+use alloc::{boxed::Box, vec, vec::Vec};
+
+use crate::util::id::StateID;
+
+/// A pairse of sparse sets.
+///
+/// This is useful when one needs to compute NFA epsilon closures from a
+/// previous set of states derived from an epsilon closure. One set can be the
+/// starting states where as the other set can be the destination states after
+/// following the transitions for a particular byte of input.
+///
+/// There is no significance to 'set1' or 'set2'. They are both sparse sets of
+/// the same size.
+///
+/// The members of this struct are exposed so that callers may borrow 'set1'
+/// and 'set2' individually without being force to borrow both at the same
+/// time.
+#[derive(Clone, Debug)]
+pub(crate) struct SparseSets {
+    pub(crate) set1: SparseSet,
+    pub(crate) set2: SparseSet,
+}
+
+impl SparseSets {
+    /// Create a new pair of sparse sets where each set has the given capacity.
+    ///
+    /// This panics if the capacity given is bigger than `StateID::LIMIT`.
+    pub(crate) fn new(capacity: usize) -> SparseSets {
+        SparseSets {
+            set1: SparseSet::new(capacity),
+            set2: SparseSet::new(capacity),
+        }
+    }
+
+    /// Resizes these sparse sets to have the new capacity given.
+    ///
+    /// The sets are automatically cleared.
+    ///
+    /// This panics if the capacity given is bigger than `StateID::LIMIT`.
+    #[inline]
+    pub(crate) fn resize(&mut self, new_capacity: usize) {
+        self.set1.resize(new_capacity);
+        self.set2.resize(new_capacity);
+    }
+
+    /// Clear both sparse sets.
+    pub(crate) fn clear(&mut self) {
+        self.set1.clear();
+        self.set2.clear();
+    }
+
+    /// Swap set1 with set2.
+    pub(crate) fn swap(&mut self) {
+        core::mem::swap(&mut self.set1, &mut self.set2);
+    }
+
+    /// Returns the memory usage, in bytes, used by this pair of sparse sets.
+    pub(crate) fn memory_usage(&self) -> usize {
+        self.set1.memory_usage() + self.set2.memory_usage()
+    }
+}
+
+/// A sparse set used for representing ordered NFA states.
+///
+/// This supports constant time addition and membership testing. Clearing an
+/// entire set can also be done in constant time. Iteration yields elements
+/// in the order in which they were inserted.
+///
+/// The data structure is based on: https://research.swtch.com/sparse
+/// Note though that we don't actually use uninitialized memory. We generally
+/// reuse sparse sets, so the initial allocation cost is bareable. However, its
+/// other properties listed above are extremely useful.
+#[derive(Clone)]
+pub(crate) struct SparseSet {
+    /// The number of elements currently in this set.
+    len: usize,
+    /// Dense contains the ids in the order in which they were inserted.
+    dense: Vec<StateID>,
+    /// Sparse maps ids to their location in dense.
+    ///
+    /// A state ID is in the set if and only if
+    /// sparse[id] < dense.len() && id == dense[sparse[id]].
+    sparse: Vec<StateID>,
+}
+
+impl SparseSet {
+    /// Create a new sparse set with the given capacity.
+    ///
+    /// Sparse sets have a fixed size and they cannot grow. Attempting to
+    /// insert more distinct elements than the total capacity of the set will
+    /// result in a panic.
+    ///
+    /// This panics if the capacity given is bigger than `StateID::LIMIT`.
+    #[inline]
+    pub(crate) fn new(capacity: usize) -> SparseSet {
+        let mut set = SparseSet { len: 0, dense: vec![], sparse: vec![] };
+        set.resize(capacity);
+        set
+    }
+
+    /// Resizes this sparse set to have the new capacity given.
+    ///
+    /// This set is automatically cleared.
+    ///
+    /// This panics if the capacity given is bigger than `StateID::LIMIT`.
+    #[inline]
+    pub(crate) fn resize(&mut self, new_capacity: usize) {
+        assert!(
+            new_capacity <= StateID::LIMIT,
+            "sparse set capacity cannot excced {:?}",
+            StateID::LIMIT
+        );
+        self.clear();
+        self.dense.resize(new_capacity, StateID::ZERO);
+        self.sparse.resize(new_capacity, StateID::ZERO);
+    }
+
+    /// Returns the capacity of this set.
+    ///
+    /// The capacity represents a fixed limit on the number of distinct
+    /// elements that are allowed in this set. The capacity cannot be changed.
+    #[inline]
+    pub(crate) fn capacity(&self) -> usize {
+        self.dense.len()
+    }
+
+    /// Returns the number of elements in this set.
+    #[inline]
+    pub(crate) fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Returns true if and only if this set is empty.
+    #[inline]
+    pub(crate) fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Insert the state ID value into this set and return true if the given
+    /// state ID was not previously in this set.
+    ///
+    /// This operation is idempotent. If the given value is already in this
+    /// set, then this is a no-op.
+    ///
+    /// If more than `capacity` ids are inserted, then this panics.
+    ///
+    /// This is marked as inline(always) since the compiler won't inline it
+    /// otherwise, and it's a fairly hot piece of code in DFA determinization.
+    #[inline(always)]
+    pub(crate) fn insert(&mut self, value: StateID) -> bool {
+        if self.contains(value) {
+            return false;
+        }
+
+        let i = self.len();
+        assert!(
+            i < self.capacity(),
+            "{:?} exceeds capacity of {:?} when inserting {:?}",
+            i,
+            self.capacity(),
+            value,
+        );
+        // OK since i < self.capacity() and self.capacity() is guaranteed to
+        // be <= StateID::LIMIT.
+        let id = StateID::new_unchecked(i);
+        self.dense[id] = value;
+        self.sparse[value] = id;
+        self.len += 1;
+        true
+    }
+
+    /// Returns true if and only if this set contains the given value.
+    #[inline]
+    pub(crate) fn contains(&self, value: StateID) -> bool {
+        let i = self.sparse[value];
+        i.as_usize() < self.len() && self.dense[i] == value
+    }
+
+    /// Returns the ith inserted element from this set.
+    ///
+    /// Panics when i >= self.len().
+    #[inline]
+    pub(crate) fn get(&self, i: usize) -> StateID {
+        self.dense[i]
+    }
+
+    /// Clear this set such that it has no members.
+    #[inline]
+    pub(crate) fn clear(&mut self) {
+        self.len = 0;
+    }
+
+    /// Returns the heap memory usage, in bytes, used by this sparse set.
+    #[inline]
+    pub(crate) fn memory_usage(&self) -> usize {
+        2 * self.dense.len() * StateID::SIZE
+    }
+}
+
+impl core::fmt::Debug for SparseSet {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        let elements: Vec<StateID> = self.into_iter().collect();
+        f.debug_tuple("SparseSet").field(&elements).finish()
+    }
+}
+
+/// An iterator over all elements in a sparse set.
+///
+/// The lifetime `'a` refers to the lifetime of the set being iterated over.
+#[derive(Debug)]
+pub(crate) struct SparseSetIter<'a>(core::slice::Iter<'a, StateID>);
+
+impl<'a> IntoIterator for &'a SparseSet {
+    type Item = StateID;
+    type IntoIter = SparseSetIter<'a>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        SparseSetIter(self.dense[..self.len()].iter())
+    }
+}
+
+impl<'a> Iterator for SparseSetIter<'a> {
+    type Item = StateID;
+
+    #[inline(always)]
+    fn next(&mut self) -> Option<StateID> {
+        self.0.next().map(|value| *value)
+    }
+}
diff --git a/vendor/regex-automata/src/util/start.rs b/vendor/regex-automata/src/util/start.rs
new file mode 100644
index 000000000..3c756fc26
--- /dev/null
+++ b/vendor/regex-automata/src/util/start.rs
@@ -0,0 +1,109 @@
+/// Represents the four possible starting configurations of a DFA search.
+///
+/// The starting configuration is determined by inspecting the the beginning of
+/// the haystack (up to 1 byte). Ultimately, this along with a pattern ID (if
+/// specified) is what selects the start state to use in a DFA.
+///
+/// In a DFA that doesn't have starting states for each pattern, then it will
+/// have a maximum of four DFA start states. If the DFA was compiled with start
+/// states for each pattern, then it will have a maximum of four DFA start
+/// states for searching for any pattern, and then another maximum of four DFA
+/// start states for executing an anchored search for each pattern.
+///
+/// This ends up being represented as a table in the DFA (whether lazy or fully
+/// built) where the stride of that table is 4, and each entry is an index into
+/// the state transition table. Note though that multiple entries in the table
+/// might point to the same state if the states would otherwise be equivalent.
+/// (This is guaranteed by DFA minimization and may even be accomplished by
+/// normal determinization, since it attempts to reuse equivalent states too.)
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub(crate) enum Start {
+    /// This occurs when the starting position is not any of the ones below.
+    NonWordByte = 0,
+    /// This occurs when the byte immediately preceding the start of the search
+    /// is an ASCII word byte.
+    WordByte = 1,
+    /// This occurs when the starting position of the search corresponds to the
+    /// beginning of the haystack.
+    Text = 2,
+    /// This occurs when the byte immediately preceding the start of the search
+    /// is a line terminator. Specifically, `\n`.
+    Line = 3,
+}
+
+impl Start {
+    /// Return the starting state corresponding to the given integer. If no
+    /// starting state exists for the given integer, then None is returned.
+    pub(crate) fn from_usize(n: usize) -> Option<Start> {
+        match n {
+            0 => Some(Start::NonWordByte),
+            1 => Some(Start::WordByte),
+            2 => Some(Start::Text),
+            3 => Some(Start::Line),
+            _ => None,
+        }
+    }
+
+    /// Returns the total number of starting state configurations.
+    pub(crate) fn count() -> usize {
+        4
+    }
+
+    /// Returns the starting state configuration for the given search
+    /// parameters. If the given offset range is not valid, then this panics.
+    #[inline(always)]
+    pub(crate) fn from_position_fwd(
+        bytes: &[u8],
+        start: usize,
+        end: usize,
+    ) -> Start {
+        assert!(
+            bytes.get(start..end).is_some(),
+            "{}..{} is invalid",
+            start,
+            end
+        );
+        if start == 0 {
+            Start::Text
+        } else if bytes[start - 1] == b'\n' {
+            Start::Line
+        } else if crate::util::is_word_byte(bytes[start - 1]) {
+            Start::WordByte
+        } else {
+            Start::NonWordByte
+        }
+    }
+
+    /// Returns the starting state configuration for a reverse search with the
+    /// given search parameters. If the given offset range is not valid, then
+    /// this panics.
+    #[inline(always)]
+    pub(crate) fn from_position_rev(
+        bytes: &[u8],
+        start: usize,
+        end: usize,
+    ) -> Start {
+        assert!(
+            bytes.get(start..end).is_some(),
+            "{}..{} is invalid",
+            start,
+            end
+        );
+        if end == bytes.len() {
+            Start::Text
+        } else if bytes[end] == b'\n' {
+            Start::Line
+        } else if crate::util::is_word_byte(bytes[end]) {
+            Start::WordByte
+        } else {
+            Start::NonWordByte
+        }
+    }
+
+    /// Return this starting configuration as an integer. It is guaranteed to
+    /// be less than `Start::count()`.
+    #[inline(always)]
+    pub(crate) fn as_usize(&self) -> usize {
+        *self as usize
+    }
+}
diff --git a/vendor/regex-automata/src/util/syntax.rs b/vendor/regex-automata/src/util/syntax.rs
new file mode 100644
index 000000000..88beeee75
--- /dev/null
+++ b/vendor/regex-automata/src/util/syntax.rs
@@ -0,0 +1,272 @@
+use regex_syntax::ParserBuilder;
+
+/// A common set of configuration options that apply to the syntax of a regex.
+///
+/// This represents a group of configuration options that specifically apply
+/// to how the concrete syntax of a regular expression is interpreted. In
+/// particular, they are generally forwarded to the
+/// [`ParserBuilder`](https://docs.rs/regex-syntax/*/regex_syntax/struct.ParserBuilder.html)
+/// in the
+/// [`regex-syntax`](https://docs.rs/regex-syntax)
+/// crate when building a regex from its concrete syntax directly.
+///
+/// These options are defined as a group since they apply to every regex engine
+/// in this crate. Instead of re-defining them on every engine's builder, they
+/// are instead provided here as one cohesive unit.
+#[derive(Clone, Copy, Debug)]
+pub struct SyntaxConfig {
+    case_insensitive: bool,
+    multi_line: bool,
+    dot_matches_new_line: bool,
+    swap_greed: bool,
+    ignore_whitespace: bool,
+    unicode: bool,
+    utf8: bool,
+    nest_limit: u32,
+    octal: bool,
+}
+
+impl SyntaxConfig {
+    /// Return a new default syntax configuration.
+    pub fn new() -> SyntaxConfig {
+        // These defaults match the ones used in regex-syntax.
+        SyntaxConfig {
+            case_insensitive: false,
+            multi_line: false,
+            dot_matches_new_line: false,
+            swap_greed: false,
+            ignore_whitespace: false,
+            unicode: true,
+            utf8: true,
+            nest_limit: 250,
+            octal: false,
+        }
+    }
+
+    /// Enable or disable the case insensitive flag by default.
+    ///
+    /// When Unicode mode is enabled, case insensitivity is Unicode-aware.
+    /// Specifically, it will apply the "simple" case folding rules as
+    /// specified by Unicode.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `i` flag.
+    pub fn case_insensitive(mut self, yes: bool) -> SyntaxConfig {
+        self.case_insensitive = yes;
+        self
+    }
+
+    /// Enable or disable the multi-line matching flag by default.
+    ///
+    /// When this is enabled, the `^` and `$` look-around assertions will
+    /// match immediately after and immediately before a new line character,
+    /// respectively. Note that the `\A` and `\z` look-around assertions are
+    /// unaffected by this setting and always correspond to matching at the
+    /// beginning and end of the input.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `m` flag.
+    pub fn multi_line(mut self, yes: bool) -> SyntaxConfig {
+        self.multi_line = yes;
+        self
+    }
+
+    /// Enable or disable the "dot matches any character" flag by default.
+    ///
+    /// When this is enabled, `.` will match any character. When it's disabled,
+    /// then `.` will match any character except for a new line character.
+    ///
+    /// Note that `.` is impacted by whether the "unicode" setting is enabled
+    /// or not. When Unicode is enabled (the defualt), `.` will match any UTF-8
+    /// encoding of any Unicode scalar value (sans a new line, depending on
+    /// whether this "dot matches new line" option is enabled). When Unicode
+    /// mode is disabled, `.` will match any byte instead. Because of this,
+    /// when Unicode mode is disabled, `.` can only be used when the "allow
+    /// invalid UTF-8" option is enabled, since `.` could otherwise match
+    /// invalid UTF-8.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `s` flag.
+    pub fn dot_matches_new_line(mut self, yes: bool) -> SyntaxConfig {
+        self.dot_matches_new_line = yes;
+        self
+    }
+
+    /// Enable or disable the "swap greed" flag by default.
+    ///
+    /// When this is enabled, `.*` (for example) will become ungreedy and `.*?`
+    /// will become greedy.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `U` flag.
+    pub fn swap_greed(mut self, yes: bool) -> SyntaxConfig {
+        self.swap_greed = yes;
+        self
+    }
+
+    /// Enable verbose mode in the regular expression.
+    ///
+    /// When enabled, verbose mode permits insigificant whitespace in many
+    /// places in the regular expression, as well as comments. Comments are
+    /// started using `#` and continue until the end of the line.
+    ///
+    /// By default, this is disabled. It may be selectively enabled in the
+    /// regular expression by using the `x` flag regardless of this setting.
+    pub fn ignore_whitespace(mut self, yes: bool) -> SyntaxConfig {
+        self.ignore_whitespace = yes;
+        self
+    }
+
+    /// Enable or disable the Unicode flag (`u`) by default.
+    ///
+    /// By default this is **enabled**. It may alternatively be selectively
+    /// disabled in the regular expression itself via the `u` flag.
+    ///
+    /// Note that unless "allow invalid UTF-8" is enabled (it's disabled by
+    /// default), a regular expression will fail to parse if Unicode mode is
+    /// disabled and a sub-expression could possibly match invalid UTF-8.
+    ///
+    /// **WARNING**: Unicode mode can greatly increase the size of the compiled
+    /// DFA, which can noticeably impact both memory usage and compilation
+    /// time. This is especially noticeable if your regex contains character
+    /// classes like `\w` that are impacted by whether Unicode is enabled or
+    /// not. If Unicode is not necessary, you are encouraged to disable it.
+    pub fn unicode(mut self, yes: bool) -> SyntaxConfig {
+        self.unicode = yes;
+        self
+    }
+
+    /// When disabled, the builder will permit the construction of a regular
+    /// expression that may match invalid UTF-8.
+    ///
+    /// For example, when [`SyntaxConfig::unicode`] is disabled, then
+    /// expressions like `[^a]` may match invalid UTF-8 since they can match
+    /// any single byte that is not `a`. By default, these sub-expressions
+    /// are disallowed to avoid returning offsets that split a UTF-8
+    /// encoded codepoint. However, in cases where matching at arbitrary
+    /// locations is desired, this option can be disabled to permit all such
+    /// sub-expressions.
+    ///
+    /// When enabled (the default), the builder is guaranteed to produce a
+    /// regex that will only ever match valid UTF-8 (otherwise, the builder
+    /// will return an error).
+    pub fn utf8(mut self, yes: bool) -> SyntaxConfig {
+        self.utf8 = yes;
+        self
+    }
+
+    /// Set the nesting limit used for the regular expression parser.
+    ///
+    /// The nesting limit controls how deep the abstract syntax tree is allowed
+    /// to be. If the AST exceeds the given limit (e.g., with too many nested
+    /// groups), then an error is returned by the parser.
+    ///
+    /// The purpose of this limit is to act as a heuristic to prevent stack
+    /// overflow when building a finite automaton from a regular expression's
+    /// abstract syntax tree. In particular, construction currently uses
+    /// recursion. In the future, the implementation may stop using recursion
+    /// and this option will no longer be necessary.
+    ///
+    /// This limit is not checked until the entire AST is parsed. Therefore,
+    /// if callers want to put a limit on the amount of heap space used, then
+    /// they should impose a limit on the length, in bytes, of the concrete
+    /// pattern string. In particular, this is viable since the parser will
+    /// limit itself to heap space proportional to the lenth of the pattern
+    /// string.
+    ///
+    /// Note that a nest limit of `0` will return a nest limit error for most
+    /// patterns but not all. For example, a nest limit of `0` permits `a` but
+    /// not `ab`, since `ab` requires a concatenation AST item, which results
+    /// in a nest depth of `1`. In general, a nest limit is not something that
+    /// manifests in an obvious way in the concrete syntax, therefore, it
+    /// should not be used in a granular way.
+    pub fn nest_limit(mut self, limit: u32) -> SyntaxConfig {
+        self.nest_limit = limit;
+        self
+    }
+
+    /// Whether to support octal syntax or not.
+    ///
+    /// Octal syntax is a little-known way of uttering Unicode codepoints in
+    /// a regular expression. For example, `a`, `\x61`, `\u0061` and
+    /// `\141` are all equivalent regular expressions, where the last example
+    /// shows octal syntax.
+    ///
+    /// While supporting octal syntax isn't in and of itself a problem, it does
+    /// make good error messages harder. That is, in PCRE based regex engines,
+    /// syntax like `\1` invokes a backreference, which is explicitly
+    /// unsupported in Rust's regex engine. However, many users expect it to
+    /// be supported. Therefore, when octal support is disabled, the error
+    /// message will explicitly mention that backreferences aren't supported.
+    ///
+    /// Octal syntax is disabled by default.
+    pub fn octal(mut self, yes: bool) -> SyntaxConfig {
+        self.octal = yes;
+        self
+    }
+
+    /// Returns whether "unicode" mode is enabled.
+    pub fn get_unicode(&self) -> bool {
+        self.unicode
+    }
+
+    /// Returns whether "case insensitive" mode is enabled.
+    pub fn get_case_insensitive(&self) -> bool {
+        self.case_insensitive
+    }
+
+    /// Returns whether "multi line" mode is enabled.
+    pub fn get_multi_line(&self) -> bool {
+        self.multi_line
+    }
+
+    /// Returns whether "dot matches new line" mode is enabled.
+    pub fn get_dot_matches_new_line(&self) -> bool {
+        self.dot_matches_new_line
+    }
+
+    /// Returns whether "swap greed" mode is enabled.
+    pub fn get_swap_greed(&self) -> bool {
+        self.swap_greed
+    }
+
+    /// Returns whether "ignore whitespace" mode is enabled.
+    pub fn get_ignore_whitespace(&self) -> bool {
+        self.ignore_whitespace
+    }
+
+    /// Returns whether UTF-8 mode is enabled.
+    pub fn get_utf8(&self) -> bool {
+        self.utf8
+    }
+
+    /// Returns the "nest limit" setting.
+    pub fn get_nest_limit(&self) -> u32 {
+        self.nest_limit
+    }
+
+    /// Returns whether "octal" mode is enabled.
+    pub fn get_octal(&self) -> bool {
+        self.octal
+    }
+
+    /// Applies this configuration to the given parser.
+    pub(crate) fn apply(&self, builder: &mut ParserBuilder) {
+        builder
+            .unicode(self.unicode)
+            .case_insensitive(self.case_insensitive)
+            .multi_line(self.multi_line)
+            .dot_matches_new_line(self.dot_matches_new_line)
+            .swap_greed(self.swap_greed)
+            .ignore_whitespace(self.ignore_whitespace)
+            .allow_invalid_utf8(!self.utf8)
+            .nest_limit(self.nest_limit)
+            .octal(self.octal);
+    }
+}
+
+impl Default for SyntaxConfig {
+    fn default() -> SyntaxConfig {
+        SyntaxConfig::new()
+    }
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-17 12:18:32 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-17 12:18:32 +0000
commit	4547b622d8d29df964fa2914213088b148c498fc (patch)
tree	9fc6b25f3c3add6b745be9a2400a6e96140046e9 /vendor/regex-automata/src/util
parent	Releasing progress-linux version 1.66.0+dfsg1-1~progress7.99u1. (diff)
download	rustc-4547b622d8d29df964fa2914213088b148c498fc.tar.xz rustc-4547b622d8d29df964fa2914213088b148c498fc.zip