diff options
Diffstat (limited to 'third_party/rust/aho-corasick/src/util')
-rw-r--r-- | third_party/rust/aho-corasick/src/util/alphabet.rs | 409 | ||||
-rw-r--r-- | third_party/rust/aho-corasick/src/util/buffer.rs | 124 | ||||
-rw-r--r-- | third_party/rust/aho-corasick/src/util/byte_frequencies.rs | 258 | ||||
-rw-r--r-- | third_party/rust/aho-corasick/src/util/debug.rs | 26 | ||||
-rw-r--r-- | third_party/rust/aho-corasick/src/util/error.rs | 259 | ||||
-rw-r--r-- | third_party/rust/aho-corasick/src/util/int.rs | 284 | ||||
-rw-r--r-- | third_party/rust/aho-corasick/src/util/mod.rs | 12 | ||||
-rw-r--r-- | third_party/rust/aho-corasick/src/util/prefilter.rs | 924 | ||||
-rw-r--r-- | third_party/rust/aho-corasick/src/util/primitives.rs | 759 | ||||
-rw-r--r-- | third_party/rust/aho-corasick/src/util/remapper.rs | 214 | ||||
-rw-r--r-- | third_party/rust/aho-corasick/src/util/search.rs | 1148 | ||||
-rw-r--r-- | third_party/rust/aho-corasick/src/util/special.rs | 42 |
12 files changed, 4459 insertions, 0 deletions
diff --git a/third_party/rust/aho-corasick/src/util/alphabet.rs b/third_party/rust/aho-corasick/src/util/alphabet.rs new file mode 100644 index 0000000000..69724fa3ab --- /dev/null +++ b/third_party/rust/aho-corasick/src/util/alphabet.rs @@ -0,0 +1,409 @@ +use crate::util::int::Usize; + +/// A representation of byte oriented equivalence classes. +/// +/// This is used in finite state machines to reduce the size of the transition +/// table. This can have a particularly large impact not only on the total size +/// of an FSM, but also on FSM build times because it reduces the number of +/// transitions that need to be visited/set. +#[derive(Clone, Copy)] +pub(crate) struct ByteClasses([u8; 256]); + +impl ByteClasses { + /// Creates a new set of equivalence classes where all bytes are mapped to + /// the same class. + pub(crate) fn empty() -> ByteClasses { + ByteClasses([0; 256]) + } + + /// Creates a new set of equivalence classes where each byte belongs to + /// its own equivalence class. + pub(crate) fn singletons() -> ByteClasses { + let mut classes = ByteClasses::empty(); + for b in 0..=255 { + classes.set(b, b); + } + classes + } + + /// Set the equivalence class for the given byte. + #[inline] + pub(crate) fn set(&mut self, byte: u8, class: u8) { + self.0[usize::from(byte)] = class; + } + + /// Get the equivalence class for the given byte. + #[inline] + pub(crate) fn get(&self, byte: u8) -> u8 { + self.0[usize::from(byte)] + } + + /// Return the total number of elements in the alphabet represented by + /// these equivalence classes. Equivalently, this returns the total number + /// of equivalence classes. + #[inline] + pub(crate) fn alphabet_len(&self) -> usize { + // Add one since the number of equivalence classes is one bigger than + // the last one. + usize::from(self.0[255]) + 1 + } + + /// Returns the stride, as a base-2 exponent, required for these + /// equivalence classes. 
+ /// + /// The stride is always the smallest power of 2 that is greater than or + /// equal to the alphabet length. This is done so that converting between + /// state IDs and indices can be done with shifts alone, which is much + /// faster than integer division. The "stride2" is the exponent. i.e., + /// `2^stride2 = stride`. + pub(crate) fn stride2(&self) -> usize { + let zeros = self.alphabet_len().next_power_of_two().trailing_zeros(); + usize::try_from(zeros).unwrap() + } + + /// Returns the stride for these equivalence classes, which corresponds + /// to the smallest power of 2 greater than or equal to the number of + /// equivalence classes. + pub(crate) fn stride(&self) -> usize { + 1 << self.stride2() + } + + /// Returns true if and only if every byte in this class maps to its own + /// equivalence class. Equivalently, there are 257 equivalence classes + /// and each class contains exactly one byte (plus the special EOI class). + #[inline] + pub(crate) fn is_singleton(&self) -> bool { + self.alphabet_len() == 256 + } + + /// Returns an iterator over all equivalence classes in this set. + pub(crate) fn iter(&self) -> ByteClassIter { + ByteClassIter { it: 0..self.alphabet_len() } + } + + /// Returns an iterator of the bytes in the given equivalence class. + pub(crate) fn elements(&self, class: u8) -> ByteClassElements { + ByteClassElements { classes: self, class, bytes: 0..=255 } + } + + /// Returns an iterator of byte ranges in the given equivalence class. + /// + /// That is, a sequence of contiguous ranges are returned. Typically, every + /// class maps to a single contiguous range. 
+ fn element_ranges(&self, class: u8) -> ByteClassElementRanges { + ByteClassElementRanges { elements: self.elements(class), range: None } + } +} + +impl core::fmt::Debug for ByteClasses { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + if self.is_singleton() { + write!(f, "ByteClasses(<one-class-per-byte>)") + } else { + write!(f, "ByteClasses(")?; + for (i, class) in self.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{:?} => [", class)?; + for (start, end) in self.element_ranges(class) { + if start == end { + write!(f, "{:?}", start)?; + } else { + write!(f, "{:?}-{:?}", start, end)?; + } + } + write!(f, "]")?; + } + write!(f, ")") + } + } +} + +/// An iterator over each equivalence class. +#[derive(Debug)] +pub(crate) struct ByteClassIter { + it: core::ops::Range<usize>, +} + +impl Iterator for ByteClassIter { + type Item = u8; + + fn next(&mut self) -> Option<u8> { + self.it.next().map(|class| class.as_u8()) + } +} + +/// An iterator over all elements in a specific equivalence class. +#[derive(Debug)] +pub(crate) struct ByteClassElements<'a> { + classes: &'a ByteClasses, + class: u8, + bytes: core::ops::RangeInclusive<u8>, +} + +impl<'a> Iterator for ByteClassElements<'a> { + type Item = u8; + + fn next(&mut self) -> Option<u8> { + while let Some(byte) = self.bytes.next() { + if self.class == self.classes.get(byte) { + return Some(byte); + } + } + None + } +} + +/// An iterator over all elements in an equivalence class expressed as a +/// sequence of contiguous ranges. 
+#[derive(Debug)] +pub(crate) struct ByteClassElementRanges<'a> { + elements: ByteClassElements<'a>, + range: Option<(u8, u8)>, +} + +impl<'a> Iterator for ByteClassElementRanges<'a> { + type Item = (u8, u8); + + fn next(&mut self) -> Option<(u8, u8)> { + loop { + let element = match self.elements.next() { + None => return self.range.take(), + Some(element) => element, + }; + match self.range.take() { + None => { + self.range = Some((element, element)); + } + Some((start, end)) => { + if usize::from(end) + 1 != usize::from(element) { + self.range = Some((element, element)); + return Some((start, end)); + } + self.range = Some((start, element)); + } + } + } + } +} + +/// A partitioning of bytes into equivalence classes. +/// +/// A byte class set keeps track of an *approximation* of equivalence classes +/// of bytes during NFA construction. That is, every byte in an equivalence +/// class cannot discriminate between a match and a non-match. +/// +/// Note that this may not compute the minimal set of equivalence classes. +/// Basically, any byte in a pattern given to the noncontiguous NFA builder +/// will automatically be treated as its own equivalence class. All other +/// bytes---any byte not in any pattern---will be treated as their own +/// equivalence classes. In theory, all bytes not in any pattern should +/// be part of a single equivalence class, but in practice, we only treat +/// contiguous ranges of bytes as an equivalence class. So the number of +/// classes computed may be bigger than necessary. This usually doesn't make +/// much of a difference, and keeps the implementation simple. +#[derive(Clone, Debug)] +pub(crate) struct ByteClassSet(ByteSet); + +impl Default for ByteClassSet { + fn default() -> ByteClassSet { + ByteClassSet::empty() + } +} + +impl ByteClassSet { + /// Create a new set of byte classes where all bytes are part of the same + /// equivalence class. 
+ pub(crate) fn empty() -> Self { + ByteClassSet(ByteSet::empty()) + } + + /// Indicate the the range of byte given (inclusive) can discriminate a + /// match between it and all other bytes outside of the range. + pub(crate) fn set_range(&mut self, start: u8, end: u8) { + debug_assert!(start <= end); + if start > 0 { + self.0.add(start - 1); + } + self.0.add(end); + } + + /// Convert this boolean set to a map that maps all byte values to their + /// corresponding equivalence class. The last mapping indicates the largest + /// equivalence class identifier (which is never bigger than 255). + pub(crate) fn byte_classes(&self) -> ByteClasses { + let mut classes = ByteClasses::empty(); + let mut class = 0u8; + let mut b = 0u8; + loop { + classes.set(b, class); + if b == 255 { + break; + } + if self.0.contains(b) { + class = class.checked_add(1).unwrap(); + } + b = b.checked_add(1).unwrap(); + } + classes + } +} + +/// A simple set of bytes that is reasonably cheap to copy and allocation free. +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub(crate) struct ByteSet { + bits: BitSet, +} + +/// The representation of a byte set. Split out so that we can define a +/// convenient Debug impl for it while keeping "ByteSet" in the output. +#[derive(Clone, Copy, Default, Eq, PartialEq)] +struct BitSet([u128; 2]); + +impl ByteSet { + /// Create an empty set of bytes. + pub(crate) fn empty() -> ByteSet { + ByteSet { bits: BitSet([0; 2]) } + } + + /// Add a byte to this set. + /// + /// If the given byte already belongs to this set, then this is a no-op. + pub(crate) fn add(&mut self, byte: u8) { + let bucket = byte / 128; + let bit = byte % 128; + self.bits.0[usize::from(bucket)] |= 1 << bit; + } + + /// Return true if and only if the given byte is in this set. 
+ pub(crate) fn contains(&self, byte: u8) -> bool { + let bucket = byte / 128; + let bit = byte % 128; + self.bits.0[usize::from(bucket)] & (1 << bit) > 0 + } +} + +impl core::fmt::Debug for BitSet { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + let mut fmtd = f.debug_set(); + for b in 0u8..=255 { + if (ByteSet { bits: *self }).contains(b) { + fmtd.entry(&b); + } + } + fmtd.finish() + } +} + +#[cfg(test)] +mod tests { + use alloc::{vec, vec::Vec}; + + use super::*; + + #[test] + fn byte_classes() { + let mut set = ByteClassSet::empty(); + set.set_range(b'a', b'z'); + + let classes = set.byte_classes(); + assert_eq!(classes.get(0), 0); + assert_eq!(classes.get(1), 0); + assert_eq!(classes.get(2), 0); + assert_eq!(classes.get(b'a' - 1), 0); + assert_eq!(classes.get(b'a'), 1); + assert_eq!(classes.get(b'm'), 1); + assert_eq!(classes.get(b'z'), 1); + assert_eq!(classes.get(b'z' + 1), 2); + assert_eq!(classes.get(254), 2); + assert_eq!(classes.get(255), 2); + + let mut set = ByteClassSet::empty(); + set.set_range(0, 2); + set.set_range(4, 6); + let classes = set.byte_classes(); + assert_eq!(classes.get(0), 0); + assert_eq!(classes.get(1), 0); + assert_eq!(classes.get(2), 0); + assert_eq!(classes.get(3), 1); + assert_eq!(classes.get(4), 2); + assert_eq!(classes.get(5), 2); + assert_eq!(classes.get(6), 2); + assert_eq!(classes.get(7), 3); + assert_eq!(classes.get(255), 3); + } + + #[test] + fn full_byte_classes() { + let mut set = ByteClassSet::empty(); + for b in 0u8..=255 { + set.set_range(b, b); + } + assert_eq!(set.byte_classes().alphabet_len(), 256); + } + + #[test] + fn elements_typical() { + let mut set = ByteClassSet::empty(); + set.set_range(b'b', b'd'); + set.set_range(b'g', b'm'); + set.set_range(b'z', b'z'); + let classes = set.byte_classes(); + // class 0: \x00-a + // class 1: b-d + // class 2: e-f + // class 3: g-m + // class 4: n-y + // class 5: z-z + // class 6: \x7B-\xFF + assert_eq!(classes.alphabet_len(), 7); + + let elements = 
classes.elements(0).collect::<Vec<_>>(); + assert_eq!(elements.len(), 98); + assert_eq!(elements[0], b'\x00'); + assert_eq!(elements[97], b'a'); + + let elements = classes.elements(1).collect::<Vec<_>>(); + assert_eq!(elements, vec![b'b', b'c', b'd'],); + + let elements = classes.elements(2).collect::<Vec<_>>(); + assert_eq!(elements, vec![b'e', b'f'],); + + let elements = classes.elements(3).collect::<Vec<_>>(); + assert_eq!(elements, vec![b'g', b'h', b'i', b'j', b'k', b'l', b'm',],); + + let elements = classes.elements(4).collect::<Vec<_>>(); + assert_eq!(elements.len(), 12); + assert_eq!(elements[0], b'n'); + assert_eq!(elements[11], b'y'); + + let elements = classes.elements(5).collect::<Vec<_>>(); + assert_eq!(elements, vec![b'z']); + + let elements = classes.elements(6).collect::<Vec<_>>(); + assert_eq!(elements.len(), 133); + assert_eq!(elements[0], b'\x7B'); + assert_eq!(elements[132], b'\xFF'); + } + + #[test] + fn elements_singletons() { + let classes = ByteClasses::singletons(); + assert_eq!(classes.alphabet_len(), 256); + + let elements = classes.elements(b'a').collect::<Vec<_>>(); + assert_eq!(elements, vec![b'a']); + } + + #[test] + fn elements_empty() { + let classes = ByteClasses::empty(); + assert_eq!(classes.alphabet_len(), 1); + + let elements = classes.elements(0).collect::<Vec<_>>(); + assert_eq!(elements.len(), 256); + assert_eq!(elements[0], b'\x00'); + assert_eq!(elements[255], b'\xFF'); + } +} diff --git a/third_party/rust/aho-corasick/src/util/buffer.rs b/third_party/rust/aho-corasick/src/util/buffer.rs new file mode 100644 index 0000000000..e9e982af58 --- /dev/null +++ b/third_party/rust/aho-corasick/src/util/buffer.rs @@ -0,0 +1,124 @@ +use alloc::{vec, vec::Vec}; + +/// The default buffer capacity that we use for the stream buffer. +const DEFAULT_BUFFER_CAPACITY: usize = 64 * (1 << 10); // 64 KB + +/// A fairly simple roll buffer for supporting stream searches. 
+/// +/// This buffer acts as a temporary place to store a fixed amount of data when +/// reading from a stream. Its central purpose is to allow "rolling" some +/// suffix of the data to the beginning of the buffer before refilling it with +/// more data from the stream. For example, let's say we are trying to match +/// "foobar" on a stream. When we report the match, we'd like to not only +/// report the correct offsets at which the match occurs, but also the matching +/// bytes themselves. So let's say our stream is a file with the following +/// contents: `test test foobar test test`. Now assume that we happen to read +/// the aforementioned file in two chunks: `test test foo` and `bar test test`. +/// Naively, it would not be possible to report a single contiguous `foobar` +/// match, but this roll buffer allows us to do that. Namely, after the second +/// read, the contents of the buffer should be `st foobar test test`, where the +/// search should ultimately resume immediately after `foo`. (The prefix `st ` +/// is included because the roll buffer saves N bytes at the end of the buffer, +/// where N is the maximum possible length of a match.) +/// +/// A lot of the logic for dealing with this is unfortunately split out between +/// this roll buffer and the `StreamChunkIter`. +/// +/// Note also that this buffer is not actually required to just report matches. +/// Because a `Match` is just some offsets. But it *is* required for supporting +/// things like `try_stream_replace_all` because that needs some mechanism for +/// knowing which bytes in the stream correspond to a match and which don't. So +/// when a match occurs across two `read` calls, *something* needs to retain +/// the bytes from the previous `read` call because you don't know before the +/// second read call whether a match exists or not. +#[derive(Debug)] +pub(crate) struct Buffer { + /// The raw buffer contents. This has a fixed size and never increases. 
+ buf: Vec<u8>, + /// The minimum size of the buffer, which is equivalent to the maximum + /// possible length of a match. This corresponds to the amount that we + /// roll + min: usize, + /// The end of the contents of this buffer. + end: usize, +} + +impl Buffer { + /// Create a new buffer for stream searching. The minimum buffer length + /// given should be the size of the maximum possible match length. + pub(crate) fn new(min_buffer_len: usize) -> Buffer { + let min = core::cmp::max(1, min_buffer_len); + // The minimum buffer amount is also the amount that we roll our + // buffer in order to support incremental searching. To this end, + // our actual capacity needs to be at least 1 byte bigger than our + // minimum amount, otherwise we won't have any overlap. In actuality, + // we want our buffer to be a bit bigger than that for performance + // reasons, so we set a lower bound of `8 * min`. + // + // TODO: It would be good to find a way to test the streaming + // implementation with the minimal buffer size. For now, we just + // uncomment out the next line and comment out the subsequent line. + // let capacity = 1 + min; + let capacity = core::cmp::max(min * 8, DEFAULT_BUFFER_CAPACITY); + Buffer { buf: vec![0; capacity], min, end: 0 } + } + + /// Return the contents of this buffer. + #[inline] + pub(crate) fn buffer(&self) -> &[u8] { + &self.buf[..self.end] + } + + /// Return the minimum size of the buffer. The only way a buffer may be + /// smaller than this is if the stream itself contains less than the + /// minimum buffer amount. + #[inline] + pub(crate) fn min_buffer_len(&self) -> usize { + self.min + } + + /// Return all free capacity in this buffer. + fn free_buffer(&mut self) -> &mut [u8] { + &mut self.buf[self.end..] + } + + /// Refill the contents of this buffer by reading as much as possible into + /// this buffer's free capacity. If no more bytes could be read, then this + /// returns false. 
Otherwise, this reads until it has filled the buffer + /// past the minimum amount. + pub(crate) fn fill<R: std::io::Read>( + &mut self, + mut rdr: R, + ) -> std::io::Result<bool> { + let mut readany = false; + loop { + let readlen = rdr.read(self.free_buffer())?; + if readlen == 0 { + return Ok(readany); + } + readany = true; + self.end += readlen; + if self.buffer().len() >= self.min { + return Ok(true); + } + } + } + + /// Roll the contents of the buffer so that the suffix of this buffer is + /// moved to the front and all other contents are dropped. The size of the + /// suffix corresponds precisely to the minimum buffer length. + /// + /// This should only be called when the entire contents of this buffer have + /// been searched. + pub(crate) fn roll(&mut self) { + let roll_start = self + .end + .checked_sub(self.min) + .expect("buffer capacity should be bigger than minimum amount"); + let roll_end = roll_start + self.min; + + assert!(roll_end <= self.end); + self.buf.copy_within(roll_start..roll_end, 0); + self.end = self.min; + } +} diff --git a/third_party/rust/aho-corasick/src/util/byte_frequencies.rs b/third_party/rust/aho-corasick/src/util/byte_frequencies.rs new file mode 100644 index 0000000000..c313b629db --- /dev/null +++ b/third_party/rust/aho-corasick/src/util/byte_frequencies.rs @@ -0,0 +1,258 @@ +pub const BYTE_FREQUENCIES: [u8; 256] = [ + 55, // '\x00' + 52, // '\x01' + 51, // '\x02' + 50, // '\x03' + 49, // '\x04' + 48, // '\x05' + 47, // '\x06' + 46, // '\x07' + 45, // '\x08' + 103, // '\t' + 242, // '\n' + 66, // '\x0b' + 67, // '\x0c' + 229, // '\r' + 44, // '\x0e' + 43, // '\x0f' + 42, // '\x10' + 41, // '\x11' + 40, // '\x12' + 39, // '\x13' + 38, // '\x14' + 37, // '\x15' + 36, // '\x16' + 35, // '\x17' + 34, // '\x18' + 33, // '\x19' + 56, // '\x1a' + 32, // '\x1b' + 31, // '\x1c' + 30, // '\x1d' + 29, // '\x1e' + 28, // '\x1f' + 255, // ' ' + 148, // '!' 
+ 164, // '"' + 149, // '#' + 136, // '$' + 160, // '%' + 155, // '&' + 173, // "'" + 221, // '(' + 222, // ')' + 134, // '*' + 122, // '+' + 232, // ',' + 202, // '-' + 215, // '.' + 224, // '/' + 208, // '0' + 220, // '1' + 204, // '2' + 187, // '3' + 183, // '4' + 179, // '5' + 177, // '6' + 168, // '7' + 178, // '8' + 200, // '9' + 226, // ':' + 195, // ';' + 154, // '<' + 184, // '=' + 174, // '>' + 126, // '?' + 120, // '@' + 191, // 'A' + 157, // 'B' + 194, // 'C' + 170, // 'D' + 189, // 'E' + 162, // 'F' + 161, // 'G' + 150, // 'H' + 193, // 'I' + 142, // 'J' + 137, // 'K' + 171, // 'L' + 176, // 'M' + 185, // 'N' + 167, // 'O' + 186, // 'P' + 112, // 'Q' + 175, // 'R' + 192, // 'S' + 188, // 'T' + 156, // 'U' + 140, // 'V' + 143, // 'W' + 123, // 'X' + 133, // 'Y' + 128, // 'Z' + 147, // '[' + 138, // '\\' + 146, // ']' + 114, // '^' + 223, // '_' + 151, // '`' + 249, // 'a' + 216, // 'b' + 238, // 'c' + 236, // 'd' + 253, // 'e' + 227, // 'f' + 218, // 'g' + 230, // 'h' + 247, // 'i' + 135, // 'j' + 180, // 'k' + 241, // 'l' + 233, // 'm' + 246, // 'n' + 244, // 'o' + 231, // 'p' + 139, // 'q' + 245, // 'r' + 243, // 's' + 251, // 't' + 235, // 'u' + 201, // 'v' + 196, // 'w' + 240, // 'x' + 214, // 'y' + 152, // 'z' + 182, // '{' + 205, // '|' + 181, // '}' + 127, // '~' + 27, // '\x7f' + 212, // '\x80' + 211, // '\x81' + 210, // '\x82' + 213, // '\x83' + 228, // '\x84' + 197, // '\x85' + 169, // '\x86' + 159, // '\x87' + 131, // '\x88' + 172, // '\x89' + 105, // '\x8a' + 80, // '\x8b' + 98, // '\x8c' + 96, // '\x8d' + 97, // '\x8e' + 81, // '\x8f' + 207, // '\x90' + 145, // '\x91' + 116, // '\x92' + 115, // '\x93' + 144, // '\x94' + 130, // '\x95' + 153, // '\x96' + 121, // '\x97' + 107, // '\x98' + 132, // '\x99' + 109, // '\x9a' + 110, // '\x9b' + 124, // '\x9c' + 111, // '\x9d' + 82, // '\x9e' + 108, // '\x9f' + 118, // '\xa0' + 141, // '¡' + 113, // '¢' + 129, // '£' + 119, // '¤' + 125, // '¥' + 165, // '¦' + 117, // '§' + 92, // '¨' + 106, // '©' 
+ 83, // 'ª' + 72, // '«' + 99, // '¬' + 93, // '\xad' + 65, // '®' + 79, // '¯' + 166, // '°' + 237, // '±' + 163, // '²' + 199, // '³' + 190, // '´' + 225, // 'µ' + 209, // '¶' + 203, // '·' + 198, // '¸' + 217, // '¹' + 219, // 'º' + 206, // '»' + 234, // '¼' + 248, // '½' + 158, // '¾' + 239, // '¿' + 255, // 'À' + 255, // 'Á' + 255, // 'Â' + 255, // 'Ã' + 255, // 'Ä' + 255, // 'Å' + 255, // 'Æ' + 255, // 'Ç' + 255, // 'È' + 255, // 'É' + 255, // 'Ê' + 255, // 'Ë' + 255, // 'Ì' + 255, // 'Í' + 255, // 'Î' + 255, // 'Ï' + 255, // 'Ð' + 255, // 'Ñ' + 255, // 'Ò' + 255, // 'Ó' + 255, // 'Ô' + 255, // 'Õ' + 255, // 'Ö' + 255, // '×' + 255, // 'Ø' + 255, // 'Ù' + 255, // 'Ú' + 255, // 'Û' + 255, // 'Ü' + 255, // 'Ý' + 255, // 'Þ' + 255, // 'ß' + 255, // 'à' + 255, // 'á' + 255, // 'â' + 255, // 'ã' + 255, // 'ä' + 255, // 'å' + 255, // 'æ' + 255, // 'ç' + 255, // 'è' + 255, // 'é' + 255, // 'ê' + 255, // 'ë' + 255, // 'ì' + 255, // 'í' + 255, // 'î' + 255, // 'ï' + 255, // 'ð' + 255, // 'ñ' + 255, // 'ò' + 255, // 'ó' + 255, // 'ô' + 255, // 'õ' + 255, // 'ö' + 255, // '÷' + 255, // 'ø' + 255, // 'ù' + 255, // 'ú' + 255, // 'û' + 255, // 'ü' + 255, // 'ý' + 255, // 'þ' + 255, // 'ÿ' +]; diff --git a/third_party/rust/aho-corasick/src/util/debug.rs b/third_party/rust/aho-corasick/src/util/debug.rs new file mode 100644 index 0000000000..22b5f2231f --- /dev/null +++ b/third_party/rust/aho-corasick/src/util/debug.rs @@ -0,0 +1,26 @@ +/// A type that wraps a single byte with a convenient fmt::Debug impl that +/// escapes the byte. +pub(crate) struct DebugByte(pub(crate) u8); + +impl core::fmt::Debug for DebugByte { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + // Special case ASCII space. It's too hard to read otherwise, so + // put quotes around it. I sometimes wonder whether just '\x20' would + // be better... + if self.0 == b' ' { + return write!(f, "' '"); + } + // 10 bytes is enough to cover any output from ascii::escape_default. 
+ let mut bytes = [0u8; 10]; + let mut len = 0; + for (i, mut b) in core::ascii::escape_default(self.0).enumerate() { + // capitalize \xab to \xAB + if i >= 2 && b'a' <= b && b <= b'f' { + b -= 32; + } + bytes[len] = b; + len += 1; + } + write!(f, "{}", core::str::from_utf8(&bytes[..len]).unwrap()) + } +} diff --git a/third_party/rust/aho-corasick/src/util/error.rs b/third_party/rust/aho-corasick/src/util/error.rs new file mode 100644 index 0000000000..326d04657b --- /dev/null +++ b/third_party/rust/aho-corasick/src/util/error.rs @@ -0,0 +1,259 @@ +use crate::util::{ + primitives::{PatternID, SmallIndex}, + search::MatchKind, +}; + +/// An error that occurred during the construction of an Aho-Corasick +/// automaton. +/// +/// Build errors occur when some kind of limit has been exceeded, either in the +/// number of states, the number of patterns of the length of a pattern. These +/// limits aren't part of the public API, but they should generally be large +/// enough to handle most use cases. +/// +/// When the `std` feature is enabled, this implements the `std::error::Error` +/// trait. +#[derive(Clone, Debug)] +pub struct BuildError { + kind: ErrorKind, +} + +/// The kind of error that occurred. +#[derive(Clone, Debug)] +enum ErrorKind { + /// An error that occurs when allocating a new state would result in an + /// identifier that exceeds the capacity of a `StateID`. + StateIDOverflow { + /// The maximum possible id. + max: u64, + /// The maximum ID requested. + requested_max: u64, + }, + /// An error that occurs when adding a pattern to an Aho-Corasick + /// automaton would result in an identifier that exceeds the capacity of a + /// `PatternID`. + PatternIDOverflow { + /// The maximum possible id. + max: u64, + /// The maximum ID requested. + requested_max: u64, + }, + /// Occurs when a pattern string is given to the Aho-Corasick constructor + /// that is too long. + PatternTooLong { + /// The ID of the pattern that was too long. 
+ pattern: PatternID, + /// The length that was too long. + len: usize, + }, +} + +impl BuildError { + pub(crate) fn state_id_overflow( + max: u64, + requested_max: u64, + ) -> BuildError { + BuildError { kind: ErrorKind::StateIDOverflow { max, requested_max } } + } + + pub(crate) fn pattern_id_overflow( + max: u64, + requested_max: u64, + ) -> BuildError { + BuildError { + kind: ErrorKind::PatternIDOverflow { max, requested_max }, + } + } + + pub(crate) fn pattern_too_long( + pattern: PatternID, + len: usize, + ) -> BuildError { + BuildError { kind: ErrorKind::PatternTooLong { pattern, len } } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for BuildError {} + +impl core::fmt::Display for BuildError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self.kind { + ErrorKind::StateIDOverflow { max, requested_max } => { + write!( + f, + "state identifier overflow: failed to create state ID \ + from {}, which exceeds the max of {}", + requested_max, max, + ) + } + ErrorKind::PatternIDOverflow { max, requested_max } => { + write!( + f, + "pattern identifier overflow: failed to create pattern ID \ + from {}, which exceeds the max of {}", + requested_max, max, + ) + } + ErrorKind::PatternTooLong { pattern, len } => { + write!( + f, + "pattern {} with length {} exceeds \ + the maximum pattern length of {}", + pattern.as_usize(), + len, + SmallIndex::MAX.as_usize(), + ) + } + } + } +} + +/// An error that occurred during an Aho-Corasick search. +/// +/// An error that occurs during a search is limited to some kind of +/// misconfiguration that resulted in an illegal call. Stated differently, +/// whether an error occurs is not dependent on the specific bytes in the +/// haystack. +/// +/// Examples of misconfiguration: +/// +/// * Executing a stream or overlapping search on a searcher that was built was +/// something other than [`MatchKind::Standard`](crate::MatchKind::Standard) +/// semantics. 
+/// * Requested an anchored or an unanchored search on a searcher that doesn't +/// support unanchored or anchored searches, respectively. +/// +/// When the `std` feature is enabled, this implements the `std::error::Error` +/// trait. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct MatchError(alloc::boxed::Box<MatchErrorKind>); + +impl MatchError { + /// Create a new error value with the given kind. + /// + /// This is a more verbose version of the kind-specific constructors, e.g., + /// `MatchError::unsupported_stream`. + pub fn new(kind: MatchErrorKind) -> MatchError { + MatchError(alloc::boxed::Box::new(kind)) + } + + /// Returns a reference to the underlying error kind. + pub fn kind(&self) -> &MatchErrorKind { + &self.0 + } + + /// Create a new "invalid anchored search" error. This occurs when the + /// caller requests an anchored search but where anchored searches aren't + /// supported. + /// + /// This is the same as calling `MatchError::new` with a + /// [`MatchErrorKind::InvalidInputAnchored`] kind. + pub fn invalid_input_anchored() -> MatchError { + MatchError::new(MatchErrorKind::InvalidInputAnchored) + } + + /// Create a new "invalid unanchored search" error. This occurs when the + /// caller requests an unanchored search but where unanchored searches + /// aren't supported. + /// + /// This is the same as calling `MatchError::new` with a + /// [`MatchErrorKind::InvalidInputUnanchored`] kind. + pub fn invalid_input_unanchored() -> MatchError { + MatchError::new(MatchErrorKind::InvalidInputUnanchored) + } + + /// Create a new "unsupported stream search" error. This occurs when the + /// caller requests a stream search while using an Aho-Corasick automaton + /// with a match kind other than [`MatchKind::Standard`]. + /// + /// The match kind given should be the match kind of the automaton. It + /// should never be `MatchKind::Standard`. 
+ pub fn unsupported_stream(got: MatchKind) -> MatchError { + MatchError::new(MatchErrorKind::UnsupportedStream { got }) + } + + /// Create a new "unsupported overlapping search" error. This occurs when + /// the caller requests an overlapping search while using an Aho-Corasick + /// automaton with a match kind other than [`MatchKind::Standard`]. + /// + /// The match kind given should be the match kind of the automaton. It + /// should never be `MatchKind::Standard`. + pub fn unsupported_overlapping(got: MatchKind) -> MatchError { + MatchError::new(MatchErrorKind::UnsupportedOverlapping { got }) + } + + /// Create a new "unsupported empty pattern" error. This occurs when the + /// caller requests a search for which matching an automaton that contains + /// an empty pattern string is not supported. + pub fn unsupported_empty() -> MatchError { + MatchError::new(MatchErrorKind::UnsupportedEmpty) + } +} + +/// The underlying kind of a [`MatchError`]. +/// +/// This is a **non-exhaustive** enum. That means new variants may be added in +/// a semver-compatible release. +#[non_exhaustive] +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum MatchErrorKind { + /// An error indicating that an anchored search was requested, but from a + /// searcher that was built without anchored support. + InvalidInputAnchored, + /// An error indicating that an unanchored search was requested, but from a + /// searcher that was built without unanchored support. + InvalidInputUnanchored, + /// An error indicating that a stream search was attempted on an + /// Aho-Corasick automaton with an unsupported `MatchKind`. + UnsupportedStream { + /// The match semantics for the automaton that was used. + got: MatchKind, + }, + /// An error indicating that an overlapping search was attempted on an + /// Aho-Corasick automaton with an unsupported `MatchKind`. + UnsupportedOverlapping { + /// The match semantics for the automaton that was used. 
+ got: MatchKind, + }, + /// An error indicating that the operation requested doesn't support + /// automatons that contain an empty pattern string. + UnsupportedEmpty, +} + +#[cfg(feature = "std")] +impl std::error::Error for MatchError {} + +impl core::fmt::Display for MatchError { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + match *self.kind() { + MatchErrorKind::InvalidInputAnchored => { + write!(f, "anchored searches are not supported or enabled") + } + MatchErrorKind::InvalidInputUnanchored => { + write!(f, "unanchored searches are not supported or enabled") + } + MatchErrorKind::UnsupportedStream { got } => { + write!( + f, + "match kind {:?} does not support stream searching", + got, + ) + } + MatchErrorKind::UnsupportedOverlapping { got } => { + write!( + f, + "match kind {:?} does not support overlapping searches", + got, + ) + } + MatchErrorKind::UnsupportedEmpty => { + write!( + f, + "matching with an empty pattern string is not \ + supported for this operation", + ) + } + } + } +} diff --git a/third_party/rust/aho-corasick/src/util/int.rs b/third_party/rust/aho-corasick/src/util/int.rs new file mode 100644 index 0000000000..28ede7a47f --- /dev/null +++ b/third_party/rust/aho-corasick/src/util/int.rs @@ -0,0 +1,284 @@ +/*! +This module provides several integer oriented traits for converting between +both fixed size integers and integers whose size varies based on the target +(like `usize`). + +The main design principle for this module is to centralize all uses of `as`. +The thinking here is that `as` makes it very easy to perform accidental lossy +conversions, and if we centralize all its uses here under more descriptive +higher level operations, its use and correctness becomes easier to audit. + +This was copied mostly wholesale from `regex-automata`. + +NOTE: for simplicity, we don't take target pointer width into account here for +`usize` conversions. 
Since we currently only panic in debug mode, skipping the +check when it can be proven it isn't needed at compile time doesn't really +matter. Now, if we wind up wanting to do as many checks as possible in release +mode, then we would want to skip those when we know the conversions are always +non-lossy. +*/ + +pub(crate) trait U8 { + fn as_usize(self) -> usize; +} + +impl U8 for u8 { + fn as_usize(self) -> usize { + usize::from(self) + } +} + +pub(crate) trait U16 { + fn as_usize(self) -> usize; + fn low_u8(self) -> u8; + fn high_u8(self) -> u8; +} + +impl U16 for u16 { + fn as_usize(self) -> usize { + usize::from(self) + } + + fn low_u8(self) -> u8 { + self as u8 + } + + fn high_u8(self) -> u8 { + (self >> 8) as u8 + } +} + +pub(crate) trait U32 { + fn as_usize(self) -> usize; + fn low_u8(self) -> u8; + fn low_u16(self) -> u16; + fn high_u16(self) -> u16; +} + +impl U32 for u32 { + #[inline] + fn as_usize(self) -> usize { + #[cfg(debug_assertions)] + { + usize::try_from(self).expect("u32 overflowed usize") + } + #[cfg(not(debug_assertions))] + { + self as usize + } + } + + fn low_u8(self) -> u8 { + self as u8 + } + + fn low_u16(self) -> u16 { + self as u16 + } + + fn high_u16(self) -> u16 { + (self >> 16) as u16 + } +} + +pub(crate) trait U64 { + fn as_usize(self) -> usize; + fn low_u8(self) -> u8; + fn low_u16(self) -> u16; + fn low_u32(self) -> u32; + fn high_u32(self) -> u32; +} + +impl U64 for u64 { + fn as_usize(self) -> usize { + #[cfg(debug_assertions)] + { + usize::try_from(self).expect("u64 overflowed usize") + } + #[cfg(not(debug_assertions))] + { + self as usize + } + } + + fn low_u8(self) -> u8 { + self as u8 + } + + fn low_u16(self) -> u16 { + self as u16 + } + + fn low_u32(self) -> u32 { + self as u32 + } + + fn high_u32(self) -> u32 { + (self >> 32) as u32 + } +} + +pub(crate) trait I8 { + fn as_usize(self) -> usize; + fn to_bits(self) -> u8; + fn from_bits(n: u8) -> i8; +} + +impl I8 for i8 { + fn as_usize(self) -> usize { + 
#[cfg(debug_assertions)] + { + usize::try_from(self).expect("i8 overflowed usize") + } + #[cfg(not(debug_assertions))] + { + self as usize + } + } + + fn to_bits(self) -> u8 { + self as u8 + } + + fn from_bits(n: u8) -> i8 { + n as i8 + } +} + +pub(crate) trait I32 { + fn as_usize(self) -> usize; + fn to_bits(self) -> u32; + fn from_bits(n: u32) -> i32; +} + +impl I32 for i32 { + fn as_usize(self) -> usize { + #[cfg(debug_assertions)] + { + usize::try_from(self).expect("i32 overflowed usize") + } + #[cfg(not(debug_assertions))] + { + self as usize + } + } + + fn to_bits(self) -> u32 { + self as u32 + } + + fn from_bits(n: u32) -> i32 { + n as i32 + } +} + +pub(crate) trait I64 { + fn as_usize(self) -> usize; + fn to_bits(self) -> u64; + fn from_bits(n: u64) -> i64; +} + +impl I64 for i64 { + fn as_usize(self) -> usize { + #[cfg(debug_assertions)] + { + usize::try_from(self).expect("i64 overflowed usize") + } + #[cfg(not(debug_assertions))] + { + self as usize + } + } + + fn to_bits(self) -> u64 { + self as u64 + } + + fn from_bits(n: u64) -> i64 { + n as i64 + } +} + +pub(crate) trait Usize { + fn as_u8(self) -> u8; + fn as_u16(self) -> u16; + fn as_u32(self) -> u32; + fn as_u64(self) -> u64; +} + +impl Usize for usize { + fn as_u8(self) -> u8 { + #[cfg(debug_assertions)] + { + u8::try_from(self).expect("usize overflowed u8") + } + #[cfg(not(debug_assertions))] + { + self as u8 + } + } + + fn as_u16(self) -> u16 { + #[cfg(debug_assertions)] + { + u16::try_from(self).expect("usize overflowed u16") + } + #[cfg(not(debug_assertions))] + { + self as u16 + } + } + + fn as_u32(self) -> u32 { + #[cfg(debug_assertions)] + { + u32::try_from(self).expect("usize overflowed u32") + } + #[cfg(not(debug_assertions))] + { + self as u32 + } + } + + fn as_u64(self) -> u64 { + #[cfg(debug_assertions)] + { + u64::try_from(self).expect("usize overflowed u64") + } + #[cfg(not(debug_assertions))] + { + self as u64 + } + } +} + +// Pointers aren't integers, but we convert pointers to 
integers to perform +// offset arithmetic in some places. (And no, we don't convert the integers +// back to pointers.) So add 'as_usize' conversions here too for completeness. +// +// These 'as' casts are actually okay because they're always non-lossy. But the +// idea here is to just try and remove as much 'as' as possible, particularly +// in this crate where we are being really paranoid about offsets and making +// sure we don't panic on inputs that might be untrusted. This way, the 'as' +// casts become easier to audit if they're all in one place, even when some of +// them are actually okay 100% of the time. + +pub(crate) trait Pointer { + fn as_usize(self) -> usize; +} + +impl<T> Pointer for *const T { + fn as_usize(self) -> usize { + self as usize + } +} + +pub(crate) trait PointerMut { + fn as_usize(self) -> usize; +} + +impl<T> PointerMut for *mut T { + fn as_usize(self) -> usize { + self as usize + } +} diff --git a/third_party/rust/aho-corasick/src/util/mod.rs b/third_party/rust/aho-corasick/src/util/mod.rs new file mode 100644 index 0000000000..f7a1ddd07b --- /dev/null +++ b/third_party/rust/aho-corasick/src/util/mod.rs @@ -0,0 +1,12 @@ +pub(crate) mod alphabet; +#[cfg(feature = "std")] +pub(crate) mod buffer; +pub(crate) mod byte_frequencies; +pub(crate) mod debug; +pub(crate) mod error; +pub(crate) mod int; +pub(crate) mod prefilter; +pub(crate) mod primitives; +pub(crate) mod remapper; +pub(crate) mod search; +pub(crate) mod special; diff --git a/third_party/rust/aho-corasick/src/util/prefilter.rs b/third_party/rust/aho-corasick/src/util/prefilter.rs new file mode 100644 index 0000000000..f5ddc75b7c --- /dev/null +++ b/third_party/rust/aho-corasick/src/util/prefilter.rs @@ -0,0 +1,924 @@ +use core::{ + cmp, + fmt::Debug, + panic::{RefUnwindSafe, UnwindSafe}, + u8, +}; + +use alloc::{sync::Arc, vec, vec::Vec}; + +use crate::{ + packed, + util::{ + alphabet::ByteSet, + search::{Match, MatchKind, Span}, + }, +}; + +/// A prefilter for accelerating a 
search. +/// +/// This crate uses prefilters in the core search implementations to accelerate +/// common cases. They typically only apply to cases where there are a small +/// number of patterns (less than 100 or so), but when they do, thoughput can +/// be boosted considerably, perhaps by an order of magnitude. When a prefilter +/// is active, it is used whenever a search enters an automaton's start state. +/// +/// Currently, prefilters cannot be constructed by +/// callers. A `Prefilter` can only be accessed via the +/// [`Automaton::prefilter`](crate::automaton::Automaton::prefilter) +/// method and used to execute a search. In other words, a prefilter can be +/// used to optimize your own search implementation if necessary, but cannot do +/// much else. If you have a use case for more APIs, please submit an issue. +#[derive(Clone, Debug)] +pub struct Prefilter { + finder: Arc<dyn PrefilterI>, + memory_usage: usize, +} + +impl Prefilter { + /// Execute a search in the haystack within the span given. If a match or + /// a possible match is returned, then it is guaranteed to occur within + /// the bounds of the span. + /// + /// If the span provided is invalid for the given haystack, then behavior + /// is unspecified. + #[inline] + pub fn find_in(&self, haystack: &[u8], span: Span) -> Candidate { + self.finder.find_in(haystack, span) + } + + #[inline] + pub(crate) fn memory_usage(&self) -> usize { + self.memory_usage + } +} + +/// A candidate is the result of running a prefilter on a haystack at a +/// particular position. +/// +/// The result is either no match, a confirmed match or a possible match. +/// +/// When no match is returned, the prefilter is guaranteeing that no possible +/// match can be found in the haystack, and the caller may trust this. That is, +/// all correct prefilters must never report false negatives. 
+/// +/// In some cases, a prefilter can confirm a match very quickly, in which case, +/// the caller may use this to stop what it's doing and report the match. In +/// this case, prefilter implementations must never report a false positive. +/// In other cases, the prefilter can only report a potential match, in which +/// case the callers must attempt to confirm the match. In this case, prefilter +/// implementations are permitted to return false positives. +#[derive(Clone, Debug)] +pub enum Candidate { + /// No match was found. Since false negatives are not possible, this means + /// the search can quit as it is guaranteed not to find another match. + None, + /// A confirmed match was found. Callers do not need to confirm it. + Match(Match), + /// The start of a possible match was found. Callers must confirm it before + /// reporting it as a match. + PossibleStartOfMatch(usize), +} + +impl Candidate { + /// Convert this candidate into an option. This is useful when callers + /// do not distinguish between true positives and false positives (i.e., + /// the caller must always confirm the match). + pub fn into_option(self) -> Option<usize> { + match self { + Candidate::None => None, + Candidate::Match(ref m) => Some(m.start()), + Candidate::PossibleStartOfMatch(start) => Some(start), + } + } +} + +/// A prefilter describes the behavior of fast literal scanners for quickly +/// skipping past bytes in the haystack that we know cannot possibly +/// participate in a match. +trait PrefilterI: + Send + Sync + RefUnwindSafe + UnwindSafe + Debug + 'static +{ + /// Returns the next possible match candidate. This may yield false + /// positives, so callers must confirm a match starting at the position + /// returned. This, however, must never produce false negatives. That is, + /// this must, at minimum, return the starting position of the next match + /// in the given haystack after or at the given position. 
+ fn find_in(&self, haystack: &[u8], span: Span) -> Candidate; +} + +impl<P: PrefilterI + ?Sized> PrefilterI for Arc<P> { + #[inline(always)] + fn find_in(&self, haystack: &[u8], span: Span) -> Candidate { + (**self).find_in(haystack, span) + } +} + +/// A builder for constructing the best possible prefilter. When constructed, +/// this builder will heuristically select the best prefilter it can build, +/// if any, and discard the rest. +#[derive(Debug)] +pub(crate) struct Builder { + count: usize, + ascii_case_insensitive: bool, + start_bytes: StartBytesBuilder, + rare_bytes: RareBytesBuilder, + memmem: MemmemBuilder, + packed: Option<packed::Builder>, + // If we run across a condition that suggests we shouldn't use a prefilter + // at all (like an empty pattern), then disable prefilters entirely. + enabled: bool, +} + +impl Builder { + /// Create a new builder for constructing the best possible prefilter. + pub(crate) fn new(kind: MatchKind) -> Builder { + let pbuilder = kind + .as_packed() + .map(|kind| packed::Config::new().match_kind(kind).builder()); + Builder { + count: 0, + ascii_case_insensitive: false, + start_bytes: StartBytesBuilder::new(), + rare_bytes: RareBytesBuilder::new(), + memmem: MemmemBuilder::default(), + packed: pbuilder, + enabled: true, + } + } + + /// Enable ASCII case insensitivity. When set, byte strings added to this + /// builder will be interpreted without respect to ASCII case. + pub(crate) fn ascii_case_insensitive(mut self, yes: bool) -> Builder { + self.ascii_case_insensitive = yes; + self.start_bytes = self.start_bytes.ascii_case_insensitive(yes); + self.rare_bytes = self.rare_bytes.ascii_case_insensitive(yes); + self + } + + /// Return a prefilter suitable for quickly finding potential matches. + /// + /// All patterns added to an Aho-Corasick automaton should be added to this + /// builder before attempting to construct the prefilter. 
+ pub(crate) fn build(&self) -> Option<Prefilter> { + if !self.enabled { + debug!("prefilter not enabled, skipping"); + return None; + } + // If we only have one pattern, then deferring to memmem is always + // the best choice. This is kind of a weird case, because, well, why + // use Aho-Corasick if you only have one pattern? But maybe you don't + // know exactly how many patterns you'll get up front, and you need to + // support the option of multiple patterns. So instead of relying on + // the caller to branch and use memmem explicitly, we just do it for + // them. + if !self.ascii_case_insensitive { + if let Some(pre) = self.memmem.build() { + debug!("using memmem prefilter"); + return Some(pre); + } + } + let (packed, patlen, minlen) = if self.ascii_case_insensitive { + (None, usize::MAX, 0) + } else { + let patlen = self.packed.as_ref().map_or(usize::MAX, |p| p.len()); + let minlen = self.packed.as_ref().map_or(0, |p| p.minimum_len()); + let packed = + self.packed.as_ref().and_then(|b| b.build()).map(|s| { + let memory_usage = s.memory_usage(); + debug!( + "built packed prefilter (len: {}, \ + minimum pattern len: {}, memory usage: {}) \ + for consideration", + patlen, minlen, memory_usage, + ); + Prefilter { finder: Arc::new(Packed(s)), memory_usage } + }); + (packed, patlen, minlen) + }; + match (self.start_bytes.build(), self.rare_bytes.build()) { + // If we could build both start and rare prefilters, then there are + // a few cases in which we'd want to use the start-byte prefilter + // over the rare-byte prefilter, since the former has lower + // overhead. 
+ (prestart @ Some(_), prerare @ Some(_)) => { + debug!( + "both start (len={}, rank={}) and \ + rare (len={}, rank={}) byte prefilters \ + are available", + self.start_bytes.count, + self.start_bytes.rank_sum, + self.rare_bytes.count, + self.rare_bytes.rank_sum, + ); + if patlen <= 16 + && minlen >= 2 + && self.start_bytes.count >= 3 + && self.rare_bytes.count >= 3 + { + debug!( + "start and rare byte prefilters available, but \ + they're probably slower than packed so using \ + packed" + ); + return packed; + } + // If the start-byte prefilter can scan for a smaller number + // of bytes than the rare-byte prefilter, then it's probably + // faster. + let has_fewer_bytes = + self.start_bytes.count < self.rare_bytes.count; + // Otherwise, if the combined frequency rank of the detected + // bytes in the start-byte prefilter is "close" to the combined + // frequency rank of the rare-byte prefilter, then we pick + // the start-byte prefilter even if the rare-byte prefilter + // heuristically searches for rare bytes. This is because the + // rare-byte prefilter has higher constant costs, so we tend to + // prefer the start-byte prefilter when we can. 
+ let has_rarer_bytes = + self.start_bytes.rank_sum <= self.rare_bytes.rank_sum + 50; + if has_fewer_bytes { + debug!( + "using start byte prefilter because it has fewer + bytes to search for than the rare byte prefilter", + ); + prestart + } else if has_rarer_bytes { + debug!( + "using start byte prefilter because its byte \ + frequency rank was determined to be \ + \"good enough\" relative to the rare byte prefilter \ + byte frequency rank", + ); + prestart + } else { + debug!("using rare byte prefilter"); + prerare + } + } + (prestart @ Some(_), None) => { + if patlen <= 16 && minlen >= 2 && self.start_bytes.count >= 3 { + debug!( + "start byte prefilter available, but \ + it's probably slower than packed so using \ + packed" + ); + return packed; + } + debug!( + "have start byte prefilter but not rare byte prefilter, \ + so using start byte prefilter", + ); + prestart + } + (None, prerare @ Some(_)) => { + if patlen <= 16 && minlen >= 2 && self.rare_bytes.count >= 3 { + debug!( + "rare byte prefilter available, but \ + it's probably slower than packed so using \ + packed" + ); + return packed; + } + debug!( + "have rare byte prefilter but not start byte prefilter, \ + so using rare byte prefilter", + ); + prerare + } + (None, None) if self.ascii_case_insensitive => { + debug!( + "no start or rare byte prefilter and ASCII case \ + insensitivity was enabled, so skipping prefilter", + ); + None + } + (None, None) => { + if packed.is_some() { + debug!("falling back to packed prefilter"); + } else { + debug!("no prefilter available"); + } + packed + } + } + } + + /// Add a literal string to this prefilter builder. 
+ pub(crate) fn add(&mut self, bytes: &[u8]) { + if bytes.is_empty() { + self.enabled = false; + } + if !self.enabled { + return; + } + self.count += 1; + self.start_bytes.add(bytes); + self.rare_bytes.add(bytes); + self.memmem.add(bytes); + if let Some(ref mut pbuilder) = self.packed { + pbuilder.add(bytes); + } + } +} + +/// A type that wraps a packed searcher and implements the `Prefilter` +/// interface. +#[derive(Clone, Debug)] +struct Packed(packed::Searcher); + +impl PrefilterI for Packed { + fn find_in(&self, haystack: &[u8], span: Span) -> Candidate { + self.0 + .find_in(&haystack, span) + .map_or(Candidate::None, Candidate::Match) + } +} + +/// A builder for constructing a prefilter that uses memmem. +#[derive(Debug, Default)] +struct MemmemBuilder { + /// The number of patterns that have been added. + count: usize, + /// The singular pattern to search for. This is only set when count==1. + one: Option<Vec<u8>>, +} + +impl MemmemBuilder { + fn build(&self) -> Option<Prefilter> { + #[cfg(all(feature = "std", feature = "perf-literal"))] + fn imp(builder: &MemmemBuilder) -> Option<Prefilter> { + let pattern = builder.one.as_ref()?; + assert_eq!(1, builder.count); + let finder = Arc::new(Memmem( + memchr::memmem::Finder::new(pattern).into_owned(), + )); + let memory_usage = pattern.len(); + Some(Prefilter { finder, memory_usage }) + } + + #[cfg(not(all(feature = "std", feature = "perf-literal")))] + fn imp(_: &MemmemBuilder) -> Option<Prefilter> { + None + } + + imp(self) + } + + fn add(&mut self, bytes: &[u8]) { + self.count += 1; + if self.count == 1 { + self.one = Some(bytes.to_vec()); + } else { + self.one = None; + } + } +} + +/// A type that wraps a SIMD accelerated single substring search from the +/// `memchr` crate for use as a prefilter. +/// +/// Currently, this prefilter is only active for Aho-Corasick searchers with +/// a single pattern. 
In theory, this could be extended to support searchers +/// that have a common prefix of more than one byte (for one byte, we would use +/// memchr), but it's not clear if it's worth it or not. +/// +/// Also, unfortunately, this currently also requires the 'std' feature to +/// be enabled. That's because memchr doesn't have a no-std-but-with-alloc +/// mode, and so APIs like Finder::into_owned aren't available when 'std' is +/// disabled. But there should be an 'alloc' feature that brings in APIs like +/// Finder::into_owned but doesn't use std-only features like runtime CPU +/// feature detection. +#[cfg(all(feature = "std", feature = "perf-literal"))] +#[derive(Clone, Debug)] +struct Memmem(memchr::memmem::Finder<'static>); + +#[cfg(all(feature = "std", feature = "perf-literal"))] +impl PrefilterI for Memmem { + fn find_in(&self, haystack: &[u8], span: Span) -> Candidate { + use crate::util::primitives::PatternID; + + self.0.find(&haystack[span]).map_or(Candidate::None, |i| { + let start = span.start + i; + let end = start + self.0.needle().len(); + // N.B. We can declare a match and use a fixed pattern ID here + // because a Memmem prefilter is only ever created for searchers + // with exactly one pattern. Thus, every match is always a match + // and it is always for the first and only pattern. + Candidate::Match(Match::new(PatternID::ZERO, start..end)) + }) + } +} + +/// A builder for constructing a rare byte prefilter. +/// +/// A rare byte prefilter attempts to pick out a small set of rare bytes that +/// occurr in the patterns, and then quickly scan to matches of those rare +/// bytes. +#[derive(Clone, Debug)] +struct RareBytesBuilder { + /// Whether this prefilter should account for ASCII case insensitivity or + /// not. + ascii_case_insensitive: bool, + /// A set of rare bytes, indexed by byte value. + rare_set: ByteSet, + /// A set of byte offsets associated with bytes in a pattern. 
An entry + /// corresponds to a particular bytes (its index) and is only non-zero if + /// the byte occurred at an offset greater than 0 in at least one pattern. + /// + /// If a byte's offset is not representable in 8 bits, then the rare bytes + /// prefilter becomes inert. + byte_offsets: RareByteOffsets, + /// Whether this is available as a prefilter or not. This can be set to + /// false during construction if a condition is seen that invalidates the + /// use of the rare-byte prefilter. + available: bool, + /// The number of bytes set to an active value in `byte_offsets`. + count: usize, + /// The sum of frequency ranks for the rare bytes detected. This is + /// intended to give a heuristic notion of how rare the bytes are. + rank_sum: u16, +} + +/// A set of byte offsets, keyed by byte. +#[derive(Clone, Copy)] +struct RareByteOffsets { + /// Each entry corresponds to the maximum offset of the corresponding + /// byte across all patterns seen. + set: [RareByteOffset; 256], +} + +impl RareByteOffsets { + /// Create a new empty set of rare byte offsets. + pub(crate) fn empty() -> RareByteOffsets { + RareByteOffsets { set: [RareByteOffset::default(); 256] } + } + + /// Add the given offset for the given byte to this set. If the offset is + /// greater than the existing offset, then it overwrites the previous + /// value and returns false. If there is no previous value set, then this + /// sets it and returns true. 
+ pub(crate) fn set(&mut self, byte: u8, off: RareByteOffset) { + self.set[byte as usize].max = + cmp::max(self.set[byte as usize].max, off.max); + } +} + +impl core::fmt::Debug for RareByteOffsets { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut offsets = vec![]; + for off in self.set.iter() { + if off.max > 0 { + offsets.push(off); + } + } + f.debug_struct("RareByteOffsets").field("set", &offsets).finish() + } +} + +/// Offsets associated with an occurrence of a "rare" byte in any of the +/// patterns used to construct a single Aho-Corasick automaton. +#[derive(Clone, Copy, Debug)] +struct RareByteOffset { + /// The maximum offset at which a particular byte occurs from the start + /// of any pattern. This is used as a shift amount. That is, when an + /// occurrence of this byte is found, the candidate position reported by + /// the prefilter is `position_of_byte - max`, such that the automaton + /// will begin its search at a position that is guaranteed to observe a + /// match. + /// + /// To avoid accidentally quadratic behavior, a prefilter is considered + /// ineffective when it is asked to start scanning from a position that it + /// has already scanned past. + /// + /// Using a `u8` here means that if we ever see a pattern that's longer + /// than 255 bytes, then the entire rare byte prefilter is disabled. + max: u8, +} + +impl Default for RareByteOffset { + fn default() -> RareByteOffset { + RareByteOffset { max: 0 } + } +} + +impl RareByteOffset { + /// Create a new rare byte offset. If the given offset is too big, then + /// None is returned. In that case, callers should render the rare bytes + /// prefilter inert. + fn new(max: usize) -> Option<RareByteOffset> { + if max > u8::MAX as usize { + None + } else { + Some(RareByteOffset { max: max as u8 }) + } + } +} + +impl RareBytesBuilder { + /// Create a new builder for constructing a rare byte prefilter. 
+ fn new() -> RareBytesBuilder { + RareBytesBuilder { + ascii_case_insensitive: false, + rare_set: ByteSet::empty(), + byte_offsets: RareByteOffsets::empty(), + available: true, + count: 0, + rank_sum: 0, + } + } + + /// Enable ASCII case insensitivity. When set, byte strings added to this + /// builder will be interpreted without respect to ASCII case. + fn ascii_case_insensitive(mut self, yes: bool) -> RareBytesBuilder { + self.ascii_case_insensitive = yes; + self + } + + /// Build the rare bytes prefilter. + /// + /// If there are more than 3 distinct rare bytes found, or if heuristics + /// otherwise determine that this prefilter should not be used, then `None` + /// is returned. + fn build(&self) -> Option<Prefilter> { + #[cfg(feature = "perf-literal")] + fn imp(builder: &RareBytesBuilder) -> Option<Prefilter> { + if !builder.available || builder.count > 3 { + return None; + } + let (mut bytes, mut len) = ([0; 3], 0); + for b in 0..=255 { + if builder.rare_set.contains(b) { + bytes[len] = b as u8; + len += 1; + } + } + let finder: Arc<dyn PrefilterI> = match len { + 0 => return None, + 1 => Arc::new(RareBytesOne { + byte1: bytes[0], + offset: builder.byte_offsets.set[bytes[0] as usize], + }), + 2 => Arc::new(RareBytesTwo { + offsets: builder.byte_offsets, + byte1: bytes[0], + byte2: bytes[1], + }), + 3 => Arc::new(RareBytesThree { + offsets: builder.byte_offsets, + byte1: bytes[0], + byte2: bytes[1], + byte3: bytes[2], + }), + _ => unreachable!(), + }; + Some(Prefilter { finder, memory_usage: 0 }) + } + + #[cfg(not(feature = "perf-literal"))] + fn imp(_: &RareBytesBuilder) -> Option<Prefilter> { + None + } + + imp(self) + } + + /// Add a byte string to this builder. + /// + /// All patterns added to an Aho-Corasick automaton should be added to this + /// builder before attempting to construct the prefilter. + fn add(&mut self, bytes: &[u8]) { + // If we've already given up, then do nothing. 
+ if !self.available { + return; + } + // If we've already blown our budget, then don't waste time looking + // for more rare bytes. + if self.count > 3 { + self.available = false; + return; + } + // If the pattern is too long, then our offset table is bunk, so + // give up. + if bytes.len() >= 256 { + self.available = false; + return; + } + let mut rarest = match bytes.get(0) { + None => return, + Some(&b) => (b, freq_rank(b)), + }; + // The idea here is to look for the rarest byte in each pattern, and + // add that to our set. As a special exception, if we see a byte that + // we've already added, then we immediately stop and choose that byte, + // even if there's another rare byte in the pattern. This helps us + // apply the rare byte optimization in more cases by attempting to pick + // bytes that are in common between patterns. So for example, if we + // were searching for `Sherlock` and `lockjaw`, then this would pick + // `k` for both patterns, resulting in the use of `memchr` instead of + // `memchr2` for `k` and `j`. + let mut found = false; + for (pos, &b) in bytes.iter().enumerate() { + self.set_offset(pos, b); + if found { + continue; + } + if self.rare_set.contains(b) { + found = true; + continue; + } + let rank = freq_rank(b); + if rank < rarest.1 { + rarest = (b, rank); + } + } + if !found { + self.add_rare_byte(rarest.0); + } + } + + fn set_offset(&mut self, pos: usize, byte: u8) { + // This unwrap is OK because pos is never bigger than our max. 
+ let offset = RareByteOffset::new(pos).unwrap(); + self.byte_offsets.set(byte, offset); + if self.ascii_case_insensitive { + self.byte_offsets.set(opposite_ascii_case(byte), offset); + } + } + + fn add_rare_byte(&mut self, byte: u8) { + self.add_one_rare_byte(byte); + if self.ascii_case_insensitive { + self.add_one_rare_byte(opposite_ascii_case(byte)); + } + } + + fn add_one_rare_byte(&mut self, byte: u8) { + if !self.rare_set.contains(byte) { + self.rare_set.add(byte); + self.count += 1; + self.rank_sum += freq_rank(byte) as u16; + } + } +} + +/// A prefilter for scanning for a single "rare" byte. +#[cfg(feature = "perf-literal")] +#[derive(Clone, Debug)] +struct RareBytesOne { + byte1: u8, + offset: RareByteOffset, +} + +#[cfg(feature = "perf-literal")] +impl PrefilterI for RareBytesOne { + fn find_in(&self, haystack: &[u8], span: Span) -> Candidate { + memchr::memchr(self.byte1, &haystack[span]) + .map(|i| { + let pos = span.start + i; + cmp::max( + span.start, + pos.saturating_sub(usize::from(self.offset.max)), + ) + }) + .map_or(Candidate::None, Candidate::PossibleStartOfMatch) + } +} + +/// A prefilter for scanning for two "rare" bytes. +#[cfg(feature = "perf-literal")] +#[derive(Clone, Debug)] +struct RareBytesTwo { + offsets: RareByteOffsets, + byte1: u8, + byte2: u8, +} + +#[cfg(feature = "perf-literal")] +impl PrefilterI for RareBytesTwo { + fn find_in(&self, haystack: &[u8], span: Span) -> Candidate { + memchr::memchr2(self.byte1, self.byte2, &haystack[span]) + .map(|i| { + let pos = span.start + i; + let offset = self.offsets.set[usize::from(haystack[pos])].max; + cmp::max(span.start, pos.saturating_sub(usize::from(offset))) + }) + .map_or(Candidate::None, Candidate::PossibleStartOfMatch) + } +} + +/// A prefilter for scanning for three "rare" bytes. 
+#[cfg(feature = "perf-literal")] +#[derive(Clone, Debug)] +struct RareBytesThree { + offsets: RareByteOffsets, + byte1: u8, + byte2: u8, + byte3: u8, +} + +#[cfg(feature = "perf-literal")] +impl PrefilterI for RareBytesThree { + fn find_in(&self, haystack: &[u8], span: Span) -> Candidate { + memchr::memchr3(self.byte1, self.byte2, self.byte3, &haystack[span]) + .map(|i| { + let pos = span.start + i; + let offset = self.offsets.set[usize::from(haystack[pos])].max; + cmp::max(span.start, pos.saturating_sub(usize::from(offset))) + }) + .map_or(Candidate::None, Candidate::PossibleStartOfMatch) + } +} + +/// A builder for constructing a starting byte prefilter. +/// +/// A starting byte prefilter is a simplistic prefilter that looks for possible +/// matches by reporting all positions corresponding to a particular byte. This +/// generally only takes affect when there are at most 3 distinct possible +/// starting bytes. e.g., the patterns `foo`, `bar`, and `baz` have two +/// distinct starting bytes (`f` and `b`), and this prefilter returns all +/// occurrences of either `f` or `b`. +/// +/// In some cases, a heuristic frequency analysis may determine that it would +/// be better not to use this prefilter even when there are 3 or fewer distinct +/// starting bytes. +#[derive(Clone, Debug)] +struct StartBytesBuilder { + /// Whether this prefilter should account for ASCII case insensitivity or + /// not. + ascii_case_insensitive: bool, + /// The set of starting bytes observed. + byteset: Vec<bool>, + /// The number of bytes set to true in `byteset`. + count: usize, + /// The sum of frequency ranks for the rare bytes detected. This is + /// intended to give a heuristic notion of how rare the bytes are. + rank_sum: u16, +} + +impl StartBytesBuilder { + /// Create a new builder for constructing a start byte prefilter. 
+ fn new() -> StartBytesBuilder { + StartBytesBuilder { + ascii_case_insensitive: false, + byteset: vec![false; 256], + count: 0, + rank_sum: 0, + } + } + + /// Enable ASCII case insensitivity. When set, byte strings added to this + /// builder will be interpreted without respect to ASCII case. + fn ascii_case_insensitive(mut self, yes: bool) -> StartBytesBuilder { + self.ascii_case_insensitive = yes; + self + } + + /// Build the starting bytes prefilter. + /// + /// If there are more than 3 distinct starting bytes, or if heuristics + /// otherwise determine that this prefilter should not be used, then `None` + /// is returned. + fn build(&self) -> Option<Prefilter> { + #[cfg(feature = "perf-literal")] + fn imp(builder: &StartBytesBuilder) -> Option<Prefilter> { + if builder.count > 3 { + return None; + } + let (mut bytes, mut len) = ([0; 3], 0); + for b in 0..256 { + if !builder.byteset[b] { + continue; + } + // We don't handle non-ASCII bytes for now. Getting non-ASCII + // bytes right is trickier, since we generally don't want to put + // a leading UTF-8 code unit into a prefilter that isn't ASCII, + // since they can frequently. Instead, it would be better to use a + // continuation byte, but this requires more sophisticated analysis + // of the automaton and a richer prefilter API. + if b > 0x7F { + return None; + } + bytes[len] = b as u8; + len += 1; + } + let finder: Arc<dyn PrefilterI> = match len { + 0 => return None, + 1 => Arc::new(StartBytesOne { byte1: bytes[0] }), + 2 => Arc::new(StartBytesTwo { + byte1: bytes[0], + byte2: bytes[1], + }), + 3 => Arc::new(StartBytesThree { + byte1: bytes[0], + byte2: bytes[1], + byte3: bytes[2], + }), + _ => unreachable!(), + }; + Some(Prefilter { finder, memory_usage: 0 }) + } + + #[cfg(not(feature = "perf-literal"))] + fn imp(_: &StartBytesBuilder) -> Option<Prefilter> { + None + } + + imp(self) + } + + /// Add a byte string to this builder. 
+ /// + /// All patterns added to an Aho-Corasick automaton should be added to this + /// builder before attempting to construct the prefilter. + fn add(&mut self, bytes: &[u8]) { + if self.count > 3 { + return; + } + if let Some(&byte) = bytes.get(0) { + self.add_one_byte(byte); + if self.ascii_case_insensitive { + self.add_one_byte(opposite_ascii_case(byte)); + } + } + } + + fn add_one_byte(&mut self, byte: u8) { + if !self.byteset[byte as usize] { + self.byteset[byte as usize] = true; + self.count += 1; + self.rank_sum += freq_rank(byte) as u16; + } + } +} + +/// A prefilter for scanning for a single starting byte. +#[cfg(feature = "perf-literal")] +#[derive(Clone, Debug)] +struct StartBytesOne { + byte1: u8, +} + +#[cfg(feature = "perf-literal")] +impl PrefilterI for StartBytesOne { + fn find_in(&self, haystack: &[u8], span: Span) -> Candidate { + memchr::memchr(self.byte1, &haystack[span]) + .map(|i| span.start + i) + .map_or(Candidate::None, Candidate::PossibleStartOfMatch) + } +} + +/// A prefilter for scanning for two starting bytes. +#[cfg(feature = "perf-literal")] +#[derive(Clone, Debug)] +struct StartBytesTwo { + byte1: u8, + byte2: u8, +} + +#[cfg(feature = "perf-literal")] +impl PrefilterI for StartBytesTwo { + fn find_in(&self, haystack: &[u8], span: Span) -> Candidate { + memchr::memchr2(self.byte1, self.byte2, &haystack[span]) + .map(|i| span.start + i) + .map_or(Candidate::None, Candidate::PossibleStartOfMatch) + } +} + +/// A prefilter for scanning for three starting bytes. 
+#[cfg(feature = "perf-literal")] +#[derive(Clone, Debug)] +struct StartBytesThree { + byte1: u8, + byte2: u8, + byte3: u8, +} + +#[cfg(feature = "perf-literal")] +impl PrefilterI for StartBytesThree { + fn find_in(&self, haystack: &[u8], span: Span) -> Candidate { + memchr::memchr3(self.byte1, self.byte2, self.byte3, &haystack[span]) + .map(|i| span.start + i) + .map_or(Candidate::None, Candidate::PossibleStartOfMatch) + } +} + +/// If the given byte is an ASCII letter, then return it in the opposite case. +/// e.g., Given `b'A'`, this returns `b'a'`, and given `b'a'`, this returns +/// `b'A'`. If a non-ASCII letter is given, then the given byte is returned. +pub(crate) fn opposite_ascii_case(b: u8) -> u8 { + if b'A' <= b && b <= b'Z' { + b.to_ascii_lowercase() + } else if b'a' <= b && b <= b'z' { + b.to_ascii_uppercase() + } else { + b + } +} + +/// Return the frequency rank of the given byte. The higher the rank, the more +/// common the byte (heuristically speaking). +fn freq_rank(b: u8) -> u8 { + use crate::util::byte_frequencies::BYTE_FREQUENCIES; + BYTE_FREQUENCIES[b as usize] +} diff --git a/third_party/rust/aho-corasick/src/util/primitives.rs b/third_party/rust/aho-corasick/src/util/primitives.rs new file mode 100644 index 0000000000..784d397171 --- /dev/null +++ b/third_party/rust/aho-corasick/src/util/primitives.rs @@ -0,0 +1,759 @@ +/*! +Lower level primitive types that are useful in a variety of circumstances. + +# Overview + +This list represents the principle types in this module and briefly describes +when you might want to use them. + +* [`PatternID`] - A type that represents the identifier of a regex pattern. +This is probably the most widely used type in this module (which is why it's +also re-exported in the crate root). +* [`StateID`] - A type the represents the identifier of a finite automaton +state. This is used for both NFAs and DFAs, with the notable exception of +the hybrid NFA/DFA. 
(The hybrid NFA/DFA uses a special purpose "lazy" state +identifier.) +* [`SmallIndex`] - The internal representation of both a `PatternID` and a +`StateID`. Its purpose is to serve as a type that can index memory without +being as big as a `usize` on 64-bit targets. The main idea behind this type +is that there are many things in regex engines that will, in practice, never +overflow a 32-bit integer. (For example, like the number of patterns in a regex +or the number of states in an NFA.) Thus, a `SmallIndex` can be used to index +memory without peppering `as` casts everywhere. Moreover, it forces callers +to handle errors in the case where, somehow, the value would otherwise overflow +either a 32-bit integer or a `usize` (e.g., on 16-bit targets). +*/ + +// The macro we use to define some types below adds methods that we don't +// use on some of the types. There isn't much, so we just squash the warning. +#![allow(dead_code)] + +use alloc::vec::Vec; + +use crate::util::int::{Usize, U16, U32, U64}; + +/// A type that represents a "small" index. +/// +/// The main idea of this type is to provide something that can index memory, +/// but uses less memory than `usize` on 64-bit systems. Specifically, its +/// representation is always a `u32` and has `repr(transparent)` enabled. (So +/// it is safe to transmute between a `u32` and a `SmallIndex`.) +/// +/// A small index is typically useful in cases where there is no practical way +/// that the index will overflow a 32-bit integer. A good example of this is +/// an NFA state. If you could somehow build an NFA with `2^30` states, its +/// memory usage would be exorbitant and its runtime execution would be so +/// slow as to be completely worthless. Therefore, this crate generally deems +/// it acceptable to return an error if it would otherwise build an NFA that +/// requires a slice longer than what a 32-bit integer can index. In exchange, +/// we can use 32-bit indices instead of 64-bit indices in various places. 
+/// +/// This type ensures this by providing a constructor that will return an error +/// if its argument cannot fit into the type. This makes it much easier to +/// handle these sorts of boundary cases that are otherwise extremely subtle. +/// +/// On all targets, this type guarantees that its value will fit in a `u32`, +/// `i32`, `usize` and an `isize`. This means that on 16-bit targets, for +/// example, this type's maximum value will never overflow an `isize`, +/// which means it will never overflow a `i16` even though its internal +/// representation is still a `u32`. +/// +/// The purpose for making the type fit into even signed integer types like +/// `isize` is to guarantee that the difference between any two small indices +/// is itself also a small index. This is useful in certain contexts, e.g., +/// for delta encoding. +/// +/// # Other types +/// +/// The following types wrap `SmallIndex` to provide a more focused use case: +/// +/// * [`PatternID`] is for representing the identifiers of patterns. +/// * [`StateID`] is for representing the identifiers of states in finite +/// automata. It is used for both NFAs and DFAs. +/// +/// # Representation +/// +/// This type is always represented internally by a `u32` and is marked as +/// `repr(transparent)`. Thus, this type always has the same representation as +/// a `u32`. It is thus safe to transmute between a `u32` and a `SmallIndex`. +/// +/// # Indexing +/// +/// For convenience, callers may use a `SmallIndex` to index slices. +/// +/// # Safety +/// +/// While a `SmallIndex` is meant to guarantee that its value fits into `usize` +/// without using as much space as a `usize` on all targets, callers must +/// not rely on this property for safety. Callers may choose to rely on this +/// property for correctness however. For example, creating a `SmallIndex` with +/// an invalid value can be done in entirely safe code. This may in turn result +/// in panics or silent logical errors. 
+#[derive( + Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord, +)] +#[repr(transparent)] +pub(crate) struct SmallIndex(u32); + +impl SmallIndex { + /// The maximum index value. + #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] + pub const MAX: SmallIndex = + // FIXME: Use as_usize() once const functions in traits are stable. + SmallIndex::new_unchecked(core::i32::MAX as usize - 1); + + /// The maximum index value. + #[cfg(target_pointer_width = "16")] + pub const MAX: SmallIndex = + SmallIndex::new_unchecked(core::isize::MAX - 1); + + /// The total number of values that can be represented as a small index. + pub const LIMIT: usize = SmallIndex::MAX.as_usize() + 1; + + /// The zero index value. + pub const ZERO: SmallIndex = SmallIndex::new_unchecked(0); + + /// The number of bytes that a single small index uses in memory. + pub const SIZE: usize = core::mem::size_of::<SmallIndex>(); + + /// Create a new small index. + /// + /// If the given index exceeds [`SmallIndex::MAX`], then this returns + /// an error. + #[inline] + pub fn new(index: usize) -> Result<SmallIndex, SmallIndexError> { + SmallIndex::try_from(index) + } + + /// Create a new small index without checking whether the given value + /// exceeds [`SmallIndex::MAX`]. + /// + /// Using this routine with an invalid index value will result in + /// unspecified behavior, but *not* undefined behavior. In particular, an + /// invalid index value is likely to cause panics or possibly even silent + /// logical errors. + /// + /// Callers must never rely on a `SmallIndex` to be within a certain range + /// for memory safety. + #[inline] + pub const fn new_unchecked(index: usize) -> SmallIndex { + // FIXME: Use as_u32() once const functions in traits are stable. + SmallIndex::from_u32_unchecked(index as u32) + } + + /// Create a new small index from a `u32` without checking whether the + /// given value exceeds [`SmallIndex::MAX`]. 
+ /// + /// Using this routine with an invalid index value will result in + /// unspecified behavior, but *not* undefined behavior. In particular, an + /// invalid index value is likely to cause panics or possibly even silent + /// logical errors. + /// + /// Callers must never rely on a `SmallIndex` to be within a certain range + /// for memory safety. + #[inline] + pub const fn from_u32_unchecked(index: u32) -> SmallIndex { + SmallIndex(index) + } + + /// Like [`SmallIndex::new`], but panics if the given index is not valid. + #[inline] + pub fn must(index: usize) -> SmallIndex { + SmallIndex::new(index).expect("invalid small index") + } + + /// Return this small index as a `usize`. This is guaranteed to never + /// overflow `usize`. + #[inline] + pub const fn as_usize(&self) -> usize { + // FIXME: Use as_usize() once const functions in traits are stable. + self.0 as usize + } + + /// Return this small index as a `u64`. This is guaranteed to never + /// overflow. + #[inline] + pub const fn as_u64(&self) -> u64 { + // FIXME: Use u64::from() once const functions in traits are stable. + self.0 as u64 + } + + /// Return the internal `u32` of this small index. This is guaranteed to + /// never overflow `u32`. + #[inline] + pub const fn as_u32(&self) -> u32 { + self.0 + } + + /// Return the internal `u32` of this small index represented as an `i32`. + /// This is guaranteed to never overflow an `i32`. + #[inline] + pub const fn as_i32(&self) -> i32 { + // This is OK because we guarantee that our max value is <= i32::MAX. + self.0 as i32 + } + + /// Returns one more than this small index as a usize. + /// + /// Since a small index has constraints on its maximum value, adding `1` to + /// it will always fit in a `usize`, `isize`, `u32` and a `i32`. + #[inline] + pub fn one_more(&self) -> usize { + self.as_usize() + 1 + } + + /// Decode this small index from the bytes given using the native endian + /// byte order for the current target. 
+ /// + /// If the decoded integer is not representable as a small index for the + /// current target, then this returns an error. + #[inline] + pub fn from_ne_bytes( + bytes: [u8; 4], + ) -> Result<SmallIndex, SmallIndexError> { + let id = u32::from_ne_bytes(bytes); + if id > SmallIndex::MAX.as_u32() { + return Err(SmallIndexError { attempted: u64::from(id) }); + } + Ok(SmallIndex::new_unchecked(id.as_usize())) + } + + /// Decode this small index from the bytes given using the native endian + /// byte order for the current target. + /// + /// This is analogous to [`SmallIndex::new_unchecked`] in that is does not + /// check whether the decoded integer is representable as a small index. + #[inline] + pub fn from_ne_bytes_unchecked(bytes: [u8; 4]) -> SmallIndex { + SmallIndex::new_unchecked(u32::from_ne_bytes(bytes).as_usize()) + } + + /// Return the underlying small index integer as raw bytes in native endian + /// format. + #[inline] + pub fn to_ne_bytes(&self) -> [u8; 4] { + self.0.to_ne_bytes() + } +} + +impl<T> core::ops::Index<SmallIndex> for [T] { + type Output = T; + + #[inline] + fn index(&self, index: SmallIndex) -> &T { + &self[index.as_usize()] + } +} + +impl<T> core::ops::IndexMut<SmallIndex> for [T] { + #[inline] + fn index_mut(&mut self, index: SmallIndex) -> &mut T { + &mut self[index.as_usize()] + } +} + +impl<T> core::ops::Index<SmallIndex> for Vec<T> { + type Output = T; + + #[inline] + fn index(&self, index: SmallIndex) -> &T { + &self[index.as_usize()] + } +} + +impl<T> core::ops::IndexMut<SmallIndex> for Vec<T> { + #[inline] + fn index_mut(&mut self, index: SmallIndex) -> &mut T { + &mut self[index.as_usize()] + } +} + +impl From<StateID> for SmallIndex { + fn from(sid: StateID) -> SmallIndex { + sid.0 + } +} + +impl From<PatternID> for SmallIndex { + fn from(pid: PatternID) -> SmallIndex { + pid.0 + } +} + +impl From<u8> for SmallIndex { + fn from(index: u8) -> SmallIndex { + SmallIndex::new_unchecked(usize::from(index)) + } +} + +impl 
TryFrom<u16> for SmallIndex { + type Error = SmallIndexError; + + fn try_from(index: u16) -> Result<SmallIndex, SmallIndexError> { + if u32::from(index) > SmallIndex::MAX.as_u32() { + return Err(SmallIndexError { attempted: u64::from(index) }); + } + Ok(SmallIndex::new_unchecked(index.as_usize())) + } +} + +impl TryFrom<u32> for SmallIndex { + type Error = SmallIndexError; + + fn try_from(index: u32) -> Result<SmallIndex, SmallIndexError> { + if index > SmallIndex::MAX.as_u32() { + return Err(SmallIndexError { attempted: u64::from(index) }); + } + Ok(SmallIndex::new_unchecked(index.as_usize())) + } +} + +impl TryFrom<u64> for SmallIndex { + type Error = SmallIndexError; + + fn try_from(index: u64) -> Result<SmallIndex, SmallIndexError> { + if index > SmallIndex::MAX.as_u64() { + return Err(SmallIndexError { attempted: index }); + } + Ok(SmallIndex::new_unchecked(index.as_usize())) + } +} + +impl TryFrom<usize> for SmallIndex { + type Error = SmallIndexError; + + fn try_from(index: usize) -> Result<SmallIndex, SmallIndexError> { + if index > SmallIndex::MAX.as_usize() { + return Err(SmallIndexError { attempted: index.as_u64() }); + } + Ok(SmallIndex::new_unchecked(index)) + } +} + +/// This error occurs when a small index could not be constructed. +/// +/// This occurs when given an integer exceeding the maximum small index value. +/// +/// When the `std` feature is enabled, this implements the `Error` trait. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct SmallIndexError { + attempted: u64, +} + +impl SmallIndexError { + /// Returns the value that could not be converted to a small index. 
+ pub fn attempted(&self) -> u64 { + self.attempted + } +} + +#[cfg(feature = "std")] +impl std::error::Error for SmallIndexError {} + +impl core::fmt::Display for SmallIndexError { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!( + f, + "failed to create small index from {:?}, which exceeds {:?}", + self.attempted(), + SmallIndex::MAX, + ) + } +} + +#[derive(Clone, Debug)] +pub(crate) struct SmallIndexIter { + rng: core::ops::Range<usize>, +} + +impl Iterator for SmallIndexIter { + type Item = SmallIndex; + + fn next(&mut self) -> Option<SmallIndex> { + if self.rng.start >= self.rng.end { + return None; + } + let next_id = self.rng.start + 1; + let id = core::mem::replace(&mut self.rng.start, next_id); + // new_unchecked is OK since we asserted that the number of + // elements in this iterator will fit in an ID at construction. + Some(SmallIndex::new_unchecked(id)) + } +} + +macro_rules! index_type_impls { + ($name:ident, $err:ident, $iter:ident, $withiter:ident) => { + impl $name { + /// The maximum value. + pub const MAX: $name = $name(SmallIndex::MAX); + + /// The total number of values that can be represented. + pub const LIMIT: usize = SmallIndex::LIMIT; + + /// The zero value. + pub const ZERO: $name = $name(SmallIndex::ZERO); + + /// The number of bytes that a single value uses in memory. + pub const SIZE: usize = SmallIndex::SIZE; + + /// Create a new value that is represented by a "small index." + /// + /// If the given index exceeds the maximum allowed value, then this + /// returns an error. + #[inline] + pub fn new(value: usize) -> Result<$name, $err> { + SmallIndex::new(value).map($name).map_err($err) + } + + /// Create a new value without checking whether the given argument + /// exceeds the maximum. + /// + /// Using this routine with an invalid value will result in + /// unspecified behavior, but *not* undefined behavior. 
In + /// particular, an invalid ID value is likely to cause panics or + /// possibly even silent logical errors. + /// + /// Callers must never rely on this type to be within a certain + /// range for memory safety. + #[inline] + pub const fn new_unchecked(value: usize) -> $name { + $name(SmallIndex::new_unchecked(value)) + } + + /// Create a new value from a `u32` without checking whether the + /// given value exceeds the maximum. + /// + /// Using this routine with an invalid value will result in + /// unspecified behavior, but *not* undefined behavior. In + /// particular, an invalid ID value is likely to cause panics or + /// possibly even silent logical errors. + /// + /// Callers must never rely on this type to be within a certain + /// range for memory safety. + #[inline] + pub const fn from_u32_unchecked(index: u32) -> $name { + $name(SmallIndex::from_u32_unchecked(index)) + } + + /// Like `new`, but panics if the given value is not valid. + #[inline] + pub fn must(value: usize) -> $name { + $name::new(value).expect(concat!( + "invalid ", + stringify!($name), + " value" + )) + } + + /// Return the internal value as a `usize`. This is guaranteed to + /// never overflow `usize`. + #[inline] + pub const fn as_usize(&self) -> usize { + self.0.as_usize() + } + + /// Return the internal value as a `u64`. This is guaranteed to + /// never overflow. + #[inline] + pub const fn as_u64(&self) -> u64 { + self.0.as_u64() + } + + /// Return the internal value as a `u32`. This is guaranteed to + /// never overflow `u32`. + #[inline] + pub const fn as_u32(&self) -> u32 { + self.0.as_u32() + } + + /// Return the internal value as a `i32`. This is guaranteed to + /// never overflow an `i32`. + #[inline] + pub const fn as_i32(&self) -> i32 { + self.0.as_i32() + } + + /// Returns one more than this value as a usize. 
+ /// + /// Since values represented by a "small index" have constraints + /// on their maximum value, adding `1` to it will always fit in a + /// `usize`, `u32` and a `i32`. + #[inline] + pub fn one_more(&self) -> usize { + self.0.one_more() + } + + /// Decode this value from the bytes given using the native endian + /// byte order for the current target. + /// + /// If the decoded integer is not representable as a small index + /// for the current target, then this returns an error. + #[inline] + pub fn from_ne_bytes(bytes: [u8; 4]) -> Result<$name, $err> { + SmallIndex::from_ne_bytes(bytes).map($name).map_err($err) + } + + /// Decode this value from the bytes given using the native endian + /// byte order for the current target. + /// + /// This is analogous to `new_unchecked` in that is does not check + /// whether the decoded integer is representable as a small index. + #[inline] + pub fn from_ne_bytes_unchecked(bytes: [u8; 4]) -> $name { + $name(SmallIndex::from_ne_bytes_unchecked(bytes)) + } + + /// Return the underlying integer as raw bytes in native endian + /// format. + #[inline] + pub fn to_ne_bytes(&self) -> [u8; 4] { + self.0.to_ne_bytes() + } + + /// Returns an iterator over all values from 0 up to and not + /// including the given length. + /// + /// If the given length exceeds this type's limit, then this + /// panics. + pub(crate) fn iter(len: usize) -> $iter { + $iter::new(len) + } + } + + // We write our own Debug impl so that we get things like PatternID(5) + // instead of PatternID(SmallIndex(5)). 
+ impl core::fmt::Debug for $name { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_tuple(stringify!($name)).field(&self.as_u32()).finish() + } + } + + impl<T> core::ops::Index<$name> for [T] { + type Output = T; + + #[inline] + fn index(&self, index: $name) -> &T { + &self[index.as_usize()] + } + } + + impl<T> core::ops::IndexMut<$name> for [T] { + #[inline] + fn index_mut(&mut self, index: $name) -> &mut T { + &mut self[index.as_usize()] + } + } + + impl<T> core::ops::Index<$name> for Vec<T> { + type Output = T; + + #[inline] + fn index(&self, index: $name) -> &T { + &self[index.as_usize()] + } + } + + impl<T> core::ops::IndexMut<$name> for Vec<T> { + #[inline] + fn index_mut(&mut self, index: $name) -> &mut T { + &mut self[index.as_usize()] + } + } + + impl From<SmallIndex> for $name { + fn from(index: SmallIndex) -> $name { + $name(index) + } + } + + impl From<u8> for $name { + fn from(value: u8) -> $name { + $name(SmallIndex::from(value)) + } + } + + impl TryFrom<u16> for $name { + type Error = $err; + + fn try_from(value: u16) -> Result<$name, $err> { + SmallIndex::try_from(value).map($name).map_err($err) + } + } + + impl TryFrom<u32> for $name { + type Error = $err; + + fn try_from(value: u32) -> Result<$name, $err> { + SmallIndex::try_from(value).map($name).map_err($err) + } + } + + impl TryFrom<u64> for $name { + type Error = $err; + + fn try_from(value: u64) -> Result<$name, $err> { + SmallIndex::try_from(value).map($name).map_err($err) + } + } + + impl TryFrom<usize> for $name { + type Error = $err; + + fn try_from(value: usize) -> Result<$name, $err> { + SmallIndex::try_from(value).map($name).map_err($err) + } + } + + /// This error occurs when an ID could not be constructed. + /// + /// This occurs when given an integer exceeding the maximum allowed + /// value. + /// + /// When the `std` feature is enabled, this implements the `Error` + /// trait. 
+ #[derive(Clone, Debug, Eq, PartialEq)] + pub struct $err(SmallIndexError); + + impl $err { + /// Returns the value that could not be converted to an ID. + pub fn attempted(&self) -> u64 { + self.0.attempted() + } + } + + #[cfg(feature = "std")] + impl std::error::Error for $err {} + + impl core::fmt::Display for $err { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!( + f, + "failed to create {} from {:?}, which exceeds {:?}", + stringify!($name), + self.attempted(), + $name::MAX, + ) + } + } + + #[derive(Clone, Debug)] + pub(crate) struct $iter(SmallIndexIter); + + impl $iter { + fn new(len: usize) -> $iter { + assert!( + len <= $name::LIMIT, + "cannot create iterator for {} when number of \ + elements exceed {:?}", + stringify!($name), + $name::LIMIT, + ); + $iter(SmallIndexIter { rng: 0..len }) + } + } + + impl Iterator for $iter { + type Item = $name; + + fn next(&mut self) -> Option<$name> { + self.0.next().map($name) + } + } + + /// An iterator adapter that is like std::iter::Enumerate, but attaches + /// small index values instead. It requires `ExactSizeIterator`. At + /// construction, it ensures that the index of each element in the + /// iterator is representable in the corresponding small index type. + #[derive(Clone, Debug)] + pub(crate) struct $withiter<I> { + it: I, + ids: $iter, + } + + impl<I: Iterator + ExactSizeIterator> $withiter<I> { + fn new(it: I) -> $withiter<I> { + let ids = $name::iter(it.len()); + $withiter { it, ids } + } + } + + impl<I: Iterator + ExactSizeIterator> Iterator for $withiter<I> { + type Item = ($name, I::Item); + + fn next(&mut self) -> Option<($name, I::Item)> { + let item = self.it.next()?; + // Number of elements in this iterator must match, according + // to contract of ExactSizeIterator. + let id = self.ids.next().unwrap(); + Some((id, item)) + } + } + }; +} + +/// The identifier of a pattern in an Aho-Corasick automaton. 
+/// +/// It is represented by a `u32` even on 64-bit systems in order to conserve +/// space. Namely, on all targets, this type guarantees that its value will +/// fit in a `u32`, `i32`, `usize` and an `isize`. This means that on 16-bit +/// targets, for example, this type's maximum value will never overflow an +/// `isize`, which means it will never overflow a `i16` even though its +/// internal representation is still a `u32`. +/// +/// # Safety +/// +/// While a `PatternID` is meant to guarantee that its value fits into `usize` +/// without using as much space as a `usize` on all targets, callers must +/// not rely on this property for safety. Callers may choose to rely on this +/// property for correctness however. For example, creating a `StateID` with an +/// invalid value can be done in entirely safe code. This may in turn result in +/// panics or silent logical errors. +#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, PartialOrd, Ord)] +#[repr(transparent)] +pub struct PatternID(SmallIndex); + +/// The identifier of a finite automaton state. +/// +/// It is represented by a `u32` even on 64-bit systems in order to conserve +/// space. Namely, on all targets, this type guarantees that its value will +/// fit in a `u32`, `i32`, `usize` and an `isize`. This means that on 16-bit +/// targets, for example, this type's maximum value will never overflow an +/// `isize`, which means it will never overflow a `i16` even though its +/// internal representation is still a `u32`. +/// +/// # Safety +/// +/// While a `StateID` is meant to guarantee that its value fits into `usize` +/// without using as much space as a `usize` on all targets, callers must +/// not rely on this property for safety. Callers may choose to rely on this +/// property for correctness however. For example, creating a `StateID` with an +/// invalid value can be done in entirely safe code. This may in turn result in +/// panics or silent logical errors. 
+#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, PartialOrd, Ord)] +#[repr(transparent)] +pub struct StateID(SmallIndex); + +index_type_impls!(PatternID, PatternIDError, PatternIDIter, WithPatternIDIter); +index_type_impls!(StateID, StateIDError, StateIDIter, WithStateIDIter); + +/// A utility trait that defines a couple of adapters for making it convenient +/// to access indices as "small index" types. We require ExactSizeIterator so +/// that iterator construction can do a single check to make sure the index of +/// each element is representable by its small index type. +pub(crate) trait IteratorIndexExt: Iterator { + fn with_pattern_ids(self) -> WithPatternIDIter<Self> + where + Self: Sized + ExactSizeIterator, + { + WithPatternIDIter::new(self) + } + + fn with_state_ids(self) -> WithStateIDIter<Self> + where + Self: Sized + ExactSizeIterator, + { + WithStateIDIter::new(self) + } +} + +impl<I: Iterator> IteratorIndexExt for I {} diff --git a/third_party/rust/aho-corasick/src/util/remapper.rs b/third_party/rust/aho-corasick/src/util/remapper.rs new file mode 100644 index 0000000000..7c47a082cd --- /dev/null +++ b/third_party/rust/aho-corasick/src/util/remapper.rs @@ -0,0 +1,214 @@ +use alloc::vec::Vec; + +use crate::{nfa::noncontiguous, util::primitives::StateID}; + +/// Remappable is a tightly coupled abstraction that facilitates remapping +/// state identifiers in DFAs. +/// +/// The main idea behind remapping state IDs is that DFAs often need to check +/// if a certain state is a "special" state of some kind (like a match state) +/// during a search. Since this is extremely perf critical code, we want this +/// check to be as fast as possible. Partitioning state IDs into, for example, +/// into "non-match" and "match" states means one can tell if a state is a +/// match state via a simple comparison of the state ID. +/// +/// The issue is that during the DFA construction process, it's not +/// particularly easy to partition the states. 
Instead, the simplest thing is +/// to often just do a pass over all of the states and shuffle them into their +/// desired partitionings. To do that, we need a mechanism for swapping states. +/// Hence, this abstraction. +/// +/// Normally, for such little code, I would just duplicate it. But this is a +/// key optimization and the implementation is a bit subtle. So the abstraction +/// is basically a ham-fisted attempt at DRY. The only place we use this is in +/// the dense and one-pass DFAs. +/// +/// See also src/dfa/special.rs for a more detailed explanation of how dense +/// DFAs are partitioned. +pub(crate) trait Remappable: core::fmt::Debug { + /// Return the total number of states. + fn state_len(&self) -> usize; + + /// Swap the states pointed to by the given IDs. The underlying finite + /// state machine should be mutated such that all of the transitions in + /// `id1` are now in the memory region where the transitions for `id2` + /// were, and all of the transitions in `id2` are now in the memory region + /// where the transitions for `id1` were. + /// + /// Essentially, this "moves" `id1` to `id2` and `id2` to `id1`. + /// + /// It is expected that, after calling this, the underlying state machine + /// will be left in an inconsistent state, since any other transitions + /// pointing to, e.g., `id1` need to be updated to point to `id2`, since + /// that's where `id1` moved to. + /// + /// In order to "fix" the underlying inconsistent state, a `Remapper` + /// should be used to guarantee that `remap` is called at the appropriate + /// time. + fn swap_states(&mut self, id1: StateID, id2: StateID); + + /// This must remap every single state ID in the underlying value according + /// to the function given. For example, in a DFA, this should remap every + /// transition and every starting state ID. 
+ fn remap(&mut self, map: impl Fn(StateID) -> StateID); +} + +/// Remapper is an abstraction the manages the remapping of state IDs in a +/// finite state machine. This is useful when one wants to shuffle states into +/// different positions in the machine. +/// +/// One of the key complexities this manages is the ability to correctly move +/// one state multiple times. +/// +/// Once shuffling is complete, `remap` must be called, which will rewrite +/// all pertinent transitions to updated state IDs. Neglecting to call `remap` +/// will almost certainly result in a corrupt machine. +#[derive(Debug)] +pub(crate) struct Remapper { + /// A map from the index of a state to its pre-multiplied identifier. + /// + /// When a state is swapped with another, then their corresponding + /// locations in this map are also swapped. Thus, its new position will + /// still point to its old pre-multiplied StateID. + /// + /// While there is a bit more to it, this then allows us to rewrite the + /// state IDs in a DFA's transition table in a single pass. This is done + /// by iterating over every ID in this map, then iterating over each + /// transition for the state at that ID and re-mapping the transition from + /// `old_id` to `map[dfa.to_index(old_id)]`. That is, we find the position + /// in this map where `old_id` *started*, and set it to where it ended up + /// after all swaps have been completed. + map: Vec<StateID>, + /// A way to map indices to state IDs (and back). + idx: IndexMapper, +} + +impl Remapper { + /// Create a new remapper from the given remappable implementation. The + /// remapper can then be used to swap states. The remappable value given + /// here must the same one given to `swap` and `remap`. + /// + /// The given stride should be the stride of the transition table expressed + /// as a power of 2. This stride is used to map between state IDs and state + /// indices. 
If state IDs and state indices are equivalent, then provide + /// a `stride2` of `0`, which acts as an identity. + pub(crate) fn new(r: &impl Remappable, stride2: usize) -> Remapper { + let idx = IndexMapper { stride2 }; + let map = (0..r.state_len()).map(|i| idx.to_state_id(i)).collect(); + Remapper { map, idx } + } + + /// Swap two states. Once this is called, callers must follow through to + /// call `remap`, or else it's possible for the underlying remappable + /// value to be in a corrupt state. + pub(crate) fn swap( + &mut self, + r: &mut impl Remappable, + id1: StateID, + id2: StateID, + ) { + if id1 == id2 { + return; + } + r.swap_states(id1, id2); + self.map.swap(self.idx.to_index(id1), self.idx.to_index(id2)); + } + + /// Complete the remapping process by rewriting all state IDs in the + /// remappable value according to the swaps performed. + pub(crate) fn remap(mut self, r: &mut impl Remappable) { + // Update the map to account for states that have been swapped + // multiple times. For example, if (A, C) and (C, G) are swapped, then + // transitions previously pointing to A should now point to G. But if + // we don't update our map, they will erroneously be set to C. All we + // do is follow the swaps in our map until we see our original state + // ID. + // + // The intuition here is to think about how changes are made to the + // map: only through pairwise swaps. That means that starting at any + // given state, it is always possible to find the loop back to that + // state by following the swaps represented in the map (which might be + // 0 swaps). + // + // We are also careful to clone the map before starting in order to + // freeze it. We use the frozen map to find our loops, since we need to + // update our map as well. Without freezing it, our updates could break + // the loops referenced above and produce incorrect results. 
+ let oldmap = self.map.clone(); + for i in 0..r.state_len() { + let cur_id = self.idx.to_state_id(i); + let mut new_id = oldmap[i]; + if cur_id == new_id { + continue; + } + loop { + let id = oldmap[self.idx.to_index(new_id)]; + if cur_id == id { + self.map[i] = new_id; + break; + } + new_id = id; + } + } + r.remap(|sid| self.map[self.idx.to_index(sid)]); + } +} + +/// A simple type for mapping between state indices and state IDs. +/// +/// The reason why this exists is because state IDs are "premultiplied" in a +/// DFA. That is, in order to get to the transitions for a particular state, +/// one need only use the state ID as-is, instead of having to multiply it by +/// transition table's stride. +/// +/// The downside of this is that it's inconvenient to map between state IDs +/// using a dense map, e.g., Vec<StateID>. That's because state IDs look like +/// `0`, `stride`, `2*stride`, `3*stride`, etc., instead of `0`, `1`, `2`, `3`, +/// etc. +/// +/// Since our state IDs are premultiplied, we can convert back-and-forth +/// between IDs and indices by simply unmultiplying the IDs and multiplying the +/// indices. +/// +/// Note that for a sparse NFA, state IDs and indices are equivalent. In this +/// case, we set the stride of the index mapped to be `0`, which acts as an +/// identity. +#[derive(Debug)] +struct IndexMapper { + /// The power of 2 corresponding to the stride of the corresponding + /// transition table. 'id >> stride2' de-multiplies an ID while 'index << + /// stride2' pre-multiplies an index to an ID. + stride2: usize, +} + +impl IndexMapper { + /// Convert a state ID to a state index. + fn to_index(&self, id: StateID) -> usize { + id.as_usize() >> self.stride2 + } + + /// Convert a state index to a state ID. + fn to_state_id(&self, index: usize) -> StateID { + // CORRECTNESS: If the given index is not valid, then it is not + // required for this to panic or return a valid state ID. 
We'll "just" + // wind up with panics or silent logic errors at some other point. But + // this is OK because if Remappable::state_len is correct and so is + // 'to_index', then all inputs to 'to_state_id' should be valid indices + // and thus transform into valid state IDs. + StateID::new_unchecked(index << self.stride2) + } +} + +impl Remappable for noncontiguous::NFA { + fn state_len(&self) -> usize { + noncontiguous::NFA::states(self).len() + } + + fn swap_states(&mut self, id1: StateID, id2: StateID) { + noncontiguous::NFA::swap_states(self, id1, id2) + } + + fn remap(&mut self, map: impl Fn(StateID) -> StateID) { + noncontiguous::NFA::remap(self, map) + } +} diff --git a/third_party/rust/aho-corasick/src/util/search.rs b/third_party/rust/aho-corasick/src/util/search.rs new file mode 100644 index 0000000000..59b7035e1f --- /dev/null +++ b/third_party/rust/aho-corasick/src/util/search.rs @@ -0,0 +1,1148 @@ +use core::ops::{Range, RangeBounds}; + +use crate::util::primitives::PatternID; + +/// The configuration and the haystack to use for an Aho-Corasick search. +/// +/// When executing a search, there are a few parameters one might want to +/// configure: +/// +/// * The haystack to search, provided to the [`Input::new`] constructor. This +/// is the only required parameter. +/// * The span _within_ the haystack to limit a search to. (The default +/// is the entire haystack.) This is configured via [`Input::span`] or +/// [`Input::range`]. +/// * Whether to run an unanchored (matches can occur anywhere after the +/// start of the search) or anchored (matches can only occur beginning at +/// the start of the search) search. Unanchored search is the default. This is +/// configured via [`Input::anchored`]. +/// * Whether to quit the search as soon as a match has been found, regardless +/// of the [`MatchKind`] that the searcher was built with. This is configured +/// via [`Input::earliest`]. 
+/// +/// For most cases, the defaults for all optional parameters are appropriate. +/// The utility of this type is that it keeps the default or common case simple +/// while permitting tweaking parameters in more niche use cases while reusing +/// the same search APIs. +/// +/// # Valid bounds and search termination +/// +/// An `Input` permits setting the bounds of a search via either +/// [`Input::span`] or [`Input::range`]. The bounds set must be valid, or +/// else a panic will occur. Bounds are valid if and only if: +/// +/// * The bounds represent a valid range into the input's haystack. +/// * **or** the end bound is a valid ending bound for the haystack *and* +/// the start bound is exactly one greater than the end bound. +/// +/// In the latter case, [`Input::is_done`] will return true and indicates any +/// search receiving such an input should immediately return with no match. +/// +/// Other than representing "search is complete," the `Input::span` and +/// `Input::range` APIs are never necessary. Instead, callers can slice the +/// haystack instead, e.g., with `&haystack[start..end]`. With that said, they +/// can be more convenient than slicing because the match positions reported +/// when using `Input::span` or `Input::range` are in terms of the original +/// haystack. If you instead use `&haystack[start..end]`, then you'll need to +/// add `start` to any match position returned in order for it to be a correct +/// index into `haystack`. +/// +/// # Example: `&str` and `&[u8]` automatically convert to an `Input` +/// +/// There is a `From<&T> for Input` implementation for all `T: AsRef<[u8]>`. +/// Additionally, the [`AhoCorasick`](crate::AhoCorasick) search APIs accept +/// a `Into<Input>`. These two things combined together mean you can provide +/// things like `&str` and `&[u8]` to search APIs when the defaults are +/// suitable, but also an `Input` when they're not. 
For example: +/// +/// ``` +/// use aho_corasick::{AhoCorasick, Anchored, Input, Match, StartKind}; +/// +/// // Build a searcher that supports both unanchored and anchored modes. +/// let ac = AhoCorasick::builder() +/// .start_kind(StartKind::Both) +/// .build(&["abcd", "b"]) +/// .unwrap(); +/// let haystack = "abcd"; +/// +/// // A search using default parameters is unanchored. With standard +/// // semantics, this finds `b` first. +/// assert_eq!( +/// Some(Match::must(1, 1..2)), +/// ac.find(haystack), +/// ); +/// // Using the same 'find' routine, we can provide an 'Input' explicitly +/// // that is configured to do an anchored search. Since 'b' doesn't start +/// // at the beginning of the search, it is not reported as a match. +/// assert_eq!( +/// Some(Match::must(0, 0..4)), +/// ac.find(Input::new(haystack).anchored(Anchored::Yes)), +/// ); +/// ``` +#[derive(Clone)] +pub struct Input<'h> { + haystack: &'h [u8], + span: Span, + anchored: Anchored, + earliest: bool, +} + +impl<'h> Input<'h> { + /// Create a new search configuration for the given haystack. + #[inline] + pub fn new<H: ?Sized + AsRef<[u8]>>(haystack: &'h H) -> Input<'h> { + Input { + haystack: haystack.as_ref(), + span: Span { start: 0, end: haystack.as_ref().len() }, + anchored: Anchored::No, + earliest: false, + } + } + + /// Set the span for this search. + /// + /// This routine is generic over how a span is provided. While + /// a [`Span`] may be given directly, one may also provide a + /// `std::ops::Range<usize>`. To provide anything supported by range + /// syntax, use the [`Input::range`] method. + /// + /// The default span is the entire haystack. + /// + /// Note that [`Input::range`] overrides this method and vice versa. + /// + /// # Panics + /// + /// This panics if the given span does not correspond to valid bounds in + /// the haystack or the termination of a search. 
+ /// + /// # Example + /// + /// This example shows how the span of the search can impact whether a + /// match is reported or not. + /// + /// ``` + /// use aho_corasick::{AhoCorasick, Input, MatchKind}; + /// + /// let patterns = &["b", "abcd", "abc"]; + /// let haystack = "abcd"; + /// + /// let ac = AhoCorasick::builder() + /// .match_kind(MatchKind::LeftmostFirst) + /// .build(patterns) + /// .unwrap(); + /// let input = Input::new(haystack).span(0..3); + /// let mat = ac.try_find(input)?.expect("should have a match"); + /// // Without the span stopping the search early, 'abcd' would be reported + /// // because it is the correct leftmost-first match. + /// assert_eq!("abc", &haystack[mat.span()]); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn span<S: Into<Span>>(mut self, span: S) -> Input<'h> { + self.set_span(span); + self + } + + /// Like `Input::span`, but accepts any range instead. + /// + /// The default range is the entire haystack. + /// + /// Note that [`Input::span`] overrides this method and vice versa. + /// + /// # Panics + /// + /// This routine will panic if the given range could not be converted + /// to a valid [`Range`]. For example, this would panic when given + /// `0..=usize::MAX` since it cannot be represented using a half-open + /// interval in terms of `usize`. + /// + /// This routine also panics if the given range does not correspond to + /// valid bounds in the haystack or the termination of a search. + /// + /// # Example + /// + /// ``` + /// use aho_corasick::Input; + /// + /// let input = Input::new("foobar"); + /// assert_eq!(0..6, input.get_range()); + /// + /// let input = Input::new("foobar").range(2..=4); + /// assert_eq!(2..5, input.get_range()); + /// ``` + #[inline] + pub fn range<R: RangeBounds<usize>>(mut self, range: R) -> Input<'h> { + self.set_range(range); + self + } + + /// Sets the anchor mode of a search. 
+ /// + /// When a search is anchored (via [`Anchored::Yes`]), a match must begin + /// at the start of a search. When a search is not anchored (that's + /// [`Anchored::No`]), searchers will look for a match anywhere in the + /// haystack. + /// + /// By default, the anchored mode is [`Anchored::No`]. + /// + /// # Support for anchored searches + /// + /// Anchored or unanchored searches might not always be available, + /// depending on the type of searcher used and its configuration: + /// + /// * [`noncontiguous::NFA`](crate::nfa::noncontiguous::NFA) always + /// supports both unanchored and anchored searches. + /// * [`contiguous::NFA`](crate::nfa::contiguous::NFA) always supports both + /// unanchored and anchored searches. + /// * [`dfa::DFA`](crate::dfa::DFA) supports only unanchored + /// searches by default. + /// [`dfa::Builder::start_kind`](crate::dfa::Builder::start_kind) can + /// be used to change the default to supporting both kinds of searches + /// or even just anchored searches. + /// * [`AhoCorasick`](crate::AhoCorasick) inherits the same setup as a + /// `DFA`. Namely, it only supports unanchored searches by default, but + /// [`AhoCorasickBuilder::start_kind`](crate::AhoCorasickBuilder::start_kind) + /// can change this. + /// + /// If you try to execute a search using a `try_` ("fallible") method with + /// an unsupported anchor mode, then an error will be returned. For calls + /// to infallible search methods, a panic will result. + /// + /// # Example + /// + /// This demonstrates the differences between an anchored search and + /// an unanchored search. Notice that we build our `AhoCorasick` searcher + /// with [`StartKind::Both`] so that it supports both unanchored and + /// anchored searches simultaneously. 
+ /// + /// ``` + /// use aho_corasick::{ + /// AhoCorasick, Anchored, Input, MatchKind, StartKind, + /// }; + /// + /// let patterns = &["bcd"]; + /// let haystack = "abcd"; + /// + /// let ac = AhoCorasick::builder() + /// .start_kind(StartKind::Both) + /// .build(patterns) + /// .unwrap(); + /// + /// // Note that 'Anchored::No' is the default, so it doesn't need to + /// // be explicitly specified here. + /// let input = Input::new(haystack); + /// let mat = ac.try_find(input)?.expect("should have a match"); + /// assert_eq!("bcd", &haystack[mat.span()]); + /// + /// // While 'bcd' occurs in the haystack, it does not begin where our + /// // search begins, so no match is found. + /// let input = Input::new(haystack).anchored(Anchored::Yes); + /// assert_eq!(None, ac.try_find(input)?); + /// + /// // However, if we start our search where 'bcd' starts, then we will + /// // find a match. + /// let input = Input::new(haystack).range(1..).anchored(Anchored::Yes); + /// let mat = ac.try_find(input)?.expect("should have a match"); + /// assert_eq!("bcd", &haystack[mat.span()]); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn anchored(mut self, mode: Anchored) -> Input<'h> { + self.set_anchored(mode); + self + } + + /// Whether to execute an "earliest" search or not. + /// + /// When running a non-overlapping search, an "earliest" search will + /// return the match location as early as possible. For example, given + /// the patterns `abc` and `b`, and a haystack of `abc`, a normal + /// leftmost-first search will return `abc` as a match. But an "earliest" + /// search will return as soon as it is known that a match occurs, which + /// happens once `b` is seen. + /// + /// Note that when using [`MatchKind::Standard`], the "earliest" option + /// has no effect since standard semantics are already "earliest." 
Note + /// also that this has no effect in overlapping searches, since overlapping + /// searches also use standard semantics and report all possible matches. + /// + /// This is disabled by default. + /// + /// # Example + /// + /// This example shows the difference between "earliest" searching and + /// normal leftmost searching. + /// + /// ``` + /// use aho_corasick::{AhoCorasick, Anchored, Input, MatchKind, StartKind}; + /// + /// let patterns = &["abc", "b"]; + /// let haystack = "abc"; + /// + /// let ac = AhoCorasick::builder() + /// .match_kind(MatchKind::LeftmostFirst) + /// .build(patterns) + /// .unwrap(); + /// + /// // The normal leftmost-first match. + /// let input = Input::new(haystack); + /// let mat = ac.try_find(input)?.expect("should have a match"); + /// assert_eq!("abc", &haystack[mat.span()]); + /// + /// // The "earliest" possible match, even if it isn't leftmost-first. + /// let input = Input::new(haystack).earliest(true); + /// let mat = ac.try_find(input)?.expect("should have a match"); + /// assert_eq!("b", &haystack[mat.span()]); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn earliest(mut self, yes: bool) -> Input<'h> { + self.set_earliest(yes); + self + } + + /// Set the span for this search configuration. + /// + /// This is like the [`Input::span`] method, except this mutates the + /// span in place. + /// + /// This routine is generic over how a span is provided. While + /// a [`Span`] may be given directly, one may also provide a + /// `std::ops::Range<usize>`. + /// + /// # Panics + /// + /// This panics if the given span does not correspond to valid bounds in + /// the haystack or the termination of a search. 
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use aho_corasick::Input;
+    ///
+    /// let mut input = Input::new("foobar");
+    /// assert_eq!(0..6, input.get_range());
+    /// input.set_span(2..4);
+    /// assert_eq!(2..4, input.get_range());
+    /// ```
+    #[inline]
+    pub fn set_span<S: Into<Span>>(&mut self, span: S) {
+        let span = span.into();
+        assert!(
+            span.end <= self.haystack.len()
+                && span.start <= span.end.wrapping_add(1),
+            "invalid span {:?} for haystack of length {}",
+            span,
+            self.haystack.len(),
+        );
+        self.span = span;
+    }
+
+    /// Set the span for this search configuration given any range.
+    ///
+    /// This is like the [`Input::range`] method, except this mutates the
+    /// span in place.
+    ///
+    /// # Panics
+    ///
+    /// This routine will panic if the given range could not be converted
+    /// to a valid [`Range`]. For example, this would panic when given
+    /// `0..=usize::MAX` since it cannot be represented using a half-open
+    /// interval in terms of `usize`.
+    ///
+    /// This routine also panics if the given range does not correspond to
+    /// valid bounds in the haystack or the termination of a search.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use aho_corasick::Input;
+    ///
+    /// let mut input = Input::new("foobar");
+    /// assert_eq!(0..6, input.get_range());
+    /// input.set_range(2..=4);
+    /// assert_eq!(2..5, input.get_range());
+    /// ```
+    #[inline]
+    pub fn set_range<R: RangeBounds<usize>>(&mut self, range: R) {
+        use core::ops::Bound;
+
+        // It's a little weird to convert ranges into spans, and then spans
+        // back into ranges when we actually slice the haystack. Because
+        // of that process, we always represent everything as a half-open
+        // interval. Therefore, handling things like m..=n is a little awkward.
+        let start = match range.start_bound() {
+            Bound::Included(&i) => i,
+            // Can this case ever happen? Range syntax doesn't support it...
+ Bound::Excluded(&i) => i.checked_add(1).unwrap(), + Bound::Unbounded => 0, + }; + let end = match range.end_bound() { + Bound::Included(&i) => i.checked_add(1).unwrap(), + Bound::Excluded(&i) => i, + Bound::Unbounded => self.haystack().len(), + }; + self.set_span(Span { start, end }); + } + + /// Set the starting offset for the span for this search configuration. + /// + /// This is a convenience routine for only mutating the start of a span + /// without having to set the entire span. + /// + /// # Panics + /// + /// This panics if the given span does not correspond to valid bounds in + /// the haystack or the termination of a search. + /// + /// # Example + /// + /// ``` + /// use aho_corasick::Input; + /// + /// let mut input = Input::new("foobar"); + /// assert_eq!(0..6, input.get_range()); + /// input.set_start(5); + /// assert_eq!(5..6, input.get_range()); + /// ``` + #[inline] + pub fn set_start(&mut self, start: usize) { + self.set_span(Span { start, ..self.get_span() }); + } + + /// Set the ending offset for the span for this search configuration. + /// + /// This is a convenience routine for only mutating the end of a span + /// without having to set the entire span. + /// + /// # Panics + /// + /// This panics if the given span does not correspond to valid bounds in + /// the haystack or the termination of a search. + /// + /// # Example + /// + /// ``` + /// use aho_corasick::Input; + /// + /// let mut input = Input::new("foobar"); + /// assert_eq!(0..6, input.get_range()); + /// input.set_end(5); + /// assert_eq!(0..5, input.get_range()); + /// ``` + #[inline] + pub fn set_end(&mut self, end: usize) { + self.set_span(Span { end, ..self.get_span() }); + } + + /// Set the anchor mode of a search. + /// + /// This is like [`Input::anchored`], except it mutates the search + /// configuration in place. 
+ /// + /// # Example + /// + /// ``` + /// use aho_corasick::{Anchored, Input}; + /// + /// let mut input = Input::new("foobar"); + /// assert_eq!(Anchored::No, input.get_anchored()); + /// + /// input.set_anchored(Anchored::Yes); + /// assert_eq!(Anchored::Yes, input.get_anchored()); + /// ``` + #[inline] + pub fn set_anchored(&mut self, mode: Anchored) { + self.anchored = mode; + } + + /// Set whether the search should execute in "earliest" mode or not. + /// + /// This is like [`Input::earliest`], except it mutates the search + /// configuration in place. + /// + /// # Example + /// + /// ``` + /// use aho_corasick::Input; + /// + /// let mut input = Input::new("foobar"); + /// assert!(!input.get_earliest()); + /// input.set_earliest(true); + /// assert!(input.get_earliest()); + /// ``` + #[inline] + pub fn set_earliest(&mut self, yes: bool) { + self.earliest = yes; + } + + /// Return a borrow of the underlying haystack as a slice of bytes. + /// + /// # Example + /// + /// ``` + /// use aho_corasick::Input; + /// + /// let input = Input::new("foobar"); + /// assert_eq!(b"foobar", input.haystack()); + /// ``` + #[inline] + pub fn haystack(&self) -> &[u8] { + self.haystack + } + + /// Return the start position of this search. + /// + /// This is a convenience routine for `search.get_span().start()`. + /// + /// # Example + /// + /// ``` + /// use aho_corasick::Input; + /// + /// let input = Input::new("foobar"); + /// assert_eq!(0, input.start()); + /// + /// let input = Input::new("foobar").span(2..4); + /// assert_eq!(2, input.start()); + /// ``` + #[inline] + pub fn start(&self) -> usize { + self.get_span().start + } + + /// Return the end position of this search. + /// + /// This is a convenience routine for `search.get_span().end()`. 
+ /// + /// # Example + /// + /// ``` + /// use aho_corasick::Input; + /// + /// let input = Input::new("foobar"); + /// assert_eq!(6, input.end()); + /// + /// let input = Input::new("foobar").span(2..4); + /// assert_eq!(4, input.end()); + /// ``` + #[inline] + pub fn end(&self) -> usize { + self.get_span().end + } + + /// Return the span for this search configuration. + /// + /// If one was not explicitly set, then the span corresponds to the entire + /// range of the haystack. + /// + /// # Example + /// + /// ``` + /// use aho_corasick::{Input, Span}; + /// + /// let input = Input::new("foobar"); + /// assert_eq!(Span { start: 0, end: 6 }, input.get_span()); + /// ``` + #[inline] + pub fn get_span(&self) -> Span { + self.span + } + + /// Return the span as a range for this search configuration. + /// + /// If one was not explicitly set, then the span corresponds to the entire + /// range of the haystack. + /// + /// # Example + /// + /// ``` + /// use aho_corasick::Input; + /// + /// let input = Input::new("foobar"); + /// assert_eq!(0..6, input.get_range()); + /// ``` + #[inline] + pub fn get_range(&self) -> Range<usize> { + self.get_span().range() + } + + /// Return the anchored mode for this search configuration. + /// + /// If no anchored mode was set, then it defaults to [`Anchored::No`]. + /// + /// # Example + /// + /// ``` + /// use aho_corasick::{Anchored, Input}; + /// + /// let mut input = Input::new("foobar"); + /// assert_eq!(Anchored::No, input.get_anchored()); + /// + /// input.set_anchored(Anchored::Yes); + /// assert_eq!(Anchored::Yes, input.get_anchored()); + /// ``` + #[inline] + pub fn get_anchored(&self) -> Anchored { + self.anchored + } + + /// Return whether this search should execute in "earliest" mode. 
+ /// + /// # Example + /// + /// ``` + /// use aho_corasick::Input; + /// + /// let input = Input::new("foobar"); + /// assert!(!input.get_earliest()); + /// ``` + #[inline] + pub fn get_earliest(&self) -> bool { + self.earliest + } + + /// Return true if this input has been exhausted, which in turn means all + /// subsequent searches will return no matches. + /// + /// This occurs precisely when the start position of this search is greater + /// than the end position of the search. + /// + /// # Example + /// + /// ``` + /// use aho_corasick::Input; + /// + /// let mut input = Input::new("foobar"); + /// assert!(!input.is_done()); + /// input.set_start(6); + /// assert!(!input.is_done()); + /// input.set_start(7); + /// assert!(input.is_done()); + /// ``` + #[inline] + pub fn is_done(&self) -> bool { + self.get_span().start > self.get_span().end + } +} + +impl<'h> core::fmt::Debug for Input<'h> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + let mut fmter = f.debug_struct("Input"); + match core::str::from_utf8(self.haystack()) { + Ok(nice) => fmter.field("haystack", &nice), + Err(_) => fmter.field("haystack", &self.haystack()), + } + .field("span", &self.span) + .field("anchored", &self.anchored) + .field("earliest", &self.earliest) + .finish() + } +} + +impl<'h, H: ?Sized + AsRef<[u8]>> From<&'h H> for Input<'h> { + #[inline] + fn from(haystack: &'h H) -> Input<'h> { + Input::new(haystack) + } +} + +/// A representation of a range in a haystack. +/// +/// A span corresponds to the starting and ending _byte offsets_ of a +/// contiguous region of bytes. The starting offset is inclusive while the +/// ending offset is exclusive. That is, a span is a half-open interval. +/// +/// A span is used to report the offsets of a match, but it is also used to +/// convey which region of a haystack should be searched via routines like +/// [`Input::span`]. 
+/// +/// This is basically equivalent to a `std::ops::Range<usize>`, except this +/// type implements `Copy` which makes it more ergonomic to use in the context +/// of this crate. Indeed, `Span` exists only because `Range<usize>` does +/// not implement `Copy`. Like a range, this implements `Index` for `[u8]` +/// and `str`, and `IndexMut` for `[u8]`. For convenience, this also impls +/// `From<Range>`, which means things like `Span::from(5..10)` work. +/// +/// There are no constraints on the values of a span. It is, for example, legal +/// to create a span where `start > end`. +#[derive(Clone, Copy, Eq, Hash, PartialEq)] +pub struct Span { + /// The start offset of the span, inclusive. + pub start: usize, + /// The end offset of the span, exclusive. + pub end: usize, +} + +impl Span { + /// Returns this span as a range. + #[inline] + pub fn range(&self) -> Range<usize> { + Range::from(*self) + } + + /// Returns true when this span is empty. That is, when `start >= end`. + #[inline] + pub fn is_empty(&self) -> bool { + self.start >= self.end + } + + /// Returns the length of this span. + /// + /// This returns `0` in precisely the cases that `is_empty` returns `true`. + #[inline] + pub fn len(&self) -> usize { + self.end.saturating_sub(self.start) + } + + /// Returns true when the given offset is contained within this span. + /// + /// Note that an empty span contains no offsets and will always return + /// false. + #[inline] + pub fn contains(&self, offset: usize) -> bool { + !self.is_empty() && self.start <= offset && offset <= self.end + } + + /// Returns a new span with `offset` added to this span's `start` and `end` + /// values. 
+ #[inline] + pub fn offset(&self, offset: usize) -> Span { + Span { start: self.start + offset, end: self.end + offset } + } +} + +impl core::fmt::Debug for Span { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "{}..{}", self.start, self.end) + } +} + +impl core::ops::Index<Span> for [u8] { + type Output = [u8]; + + #[inline] + fn index(&self, index: Span) -> &[u8] { + &self[index.range()] + } +} + +impl core::ops::IndexMut<Span> for [u8] { + #[inline] + fn index_mut(&mut self, index: Span) -> &mut [u8] { + &mut self[index.range()] + } +} + +impl core::ops::Index<Span> for str { + type Output = str; + + #[inline] + fn index(&self, index: Span) -> &str { + &self[index.range()] + } +} + +impl From<Range<usize>> for Span { + #[inline] + fn from(range: Range<usize>) -> Span { + Span { start: range.start, end: range.end } + } +} + +impl From<Span> for Range<usize> { + #[inline] + fn from(span: Span) -> Range<usize> { + Range { start: span.start, end: span.end } + } +} + +impl PartialEq<Range<usize>> for Span { + #[inline] + fn eq(&self, range: &Range<usize>) -> bool { + self.start == range.start && self.end == range.end + } +} + +impl PartialEq<Span> for Range<usize> { + #[inline] + fn eq(&self, span: &Span) -> bool { + self.start == span.start && self.end == span.end + } +} + +/// The type of anchored search to perform. +/// +/// If an Aho-Corasick searcher does not support the anchored mode selected, +/// then the search will return an error or panic, depending on whether a +/// fallible or an infallible routine was called. +#[non_exhaustive] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum Anchored { + /// Run an unanchored search. This means a match may occur anywhere at or + /// after the start position of the search up until the end position of the + /// search. + No, + /// Run an anchored search. This means that a match must begin at the start + /// position of the search and end before the end position of the search. 
+ Yes, +} + +impl Anchored { + /// Returns true if and only if this anchor mode corresponds to an anchored + /// search. + /// + /// # Example + /// + /// ``` + /// use aho_corasick::Anchored; + /// + /// assert!(!Anchored::No.is_anchored()); + /// assert!(Anchored::Yes.is_anchored()); + /// ``` + #[inline] + pub fn is_anchored(&self) -> bool { + matches!(*self, Anchored::Yes) + } +} + +/// A representation of a match reported by an Aho-Corasick searcher. +/// +/// A match has two essential pieces of information: the [`PatternID`] that +/// matches, and the [`Span`] of the match in a haystack. +/// +/// The pattern is identified by an ID, which corresponds to its position +/// (starting from `0`) relative to other patterns used to construct the +/// corresponding searcher. If only a single pattern is provided, then all +/// matches are guaranteed to have a pattern ID of `0`. +/// +/// Every match reported by a searcher guarantees that its span has its start +/// offset as less than or equal to its end offset. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct Match { + /// The pattern ID. + pattern: PatternID, + /// The underlying match span. + span: Span, +} + +impl Match { + /// Create a new match from a pattern ID and a span. + /// + /// This constructor is generic over how a span is provided. While + /// a [`Span`] may be given directly, one may also provide a + /// `std::ops::Range<usize>`. + /// + /// # Panics + /// + /// This panics if `end < start`. + /// + /// # Example + /// + /// This shows how to create a match for the first pattern in an + /// Aho-Corasick searcher using convenient range syntax. 
+ /// + /// ``` + /// use aho_corasick::{Match, PatternID}; + /// + /// let m = Match::new(PatternID::ZERO, 5..10); + /// assert_eq!(0, m.pattern().as_usize()); + /// assert_eq!(5, m.start()); + /// assert_eq!(10, m.end()); + /// ``` + #[inline] + pub fn new<S: Into<Span>>(pattern: PatternID, span: S) -> Match { + let span = span.into(); + assert!(span.start <= span.end, "invalid match span"); + Match { pattern, span } + } + + /// Create a new match from a pattern ID and a byte offset span. + /// + /// This constructor is generic over how a span is provided. While + /// a [`Span`] may be given directly, one may also provide a + /// `std::ops::Range<usize>`. + /// + /// This is like [`Match::new`], but accepts a `usize` instead of a + /// [`PatternID`]. This panics if the given `usize` is not representable + /// as a `PatternID`. + /// + /// # Panics + /// + /// This panics if `end < start` or if `pattern > PatternID::MAX`. + /// + /// # Example + /// + /// This shows how to create a match for the third pattern in an + /// Aho-Corasick searcher using convenient range syntax. + /// + /// ``` + /// use aho_corasick::Match; + /// + /// let m = Match::must(3, 5..10); + /// assert_eq!(3, m.pattern().as_usize()); + /// assert_eq!(5, m.start()); + /// assert_eq!(10, m.end()); + /// ``` + #[inline] + pub fn must<S: Into<Span>>(pattern: usize, span: S) -> Match { + Match::new(PatternID::must(pattern), span) + } + + /// Returns the ID of the pattern that matched. + /// + /// The ID of a pattern is derived from the position in which it was + /// originally inserted into the corresponding searcher. The first pattern + /// has identifier `0`, and each subsequent pattern is `1`, `2` and so on. + #[inline] + pub fn pattern(&self) -> PatternID { + self.pattern + } + + /// The starting position of the match. + /// + /// This is a convenience routine for `Match::span().start`. 
+ #[inline] + pub fn start(&self) -> usize { + self.span().start + } + + /// The ending position of the match. + /// + /// This is a convenience routine for `Match::span().end`. + #[inline] + pub fn end(&self) -> usize { + self.span().end + } + + /// Returns the match span as a range. + /// + /// This is a convenience routine for `Match::span().range()`. + #[inline] + pub fn range(&self) -> core::ops::Range<usize> { + self.span().range() + } + + /// Returns the span for this match. + #[inline] + pub fn span(&self) -> Span { + self.span + } + + /// Returns true when the span in this match is empty. + /// + /// An empty match can only be returned when empty pattern is in the + /// Aho-Corasick searcher. + #[inline] + pub fn is_empty(&self) -> bool { + self.span().is_empty() + } + + /// Returns the length of this match. + /// + /// This returns `0` in precisely the cases that `is_empty` returns `true`. + #[inline] + pub fn len(&self) -> usize { + self.span().len() + } + + /// Returns a new match with `offset` added to its span's `start` and `end` + /// values. + #[inline] + pub fn offset(&self, offset: usize) -> Match { + Match { + pattern: self.pattern, + span: Span { + start: self.start() + offset, + end: self.end() + offset, + }, + } + } +} + +/// A knob for controlling the match semantics of an Aho-Corasick automaton. +/// +/// There are two generally different ways that Aho-Corasick automatons can +/// report matches. The first way is the "standard" approach that results from +/// implementing most textbook explanations of Aho-Corasick. The second way is +/// to report only the leftmost non-overlapping matches. The leftmost approach +/// is in turn split into two different ways of resolving ambiguous matches: +/// leftmost-first and leftmost-longest. +/// +/// The `Standard` match kind is the default and is the only one that supports +/// overlapping matches and stream searching. 
(Trying to find overlapping or +/// streaming matches using leftmost match semantics will result in an error in +/// fallible APIs and a panic when using infallible APIs.) The `Standard` match +/// kind will report matches as they are seen. When searching for overlapping +/// matches, then all possible matches are reported. When searching for +/// non-overlapping matches, the first match seen is reported. For example, for +/// non-overlapping matches, given the patterns `abcd` and `b` and the haystack +/// `abcdef`, only a match for `b` is reported since it is detected first. The +/// `abcd` match is never reported since it overlaps with the `b` match. +/// +/// In contrast, the leftmost match kind always prefers the leftmost match +/// among all possible matches. Given the same example as above with `abcd` and +/// `b` as patterns and `abcdef` as the haystack, the leftmost match is `abcd` +/// since it begins before the `b` match, even though the `b` match is detected +/// before the `abcd` match. In this case, the `b` match is not reported at all +/// since it overlaps with the `abcd` match. +/// +/// The difference between leftmost-first and leftmost-longest is in how they +/// resolve ambiguous matches when there are multiple leftmost matches to +/// choose from. Leftmost-first always chooses the pattern that was provided +/// earliest, whereas leftmost-longest always chooses the longest matching +/// pattern. For example, given the patterns `a` and `ab` and the subject +/// string `ab`, the leftmost-first match is `a` but the leftmost-longest match +/// is `ab`. Conversely, if the patterns were given in reverse order, i.e., +/// `ab` and `a`, then both the leftmost-first and leftmost-longest matches +/// would be `ab`. Stated differently, the leftmost-first match depends on the +/// order in which the patterns were given to the Aho-Corasick automaton. 
+/// Because of that, when leftmost-first matching is used, if a pattern `A` +/// that appears before a pattern `B` is a prefix of `B`, then it is impossible +/// to ever observe a match of `B`. +/// +/// If you're not sure which match kind to pick, then stick with the standard +/// kind, which is the default. In particular, if you need overlapping or +/// streaming matches, then you _must_ use the standard kind. The leftmost +/// kinds are useful in specific circumstances. For example, leftmost-first can +/// be very useful as a way to implement match priority based on the order of +/// patterns given and leftmost-longest can be useful for dictionary searching +/// such that only the longest matching words are reported. +/// +/// # Relationship with regular expression alternations +/// +/// Understanding match semantics can be a little tricky, and one easy way +/// to conceptualize non-overlapping matches from an Aho-Corasick automaton +/// is to think about them as a simple alternation of literals in a regular +/// expression. For example, let's say we wanted to match the strings +/// `Sam` and `Samwise`, which would turn into the regex `Sam|Samwise`. It +/// turns out that regular expression engines have two different ways of +/// matching this alternation. The first way, leftmost-longest, is commonly +/// found in POSIX compatible implementations of regular expressions (such as +/// `grep`). The second way, leftmost-first, is commonly found in backtracking +/// implementations such as Perl. (Some regex engines, such as RE2 and Rust's +/// regex engine do not use backtracking, but still implement leftmost-first +/// semantics in an effort to match the behavior of dominant backtracking +/// regex engines such as those found in Perl, Ruby, Python, Javascript and +/// PHP.) 
+/// +/// That is, when matching `Sam|Samwise` against `Samwise`, a POSIX regex +/// will match `Samwise` because it is the longest possible match, but a +/// Perl-like regex will match `Sam` since it appears earlier in the +/// alternation. Indeed, the regex `Sam|Samwise` in a Perl-like regex engine +/// will never match `Samwise` since `Sam` will always have higher priority. +/// Conversely, matching the regex `Samwise|Sam` against `Samwise` will lead to +/// a match of `Samwise` in both POSIX and Perl-like regexes since `Samwise` is +/// still the longest match, but it also appears earlier than `Sam`. +/// +/// The "standard" match semantics of Aho-Corasick generally don't correspond +/// to the match semantics of any large group of regex implementations, so +/// there's no direct analogy that can be made here. Standard match semantics +/// are generally useful for overlapping matches, or if you just want to see +/// matches as they are detected. +/// +/// The main conclusion to draw from this section is that the match semantics +/// can be tweaked to precisely match either Perl-like regex alternations or +/// POSIX regex alternations. +#[non_exhaustive] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum MatchKind { + /// Use standard match semantics, which support overlapping matches. When + /// used with non-overlapping matches, matches are reported as they are + /// seen. + Standard, + /// Use leftmost-first match semantics, which reports leftmost matches. + /// When there are multiple possible leftmost matches, the match + /// corresponding to the pattern that appeared earlier when constructing + /// the automaton is reported. + /// + /// This does **not** support overlapping matches or stream searching. If + /// this match kind is used, attempting to find overlapping matches or + /// stream matches will fail. + LeftmostFirst, + /// Use leftmost-longest match semantics, which reports leftmost matches. 
+ /// When there are multiple possible leftmost matches, the longest match + /// is chosen. + /// + /// This does **not** support overlapping matches or stream searching. If + /// this match kind is used, attempting to find overlapping matches or + /// stream matches will fail. + LeftmostLongest, +} + +/// The default match kind is `MatchKind::Standard`. +impl Default for MatchKind { + fn default() -> MatchKind { + MatchKind::Standard + } +} + +impl MatchKind { + #[inline] + pub(crate) fn is_standard(&self) -> bool { + matches!(*self, MatchKind::Standard) + } + + #[inline] + pub(crate) fn is_leftmost(&self) -> bool { + matches!(*self, MatchKind::LeftmostFirst | MatchKind::LeftmostLongest) + } + + #[inline] + pub(crate) fn is_leftmost_first(&self) -> bool { + matches!(*self, MatchKind::LeftmostFirst) + } + + /// Convert this match kind into a packed match kind. If this match kind + /// corresponds to standard semantics, then this returns None, since + /// packed searching does not support standard semantics. + #[inline] + pub(crate) fn as_packed(&self) -> Option<crate::packed::MatchKind> { + match *self { + MatchKind::Standard => None, + MatchKind::LeftmostFirst => { + Some(crate::packed::MatchKind::LeftmostFirst) + } + MatchKind::LeftmostLongest => { + Some(crate::packed::MatchKind::LeftmostLongest) + } + } + } +} + +/// The kind of anchored starting configurations to support in an Aho-Corasick +/// searcher. +/// +/// Depending on which searcher is used internally by +/// [`AhoCorasick`](crate::AhoCorasick), supporting both unanchored +/// and anchored searches can be quite costly. For this reason, +/// [`AhoCorasickBuilder::start_kind`](crate::AhoCorasickBuilder::start_kind) +/// can be used to configure whether your searcher supports unanchored, +/// anchored or both kinds of searches. +/// +/// This searcher configuration knob works in concert with the search time +/// configuration [`Input::anchored`]. 
Namely, if one requests an unsupported +/// anchored mode, then the search will either panic or return an error, +/// depending on whether you're using infallible or fallible APIs, respectively. +/// +/// `AhoCorasick` by default only supports unanchored searches. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum StartKind { + /// Support both anchored and unanchored searches. + Both, + /// Support only unanchored searches. Requesting an anchored search will + /// return an error in fallible APIs and panic in infallible APIs. + Unanchored, + /// Support only anchored searches. Requesting an unanchored search will + /// return an error in fallible APIs and panic in infallible APIs. + Anchored, +} + +impl Default for StartKind { + fn default() -> StartKind { + StartKind::Unanchored + } +} diff --git a/third_party/rust/aho-corasick/src/util/special.rs b/third_party/rust/aho-corasick/src/util/special.rs new file mode 100644 index 0000000000..beeba40c89 --- /dev/null +++ b/third_party/rust/aho-corasick/src/util/special.rs @@ -0,0 +1,42 @@ +use crate::util::primitives::StateID; + +/// A collection of sentinel state IDs for Aho-Corasick automata. +/// +/// This specifically enables the technique by which we determine which states +/// are dead, matches or start states. Namely, by arranging states in a +/// particular order, we can determine the type of a state simply by looking at +/// its ID. +#[derive(Clone, Debug)] +pub(crate) struct Special { + /// The maximum ID of all the "special" states. This corresponds either to + /// start_anchored_id when a prefilter is active or max_match_id when a + /// prefilter is not active. The idea here is that if there is no prefilter, + /// then there is no point in treating start states as special. + pub(crate) max_special_id: StateID, + /// The maximum ID of all the match states. Any state ID bigger than this + /// is guaranteed to be a non-match ID. 
+ /// + /// It is possible and legal for max_match_id to be equal to + /// start_anchored_id, which occurs precisely in the case where the empty + /// string is a pattern that was added to the underlying automaton. + pub(crate) max_match_id: StateID, + /// The state ID of the start state used for unanchored searches. + pub(crate) start_unanchored_id: StateID, + /// The state ID of the start state used for anchored searches. This is + /// always start_unanchored_id+1. + pub(crate) start_anchored_id: StateID, +} + +impl Special { + /// Create a new set of "special" state IDs with all IDs initialized to + /// zero. The general idea here is that they will be updated and set to + /// correct values later. + pub(crate) fn zero() -> Special { + Special { + max_special_id: StateID::ZERO, + max_match_id: StateID::ZERO, + start_unanchored_id: StateID::ZERO, + start_anchored_id: StateID::ZERO, + } + } +} |