author    Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
commit    26a029d407be480d791972afb5975cf62c9360a6
tree      f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/aho-corasick
parent    Initial commit.
Adding upstream version 124.0.1. (upstream/124.0.1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/aho-corasick')
-rw-r--r-- third_party/rust/aho-corasick/.cargo-checksum.json | 1
-rw-r--r-- third_party/rust/aho-corasick/COPYING | 3
-rw-r--r-- third_party/rust/aho-corasick/Cargo.toml | 74
-rw-r--r-- third_party/rust/aho-corasick/DESIGN.md | 481
-rw-r--r-- third_party/rust/aho-corasick/LICENSE-MIT | 21
-rw-r--r-- third_party/rust/aho-corasick/README.md | 174
-rw-r--r-- third_party/rust/aho-corasick/UNLICENSE | 24
-rw-r--r-- third_party/rust/aho-corasick/rustfmt.toml | 2
-rw-r--r-- third_party/rust/aho-corasick/src/ahocorasick.rs | 2789
-rw-r--r-- third_party/rust/aho-corasick/src/automaton.rs | 1608
-rw-r--r-- third_party/rust/aho-corasick/src/dfa.rs | 814
-rw-r--r-- third_party/rust/aho-corasick/src/lib.rs | 326
-rw-r--r-- third_party/rust/aho-corasick/src/macros.rs | 18
-rw-r--r-- third_party/rust/aho-corasick/src/nfa/contiguous.rs | 1141
-rw-r--r-- third_party/rust/aho-corasick/src/nfa/mod.rs | 40
-rw-r--r-- third_party/rust/aho-corasick/src/nfa/noncontiguous.rs | 1762
-rw-r--r-- third_party/rust/aho-corasick/src/packed/api.rs | 687
-rw-r--r-- third_party/rust/aho-corasick/src/packed/ext.rs | 39
-rw-r--r-- third_party/rust/aho-corasick/src/packed/mod.rs | 120
-rw-r--r-- third_party/rust/aho-corasick/src/packed/pattern.rs | 480
-rw-r--r-- third_party/rust/aho-corasick/src/packed/rabinkarp.rs | 168
-rw-r--r-- third_party/rust/aho-corasick/src/packed/teddy/README.md | 386
-rw-r--r-- third_party/rust/aho-corasick/src/packed/teddy/builder.rs | 780
-rw-r--r-- third_party/rust/aho-corasick/src/packed/teddy/generic.rs | 1382
-rw-r--r-- third_party/rust/aho-corasick/src/packed/teddy/mod.rs | 9
-rw-r--r-- third_party/rust/aho-corasick/src/packed/tests.rs | 583
-rw-r--r-- third_party/rust/aho-corasick/src/packed/vector.rs | 1750
-rw-r--r-- third_party/rust/aho-corasick/src/tests.rs | 1664
-rw-r--r-- third_party/rust/aho-corasick/src/transducer.rs | 270
-rw-r--r-- third_party/rust/aho-corasick/src/util/alphabet.rs | 409
-rw-r--r-- third_party/rust/aho-corasick/src/util/buffer.rs | 124
-rw-r--r-- third_party/rust/aho-corasick/src/util/byte_frequencies.rs | 258
-rw-r--r-- third_party/rust/aho-corasick/src/util/debug.rs | 26
-rw-r--r-- third_party/rust/aho-corasick/src/util/error.rs | 259
-rw-r--r-- third_party/rust/aho-corasick/src/util/int.rs | 284
-rw-r--r-- third_party/rust/aho-corasick/src/util/mod.rs | 12
-rw-r--r-- third_party/rust/aho-corasick/src/util/prefilter.rs | 924
-rw-r--r-- third_party/rust/aho-corasick/src/util/primitives.rs | 759
-rw-r--r-- third_party/rust/aho-corasick/src/util/remapper.rs | 214
-rw-r--r-- third_party/rust/aho-corasick/src/util/search.rs | 1148
-rw-r--r-- third_party/rust/aho-corasick/src/util/special.rs | 42
41 files changed, 22055 insertions, 0 deletions
diff --git a/third_party/rust/aho-corasick/.cargo-checksum.json b/third_party/rust/aho-corasick/.cargo-checksum.json
new file mode 100644
index 0000000000..233f8202c8
--- /dev/null
+++ b/third_party/rust/aho-corasick/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"55608b09e18c96a0c245d8af2546e63bb4576fa378c1f2ce38c7909f3b225007","DESIGN.md":"59c960e1b73b1d7fb41e4df6c0c1b1fcf44dd2ebc8a349597a7d0595f8cb5130","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"afc4d559a98cf190029af0bf320fc0022725e349cd2a303aac860254e28f3c53","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/ahocorasick.rs":"c699c07df70be45c666e128509ad571a7649d2073e4ae16ac1efd6793c9c6890","src/automaton.rs":"22258a3e118672413119f8f543a9b912cce954e63524575c0ebfdf9011f9c2dd","src/dfa.rs":"c495d615545981e1d0a4174bf0a6ab87fd81c30c7d4527acf2d64a56323bdc36","src/lib.rs":"2a92d5c5e930f2d306508802e8a929135e1f41c9f5f8deda8f7eb98947179dd2","src/macros.rs":"c6c52ae05b24433cffaca7b78b3645d797862c5d5feffddf9f54909095ed6e05","src/nfa/contiguous.rs":"aeb6ee5fd80eea04decbc4b46aa27d1ab270b78d416a644da25b7934f009ee66","src/nfa/mod.rs":"ee7b3109774d14bbad5239c16bb980dd6b8185ec136d94fbaf2f0dc27d5ffa15","src/nfa/noncontiguous.rs":"de94f02b04efd8744fb096759a8897c22012b0e0ca3ace161fd87c71befefe04","src/packed/api.rs":"160d3b10823316f7b0924e13c3afd222c8a7db5c0a00432401f311ef27d6a1b7","src/packed/ext.rs":"66be06fde8558429da23a290584d4b9fae665bf64c2578db4fe5f5f3ee864869","src/packed/mod.rs":"0020cd6f07ba5c8955923a9516d7f758864260eda53a6b6f629131c45ddeec62","src/packed/pattern.rs":"1e3a289a730c141fc30b295811e372d046c6619c7fd670308299b889a06c7673","src/packed/rabinkarp.rs":"403146eb1d838a84601d171393542340513cd1ee7ff750f2372161dd47746586","src/packed/teddy/README.md":"3a43194b64e221543d885176aba3beb1224a927385a20eca842daf6b0ea2f342","src/packed/teddy/builder.rs":"720735ea6c7ff92b081426513e6e82feed24a922849297bb538d28f7b8129f81","src/packed/teddy/generic.rs":"ea252ab05b32cea7dd9d71e332071d243db7dd0362e049252a27e5881ba2bf39","src/packed/teddy/mod.rs":"17d741f7e2fb9dbac5ba7d1bd4542cf1e35e9f146ace728e23fe6bbed20028b2","src/packed/tests.rs":"8e2f56eb3890ed3876ecb47d3121996e416563127b6430110d7b516df3f83b4b","src/packed/vector.rs":"6e0400422de015e181c758ef3a4ff517fc8d0481b078a82de00f6e29e9d2e1c8","src/tests.rs":"c68192ab97b6161d0d6ee96fefd80cc7d14e4486ddcd8d1f82b5c92432c24ed5","src/transducer.rs":"02daa33a5d6dac41dcfd67f51df7c0d4a91c5131c781fb54c4de3520c585a6e1","src/util/alphabet.rs":"6dc22658a38deddc0279892035b18870d4585069e35ba7c7e649a24509acfbcc","src/util/buffer.rs":"f9e37f662c46c6ecd734458dedbe76c3bb0e84a93b6b0117c0d4ad3042413891","src/util/byte_frequencies.rs":"2fb85b381c038c1e44ce94294531cdcd339dca48b1e61f41455666e802cbbc9e","src/util/debug.rs":"ab301ad59aa912529cb97233a54a05914dd3cb2ec43e6fec7334170b97ac5998","src/util/error.rs":"ecccd60e7406305023efcc6adcc826eeeb083ab8f7fbfe3d97469438cd4c4e5c","src/util/int.rs":"4ab6dbdba10027ddec2af63a9b28ce4eee30ded0daa5d8eb068b2b55542b6039","src/util/mod.rs":"7ab28d11323ecdbd982087f32eb8bceeee84f1a2583f3aae27039c36d58cf12c","src/util/prefilter.rs":"9fa4498f18bf70478b1996c1a013698b626d15f119aa81dbc536673c9f045718","src/util/primitives.rs":"f89f3fa1d8db4e37de9ca767c6d05e346404837cade6d063bba68972fafa610b","src/util/remapper.rs":"9f12d911583a325c11806eeceb46d0dfec863cfcfa241aed84d31af73da746e5","src/util/search.rs":"6af803e08b8b8c8a33db100623f1621b0d741616524ce40893d8316897f27ffe","src/util/special.rs":"7d2f9cb9dd9771f59816e829b2d96b1239996f32939ba98764e121696c52b146"},"package":"0f2135563fb5c609d2b2b87c1e8ce7bc41b0b45430f
a9661f457981503dd5bf0"}
\ No newline at end of file
diff --git a/third_party/rust/aho-corasick/COPYING b/third_party/rust/aho-corasick/COPYING
new file mode 100644
index 0000000000..bb9c20a094
--- /dev/null
+++ b/third_party/rust/aho-corasick/COPYING
@@ -0,0 +1,3 @@
+This project is dual-licensed under the Unlicense and MIT licenses.
+
+You may use this code under the terms of either license.
diff --git a/third_party/rust/aho-corasick/Cargo.toml b/third_party/rust/aho-corasick/Cargo.toml
new file mode 100644
index 0000000000..f2ebca9b9b
--- /dev/null
+++ b/third_party/rust/aho-corasick/Cargo.toml
@@ -0,0 +1,74 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2021"
+rust-version = "1.60.0"
+name = "aho-corasick"
+version = "1.1.0"
+authors = ["Andrew Gallant <jamslam@gmail.com>"]
+exclude = [
+ "/aho-corasick-debug",
+ "/benchmarks",
+ "/tmp",
+]
+autotests = false
+description = "Fast multiple substring searching."
+homepage = "https://github.com/BurntSushi/aho-corasick"
+readme = "README.md"
+keywords = [
+ "string",
+ "search",
+ "text",
+ "pattern",
+ "multi",
+]
+categories = ["text-processing"]
+license = "Unlicense OR MIT"
+repository = "https://github.com/BurntSushi/aho-corasick"
+
+[package.metadata.docs.rs]
+all-features = true
+rustdoc-args = [
+ "--cfg",
+ "docsrs",
+ "--generate-link-to-definition",
+]
+
+[profile.bench]
+debug = 2
+
+[profile.release]
+debug = 2
+
+[lib]
+name = "aho_corasick"
+
+[dependencies.log]
+version = "0.4.17"
+optional = true
+
+[dependencies.memchr]
+version = "2.4.0"
+optional = true
+default-features = false
+
+[dev-dependencies.doc-comment]
+version = "0.3.3"
+
+[features]
+default = [
+ "std",
+ "perf-literal",
+]
+logging = ["dep:log"]
+perf-literal = ["dep:memchr"]
+std = ["memchr?/std"]
diff --git a/third_party/rust/aho-corasick/DESIGN.md b/third_party/rust/aho-corasick/DESIGN.md
new file mode 100644
index 0000000000..f911f0c3ad
--- /dev/null
+++ b/third_party/rust/aho-corasick/DESIGN.md
@@ -0,0 +1,481 @@
+This document describes the internal design of this crate, which is an object
+lesson in what happens when you take a fairly simple old algorithm like
+Aho-Corasick and make it fast and production ready.
+
+The target audience of this document is Rust programmers who have some
+familiarity with string searching; however, one does not need to know the
+Aho-Corasick algorithm in order to read this (it is explained below). One
+should, however, know what a trie is. (If you don't, go read its Wikipedia
+article.)
+
+The center-piece of this crate is an implementation of Aho-Corasick. On its
+own, Aho-Corasick isn't that complicated. The complex pieces come from the
+different variants of Aho-Corasick implemented in this crate. Specifically,
+they are:
+
+* Aho-Corasick as a noncontiguous NFA. States have their transitions
+ represented sparsely, and each state puts its transitions in its own separate
+ allocation. Hence the name "noncontiguous."
+* Aho-Corasick as a contiguous NFA. This NFA uses a single allocation to
+ represent the transitions of all states. That is, transitions are laid out
+ contiguously in memory. Moreover, states near the starting state are
+ represented densely, such that finding the next state ID takes a constant
+ number of instructions.
+* Aho-Corasick as a DFA. In this case, all states are represented densely in
+ a transition table that uses one allocation.
+* Supporting "standard" match semantics, along with its overlapping variant,
+ in addition to leftmost-first and leftmost-longest semantics. The "standard"
+ semantics are typically what you see in a textbook description of
+ Aho-Corasick. However, Aho-Corasick is also useful as an optimization in
+ regex engines, which often use leftmost-first or leftmost-longest semantics.
+ Thus, it is useful to implement those semantics here. The "standard" and
+ "leftmost" search algorithms are subtly different, and also require slightly
+ different construction algorithms.
+* Support for ASCII case insensitive matching.
+* Support for accelerating searches when the patterns all start with a small
+ number of fixed bytes. Or alternatively, when the patterns all contain a
+ small number of rare bytes. (Searching for these bytes uses SIMD vectorized
+ code courtesy of `memchr`.)
+* Transparent support for alternative SIMD vectorized search routines for
+ a smaller number of literals, such as the Teddy algorithm. We call these
+ "packed" search routines because they use SIMD. They can often be an order of
+ magnitude faster than just Aho-Corasick, but don't scale as well.
+* Support for searching streams. This can reuse most of the underlying code,
+ but does require careful buffering support.
+* Support for anchored searches, which permit efficient "is prefix" checks for
+ a large number of patterns.
+
+When you combine all of this together along with trying to make everything as
+fast as possible, what you end up with is entirely too much code with too much
+`unsafe`. Alas, I was not smart enough to figure out how to reduce it. Instead,
+we will explain it.
+
+
+# Basics
+
+The fundamental problem this crate is trying to solve is to determine the
+occurrences of possibly many patterns in a haystack. The naive way to solve
+this is to look for a match for each pattern at each position in the haystack:
+
+    for i in 0..haystack.len():
+        for p in patterns.iter():
+            if haystack[i..].starts_with(p.bytes()):
+                return Match(p.id(), i, i + p.bytes().len())
+
+Those four lines are effectively all this crate does. The problem with those
+four lines is that they are very slow, especially when you're searching for a
+large number of patterns.
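+
+For reference, that same naive routine written as runnable Rust might look
+like this (a sketch independent of this crate's API; the returned tuple of
+`(pattern index, start, end)` simply stands in for a match type):
+
+    fn naive_find(
+        patterns: &[&[u8]],
+        haystack: &[u8],
+    ) -> Option<(usize, usize, usize)> {
+        for i in 0..haystack.len() {
+            for (id, p) in patterns.iter().enumerate() {
+                if haystack[i..].starts_with(p) {
+                    return Some((id, i, i + p.len()));
+                }
+            }
+        }
+        None
+    }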
+
+While there are many different algorithms available to solve this, a popular
+one is Aho-Corasick. It's a common solution because it's not too hard to
+implement, scales quite well even when searching for thousands of patterns and
+is generally pretty fast. Aho-Corasick does well here because, regardless of
+the number of patterns you're searching for, it always visits each byte in the
+haystack exactly once. This means, generally speaking, adding more patterns to
+an Aho-Corasick automaton does not make it slower. (Strictly speaking, however,
+this is not true, since a larger automaton will make less effective use of the
+CPU's cache.)
+
+Aho-Corasick can be succinctly described as a trie with state transitions
+between some of the nodes that efficiently instruct the search algorithm to
+try matching alternative keys in the trie. The trick is that these state
+transitions are arranged such that each byte of input needs to be inspected
+only once. These state transitions are typically called "failure transitions,"
+because they instruct the searcher (the thing traversing the automaton while
+reading from the haystack) what to do when a byte in the haystack does not
+correspond to a valid transition in the current state of the trie.
+
+More formally, a failure transition points to a state in the automaton that may
+lead to a match whose prefix is a proper suffix of the path traversed through
+the trie so far. (If no such proper suffix exists, then the failure transition
+points back to the start state of the trie, effectively restarting the search.)
+This is perhaps simpler to explain pictorially. For example, let's say we built
+an Aho-Corasick automaton with the following patterns: 'abcd' and 'cef'. The
+trie looks like this:
+
+        a - S1 - b - S2 - c - S3 - d - S4*
+       /
+    S0 - c - S5 - e - S6 - f - S7*
+
+where states marked with a `*` are match states (meaning, the search algorithm
+should stop and report a match to the caller).
+
+So given this trie, it should be somewhat straightforward to see how it can
+be used to determine whether any particular haystack *starts* with either
+`abcd` or `cef`. It's easy to express this in code:
+
+    fn has_prefix(trie: &Trie, haystack: &[u8]) -> bool {
+        let mut state_id = trie.start();
+        // If the empty pattern is in trie, then state_id is a match state.
+        if trie.is_match(state_id) {
+            return true;
+        }
+        for (i, &b) in haystack.iter().enumerate() {
+            state_id = match trie.next_state(state_id, b) {
+                Some(id) => id,
+                // If there was no transition for this state and byte, then
+                // we know the haystack does not start with one of the
+                // patterns in our trie.
+                None => return false,
+            };
+            if trie.is_match(state_id) {
+                return true;
+            }
+        }
+        false
+    }
+
+And that's pretty much it. All we do is move through the trie starting with the
+bytes at the beginning of the haystack. If we find ourselves in a position
+where we can't move, or if we've looked through the entire haystack without
+seeing a match state, then we know the haystack does not start with any of the
+patterns in the trie.
+
+The meat of the Aho-Corasick algorithm is in how we add failure transitions to
+our trie to keep searching efficient. Specifically, it permits us to not only
+check whether a haystack *starts* with any one of a number of patterns, but
+rather, whether the haystack contains any of a number of patterns *anywhere* in
+the haystack.
+
+As mentioned before, a failure transition connects a proper suffix of the path
+traversed through the trie so far with a path that leads to a match that has a
+prefix corresponding to that proper suffix. So in our case, for patterns `abcd`
+and `cef`, with a haystack `abcef`, we want to transition to state `S5` (from
+the diagram above) from `S3` upon seeing that the byte following `c` is not
+`d`. Namely, the proper suffix in this example is `c`, which is a prefix of
+`cef`. So the modified diagram looks like this:
+
+
+        a - S1 - b - S2 - c - S3 - d - S4*
+       /                     /
+      /       ---------------
+     /       /
+    S0 - c - S5 - e - S6 - f - S7*
+
+One thing that isn't shown in this diagram is that *all* states have a failure
+transition, but only `S3` has a *non-trivial* failure transition. That is, all
+other states have a failure transition back to the start state. So if our
+haystack was `abzabcd`, then the searcher would transition back to `S0` after
+seeing `z`, which effectively restarts the search. (Because there is no pattern
+in our trie that has a prefix of `bz` or `z`.)
+
+The code for traversing this *automaton* or *finite state machine* (it is no
+longer just a trie) is not that much different from the `has_prefix` code
+above:
+
+    fn contains(fsm: &FiniteStateMachine, haystack: &[u8]) -> bool {
+        let mut state_id = fsm.start();
+        // If the empty pattern is in fsm, then state_id is a match state.
+        if fsm.is_match(state_id) {
+            return true;
+        }
+        for (i, &b) in haystack.iter().enumerate() {
+            // While the diagram above doesn't show this, we may wind up
+            // needing to follow multiple failure transitions before we land
+            // on a state in which we can advance. Therefore, when searching
+            // for the next state, we need to loop until we don't see a
+            // failure transition.
+            //
+            // This loop terminates because the start state has no empty
+            // transitions. Every transition from the start state either
+            // points to another state, or loops back to the start state.
+            loop {
+                match fsm.next_state(state_id, b) {
+                    Some(id) => {
+                        state_id = id;
+                        break;
+                    }
+                    // Unlike our code above, if there was no transition for
+                    // this state, then we don't quit. Instead, we look for
+                    // this state's failure transition and follow that
+                    // instead.
+                    None => {
+                        state_id = fsm.next_fail_state(state_id);
+                    }
+                };
+            }
+            if fsm.is_match(state_id) {
+                return true;
+            }
+        }
+        false
+    }
+
+Other than the complication around traversing failure transitions, this code
+is still roughly "traverse the automaton with bytes from the haystack, and quit
+when a match is seen."
+
+And that concludes our section on the basics. While we didn't go deep into how
+the automaton is built (see `src/nfa/noncontiguous.rs`, which has detailed
+comments about that), the basic structure of Aho-Corasick should be reasonably
+clear.
+
+
+# NFAs and DFAs
+
+There are generally two types of finite automata: non-deterministic finite
+automata (NFA) and deterministic finite automata (DFA). The difference between
+them is, principally, that an NFA can be in multiple states at once. This is
+typically accomplished by things called _epsilon_ transitions, where one could
+move to a new state without consuming any bytes from the input. (The other
+mechanism by which NFAs can be in more than one state is where the same byte in
+a particular state transitions to multiple distinct states.) In contrast, a DFA
+can only ever be in one state at a time. A DFA has no epsilon transitions, and
+for any given state, a byte transitions to at most one other state.
+
+By this formulation, the Aho-Corasick automaton described in the previous
+section is an NFA. This is because failure transitions are, effectively,
+epsilon transitions. That is, whenever the automaton is in state `S`, it is
+actually in the set of states that are reachable by recursively following
+failure transitions from `S` until you reach the start state. (This means
+that, for example, the start state is always active since the start state is
+reachable via failure transitions from any state in the automaton.)
+
+NFAs have a lot of nice properties. They tend to be easier to construct, and
+also tend to use less memory. However, their primary downside is that they are
+typically slower to execute a search with. For example, the code above showing
+how to search with an Aho-Corasick automaton needs to potentially iterate
+through many failure transitions for every byte of input. While this is a
+fairly small amount of overhead, this can add up, especially if the automaton
+has a lot of overlapping patterns with a lot of failure transitions.
+
+A DFA's search code, by contrast, looks like this:
+
+    fn contains(dfa: &DFA, haystack: &[u8]) -> bool {
+        let mut state_id = dfa.start();
+        // If the empty pattern is in dfa, then state_id is a match state.
+        if dfa.is_match(state_id) {
+            return true;
+        }
+        for (i, &b) in haystack.iter().enumerate() {
+            // An Aho-Corasick DFA *never* has a missing state that requires
+            // failure transitions to be followed. One byte of input advances
+            // the automaton by one state. Always.
+            state_id = dfa.next_state(state_id, b);
+            if dfa.is_match(state_id) {
+                return true;
+            }
+        }
+        false
+    }
+
+The search logic here is much simpler than for the NFA, and this tends to
+translate into significant performance benefits as well, since there's a lot
+less work being done for each byte in the haystack. How is this accomplished?
+It's done by pre-following all failure transitions for all states for all bytes
+in the alphabet, and then building a single state transition table. Building
+this DFA can be much more costly than building the NFA, and use much more
+memory, but the better performance can be worth it.
+
+Users of this crate can actually choose between using one of two possible NFAs
+(noncontiguous or contiguous) or a DFA. By default, a contiguous NFA is used
+in most circumstances, but if the number of patterns is small enough, a DFA
+will be used. A contiguous NFA is chosen because it uses orders of magnitude
+less memory than a DFA, takes only a little longer to build than a
+noncontiguous NFA and usually gets pretty close to the search speed of a DFA.
+(Callers can override this automatic selection via the
+`AhoCorasickBuilder::kind` configuration.)
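+
+For callers who want to force a particular automaton, the builder exposes this
+choice directly. A small sketch using the `AhoCorasickBuilder::kind` option
+and the `AhoCorasickKind` enum from this crate's public API (see the crate
+documentation for the precise signatures):
+
+    use aho_corasick::{AhoCorasick, AhoCorasickKind};
+
+    // Force a DFA instead of letting the crate choose automatically.
+    let ac = AhoCorasick::builder()
+        .kind(Some(AhoCorasickKind::DFA))
+        .build(&["foo", "bar", "baz"])
+        .unwrap();
+    assert_eq!(AhoCorasickKind::DFA, ac.kind());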
+
+
+# More DFA tricks
+
+As described in the previous section, one of the downsides of using a DFA
+is that it uses more memory and can take longer to build. One small way of
+mitigating these concerns is to map the alphabet used by the automaton into
+a smaller space. Typically, the alphabet of a DFA has 256 elements in it:
+one element for each possible value that fits into a byte. However, in many
+cases, one does not need the full alphabet. For example, if all patterns in an
+Aho-Corasick automaton are ASCII letters, then this only uses up 52 distinct
+bytes. As far as the automaton is concerned, the remaining 204 bytes are
+indistinguishable from one another: they will never discriminate between a
+match or a non-match. Therefore, in cases like that, the alphabet can be shrunk
+to just 53 elements. One for each ASCII letter, and then another to serve as a
+placeholder for every other unused byte.
+
+In practice, this library doesn't quite compute the optimal set of equivalence
+classes, but it's close enough in most cases. The key idea is that this then
+allows the transition table for the DFA to be potentially much smaller. The
+downside of doing this, however, is that since the transition table is defined
+in terms of this smaller alphabet space, every byte in the haystack must be
+re-mapped to this smaller space. This requires an additional 256-byte table.
+In practice, this can lead to a small search time hit, but it can be difficult
+to measure. Moreover, it can sometimes lead to faster search times for bigger
+automata, since it could be the difference between more parts of the automaton
+staying in the CPU cache or not.
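+
+To make the idea concrete, here is a minimal, hypothetical sketch of applying
+a byte class map during a transition lookup. This is illustrative only and is
+not this crate's actual representation; all names here are invented for the
+example:
+
+    // A 256-entry table mapping every possible byte to its equivalence
+    // class. If the patterns only contain ASCII letters, then all other
+    // bytes can share a single "everything else" class.
+    struct ByteClasses([u8; 256]);
+
+    impl ByteClasses {
+        fn get(&self, byte: u8) -> usize {
+            usize::from(self.0[usize::from(byte)])
+        }
+    }
+
+    // A dense transition lookup in class space: each state has
+    // `num_classes` columns instead of 256.
+    fn next_state(
+        transitions: &[u32],
+        num_classes: usize,
+        classes: &ByteClasses,
+        state_index: usize,
+        byte: u8,
+    ) -> usize {
+        transitions[state_index * num_classes + classes.get(byte)] as usize
+    }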
+
+One other trick for DFAs employed by this crate is the notion of premultiplying
+state identifiers. Specifically, the normal way to compute the next transition
+in a DFA is via the following (assuming that the transition table is laid out
+sequentially in memory, in row-major order, where the rows are states):
+
+    next_state_id = dfa.transitions[current_state_id * 256 + current_byte]
+
+However, since the value `256` is a fixed constant, we can actually premultiply
+the state identifiers in the table when we build the table initially. Then, the
+next transition computation simply becomes:
+
+    next_state_id = dfa.transitions[current_state_id + current_byte]
+
+This doesn't seem like much, but when this is being executed for every byte of
+input that you're searching, saving that extra multiplication instruction can
+add up.
+
+The same optimization works even when equivalence classes are enabled, as
+described above. The only difference is that the premultiplication is by the
+total number of equivalence classes instead of 256.
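+
+Continuing the hypothetical sketch above, premultiplication only changes what
+is stored as a state's identifier (`state_index * num_classes`), so the
+per-byte lookup loses its multiplication:
+
+    // `state_id` is already `state_index * num_classes`, so the lookup is
+    // just an add and a load.
+    fn next_state_premultiplied(
+        transitions: &[u32],
+        classes: &ByteClasses,
+        state_id: usize,
+        byte: u8,
+    ) -> usize {
+        transitions[state_id + classes.get(byte)] as usize
+    }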
+
+There isn't much downside to premultiplying state identifiers, other than it
+imposes a smaller limit on the total number of states in the DFA. Namely, with
+premultiplied state identifiers, you run out of room in your state identifier
+representation more rapidly than if the identifiers are just state indices.
+
+Both equivalence classes and premultiplication are always enabled. There is an
+`AhoCorasickBuilder::byte_classes` configuration, but disabling this just makes
+it so there are always 256 equivalence classes, i.e., every class corresponds
+to precisely one byte. When it's disabled, the equivalence class map itself is
+still used. Disabling it is primarily useful when debugging the underlying
+automaton, since the automaton can be easier to comprehend when it uses actual
+byte values for its transitions instead of equivalence classes.
+
+
+# Match semantics
+
+One of the more interesting things about this implementation of Aho-Corasick
+that (as far as this author knows) separates it from other implementations, is
+that it natively supports leftmost-first and leftmost-longest match semantics.
+Briefly, match semantics refer to the decision procedure by which searching
+will disambiguate matches when there are multiple to choose from:
+
+* **standard** match semantics emits matches as soon as they are detected by
+ the automaton. This is typically equivalent to the textbook non-overlapping
+ formulation of Aho-Corasick.
+* **leftmost-first** match semantics means that 1) the next match is the match
+ starting at the leftmost position and 2) among multiple matches starting at
+ the same leftmost position, the match corresponding to the pattern provided
+ first by the caller is reported.
+* **leftmost-longest** is like leftmost-first, except when there are multiple
+ matches starting at the same leftmost position, the pattern corresponding to
+ the longest match is returned.
+
+(The crate API documentation discusses these differences, with examples, in
+more depth on the `MatchKind` type.)
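+
+As a quick illustration using this crate's public API (the same example
+appears in the crate documentation), searching `abcd` for the patterns `b`,
+`abc` and `abcd` yields a different first match under each semantic:
+
+    use aho_corasick::{AhoCorasick, MatchKind};
+
+    let patterns = &["b", "abc", "abcd"];
+    let haystack = "abcd";
+
+    // Standard semantics (the default): report the first match seen.
+    let ac = AhoCorasick::new(patterns).unwrap();
+    let mat = ac.find(haystack).unwrap();
+    assert_eq!("b", &haystack[mat.start()..mat.end()]);
+
+    // Leftmost-first: among matches starting at the same position, the
+    // pattern given first by the caller wins ('abc' is listed before 'abcd').
+    let ac = AhoCorasick::builder()
+        .match_kind(MatchKind::LeftmostFirst)
+        .build(patterns)
+        .unwrap();
+    let mat = ac.find(haystack).unwrap();
+    assert_eq!("abc", &haystack[mat.start()..mat.end()]);
+
+    // Leftmost-longest: the longest match at that position wins.
+    let ac = AhoCorasick::builder()
+        .match_kind(MatchKind::LeftmostLongest)
+        .build(patterns)
+        .unwrap();
+    let mat = ac.find(haystack).unwrap();
+    assert_eq!("abcd", &haystack[mat.start()..mat.end()]);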
+
+The reason why supporting these match semantics is important is because it
+gives the user more control over the match procedure. For example,
+leftmost-first permits users to implement match priority by simply putting the
+higher priority patterns first. Leftmost-longest, on the other hand, permits
+finding the longest possible match, which might be useful when trying to find
+words matching a dictionary. Additionally, regex engines often want to use
+Aho-Corasick as an optimization when searching for an alternation of literals.
+In order to preserve correct match semantics, regex engines typically can't use
+the standard textbook definition directly, since regex engines will implement
+either leftmost-first (Perl-like) or leftmost-longest (POSIX) match semantics.
+
+Supporting leftmost semantics requires a couple key changes:
+
+* Constructing the Aho-Corasick automaton changes a bit in both how the trie is
+ constructed and how failure transitions are found. Namely, only a subset
+ of the failure transitions are added. Specifically, only the failure
+ transitions that either do not occur after a match or do occur after a match
+ but preserve that match are kept. (More details on this can be found in
+ `src/nfa/noncontiguous.rs`.)
+* The search algorithm changes slightly. Since we are looking for the leftmost
+ match, we cannot quit as soon as a match is detected. Instead, after a match
+ is detected, we must keep searching until either the end of the input or
+ until a dead state is seen. (Dead states are not used for standard match
+ semantics. Dead states mean that searching should stop after a match has been
+ found.)
+
+Most other implementations of Aho-Corasick do support leftmost match semantics,
+but they do it with more overhead at search time, or even worse, with a queue
+of matches and sophisticated hijinks to disambiguate the matches. While our
+construction algorithm becomes a bit more complicated, the correct match
+semantics fall out from the structure of the automaton itself.
+
+
+# Overlapping matches
+
+One of the nice properties of an Aho-Corasick automaton is that it can report
+all possible matches, even when they overlap with one another. In this mode,
+the match semantics don't matter, since all possible matches are reported.
+Overlapping searches work just like regular searches, except the state
+identifier at which the previous search left off is carried over to the next
+search, so that it can pick up where it left off. If there are additional
+matches at that state, then they are reported before resuming the search.
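+
+A small example of this using the crate's `find_overlapping_iter` routine
+(which, per below, requires the default standard match semantics):
+
+    use aho_corasick::AhoCorasick;
+
+    let ac = AhoCorasick::new(&["a", "ab", "abc"]).unwrap();
+    let matches: Vec<(usize, usize)> = ac
+        .find_overlapping_iter("abc")
+        .map(|m| (m.start(), m.end()))
+        .collect();
+    // Every pattern matches, even though the matches overlap.
+    assert_eq!(matches, vec![(0, 1), (0, 2), (0, 3)]);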
+
+Enabling leftmost-first or leftmost-longest match semantics causes the
+automaton to use a subset of all failure transitions, which means that
+overlapping searches cannot be used. Therefore, if leftmost match semantics are
+used, attempting to do an overlapping search will return an error (or panic
+when using the infallible APIs). Thus, to get overlapping searches, the caller
+must use the default standard match semantics. This behavior was chosen because
+there are only two alternatives, which were deemed worse:
+
+* Compile two automatons internally, one for standard semantics and one for
+ the semantics requested by the caller (if not standard).
+* Create a new type, distinct from the `AhoCorasick` type, which has different
+ capabilities based on the configuration options.
+
+The first is untenable because of the amount of memory used by the automaton.
+The second increases the complexity of the API too much by adding too many
+types that do similar things. It is conceptually much simpler to keep all
+searching isolated to a single type.
+
+
+# Stream searching
+
+Since Aho-Corasick is an automaton, it is possible to do partial searches on
+parts of the haystack, and then resume that search on subsequent pieces
+of the haystack. This is useful when the haystack you're trying to search is
+not stored contiguously in memory, or if one does not want to read the entire
+haystack into memory at once.
+
+Currently, only standard semantics are supported for stream searching. This is
+some of the more complicated code in this crate, and is something I would very
+much like to improve. In particular, it currently has the restriction that it
+must buffer at least enough of the haystack in memory in order to fit the
+longest possible match. The difficulty in getting stream searching right is
+that the implementation choices (such as the buffer size) often impact what the
+API looks like and what it's allowed to do.
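+
+From the API side, the idea is that anything implementing `std::io::Read` can
+be searched incrementally. A sketch using the crate's `stream_find_iter`
+routine (a byte slice stands in for, say, a file here):
+
+    use aho_corasick::AhoCorasick;
+
+    let ac = AhoCorasick::new(&["bar"]).unwrap();
+    let rdr: &[u8] = b"foo bar baz";
+    let mut spans = vec![];
+    for result in ac.stream_find_iter(rdr) {
+        // Each item is an io::Result<Match>, since reading can fail.
+        let mat = result.expect("I/O error while searching the stream");
+        spans.push((mat.start(), mat.end()));
+    }
+    assert_eq!(spans, vec![(4, 7)]);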
+
+
+# Prefilters
+
+In some cases, Aho-Corasick is not the fastest way to find matches of
+multiple patterns. Sometimes, the search can be accelerated using highly
+optimized SIMD routines. For example, consider searching the following
+patterns:
+
+    Sherlock
+    Moriarty
+    Watson
+
+It is plausible that it would be much faster to quickly look for occurrences of
+the leading bytes, `S`, `M` or `W`, before trying to start searching via the
+automaton. Indeed, this is exactly what this crate will do.
+
+When there are more than three distinct starting bytes, this crate will
+look for three distinct bytes occurring at any position in the patterns, while
+preferring bytes that are heuristically determined to be rare over others. For
+example:
+
+    Abuzz
+    Sanchez
+    Vasquez
+    Topaz
+    Waltz
+
+Here, we have more than 3 distinct starting bytes, but all of the patterns
+contain `z`, which is typically a rare byte. In this case, the prefilter will
+scan for `z`, back up a bit, and then execute the Aho-Corasick automaton.
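+
+A rough sketch of that idea using the `memchr` crate (the real prefilter
+machinery lives in `src/util/prefilter.rs` and is considerably more involved):
+
+    use memchr::memchr;
+
+    // Scan for a rare byte and report the position at which the full
+    // Aho-Corasick search should resume. Backing up by the maximum pattern
+    // length ensures a match containing the rare byte is not skipped.
+    fn next_candidate(
+        haystack: &[u8],
+        at: usize,
+        rare_byte: u8,
+        max_pattern_len: usize,
+    ) -> Option<usize> {
+        let found = at + memchr(rare_byte, &haystack[at..])?;
+        Some(found.saturating_sub(max_pattern_len.saturating_sub(1)))
+    }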
+
+If all of that fails, then a packed multiple substring algorithm will be
+attempted. Currently, the only algorithm available for this is Teddy, but more
+may be added in the future. Teddy is unlike the above prefilters in that it
+confirms its own matches, so when Teddy is active, it might not be necessary
+for Aho-Corasick to run at all. However, the current Teddy implementation
+only works on `x86_64` when SSSE3 or AVX2 are available, or on `aarch64`
+(using NEON), and moreover, only works _well_ when there are a small number
+of patterns (say, less than 100). Teddy also requires the haystack to be of a
+certain length (more than 16-34 bytes). When the haystack is shorter than that,
+Rabin-Karp is used instead. (See `src/packed/rabinkarp.rs`.)
+
+There is a more thorough description of Teddy at
+[`src/packed/teddy/README.md`](src/packed/teddy/README.md).
diff --git a/third_party/rust/aho-corasick/LICENSE-MIT b/third_party/rust/aho-corasick/LICENSE-MIT
new file mode 100644
index 0000000000..3b0a5dc09c
--- /dev/null
+++ b/third_party/rust/aho-corasick/LICENSE-MIT
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Andrew Gallant
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/third_party/rust/aho-corasick/README.md b/third_party/rust/aho-corasick/README.md
new file mode 100644
index 0000000000..c0f525fdc6
--- /dev/null
+++ b/third_party/rust/aho-corasick/README.md
@@ -0,0 +1,174 @@
+aho-corasick
+============
+A library for finding occurrences of many patterns at once with SIMD
+acceleration in some cases. This library provides multiple pattern
+search principally through an implementation of the
+[Aho-Corasick algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm),
+which builds a finite state machine for executing searches in linear time.
+Features include case insensitive matching, overlapping matches, fast searching
+via SIMD and optional full DFA construction and search & replace in streams.
+
+[![Build status](https://github.com/BurntSushi/aho-corasick/workflows/ci/badge.svg)](https://github.com/BurntSushi/aho-corasick/actions)
+[![crates.io](https://img.shields.io/crates/v/aho-corasick.svg)](https://crates.io/crates/aho-corasick)
+
+Dual-licensed under MIT or the [UNLICENSE](https://unlicense.org/).
+
+
+### Documentation
+
+https://docs.rs/aho-corasick
+
+
+### Usage
+
+Run `cargo add aho-corasick` to automatically add this crate as a dependency
+in your `Cargo.toml` file.
+
+
+### Example: basic searching
+
+This example shows how to search for occurrences of multiple patterns
+simultaneously. Each match includes the pattern that matched along with the
+byte offsets of the match.
+
+```rust
+use aho_corasick::{AhoCorasick, PatternID};
+
+let patterns = &["apple", "maple", "Snapple"];
+let haystack = "Nobody likes maple in their apple flavored Snapple.";
+
+let ac = AhoCorasick::new(patterns).unwrap();
+let mut matches = vec![];
+for mat in ac.find_iter(haystack) {
+    matches.push((mat.pattern(), mat.start(), mat.end()));
+}
+assert_eq!(matches, vec![
+    (PatternID::must(1), 13, 18),
+    (PatternID::must(0), 28, 33),
+    (PatternID::must(2), 43, 50),
+]);
+```
+
+
+### Example: ASCII case insensitivity
+
+This is like the previous example, but matches `Snapple` case insensitively
+using `AhoCorasickBuilder`:
+
+```rust
+use aho_corasick::{AhoCorasick, PatternID};
+
+let patterns = &["apple", "maple", "snapple"];
+let haystack = "Nobody likes maple in their apple flavored Snapple.";
+
+let ac = AhoCorasick::builder()
+    .ascii_case_insensitive(true)
+    .build(patterns)
+    .unwrap();
+let mut matches = vec![];
+for mat in ac.find_iter(haystack) {
+    matches.push((mat.pattern(), mat.start(), mat.end()));
+}
+assert_eq!(matches, vec![
+    (PatternID::must(1), 13, 18),
+    (PatternID::must(0), 28, 33),
+    (PatternID::must(2), 43, 50),
+]);
+```
+
+
+### Example: replacing matches in a stream
+
+This example shows how to execute a search and replace on a stream without
+loading the entire stream into memory first.
+
+```rust,ignore
+use aho_corasick::AhoCorasick;
+
+let patterns = &["fox", "brown", "quick"];
+let replace_with = &["sloth", "grey", "slow"];
+
+// In a real example, these might be `std::fs::File`s instead. All you need to
+// do is supply a pair of `std::io::Read` and `std::io::Write` implementations.
+let rdr = "The quick brown fox.";
+let mut wtr = vec![];
+
+let ac = AhoCorasick::new(patterns).unwrap();
+ac.stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)
+    .expect("stream_replace_all failed");
+assert_eq!(b"The slow grey sloth.".to_vec(), wtr);
+```
+
+
+### Example: finding the leftmost first match
+
+In the textbook description of Aho-Corasick, its formulation is typically
+structured such that it reports all possible matches, even when they overlap
+with another. In many cases, overlapping matches may not be desired, such as
+the case of finding all successive non-overlapping matches like you might with
+a standard regular expression.
+
+Unfortunately the "obvious" way to modify the Aho-Corasick algorithm to do
+this doesn't always work in the expected way, since it will report matches as
+soon as they are seen. For example, consider matching the regex `Samwise|Sam`
+against the text `Samwise`. Most regex engines (that are Perl-like, or
+non-POSIX) will report `Samwise` as a match, but the standard Aho-Corasick
+algorithm modified for reporting non-overlapping matches will report `Sam`.
+
+A novel contribution of this library is the ability to change the match
+semantics of Aho-Corasick (without additional search time overhead) such that
+`Samwise` is reported instead. For example, here's the standard approach:
+
+```rust
+use aho_corasick::AhoCorasick;
+
+let patterns = &["Samwise", "Sam"];
+let haystack = "Samwise";
+
+let ac = AhoCorasick::new(patterns).unwrap();
+let mat = ac.find(haystack).expect("should have a match");
+assert_eq!("Sam", &haystack[mat.start()..mat.end()]);
+```
+
+And now here's the leftmost-first version, which matches how a Perl-like
+regex will work:
+
+```rust
+use aho_corasick::{AhoCorasick, MatchKind};
+
+let patterns = &["Samwise", "Sam"];
+let haystack = "Samwise";
+
+let ac = AhoCorasick::builder()
+    .match_kind(MatchKind::LeftmostFirst)
+    .build(patterns)
+    .unwrap();
+let mat = ac.find(haystack).expect("should have a match");
+assert_eq!("Samwise", &haystack[mat.start()..mat.end()]);
+```
+
+In addition to leftmost-first semantics, this library also supports
+leftmost-longest semantics, which match the POSIX behavior of a regular
+expression alternation. See `MatchKind` in the docs for more details.
+
+
+### Minimum Rust version policy
+
+This crate's minimum supported `rustc` version is `1.60.0`.
+
+The current policy is that the minimum Rust version required to use this crate
+can be increased in minor version updates. For example, if `crate 1.0` requires
+Rust 1.20.0, then `crate 1.0.z` for all values of `z` will also require Rust
+1.20.0 or newer. However, `crate 1.y` for `y > 0` may require a newer minimum
+version of Rust.
+
+In general, this crate will be conservative with respect to the minimum
+supported version of Rust.
+
+
+### FFI bindings
+
+* [G-Research/ahocorasick_rs](https://github.com/G-Research/ahocorasick_rs/)
+  is a Python wrapper for this library.
+* [tmikus/ahocorasick_rs](https://github.com/tmikus/ahocorasick_rs) is a Go
+  wrapper for this library.
diff --git a/third_party/rust/aho-corasick/UNLICENSE b/third_party/rust/aho-corasick/UNLICENSE
new file mode 100644
index 0000000000..68a49daad8
--- /dev/null
+++ b/third_party/rust/aho-corasick/UNLICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
diff --git a/third_party/rust/aho-corasick/rustfmt.toml b/third_party/rust/aho-corasick/rustfmt.toml
new file mode 100644
index 0000000000..aa37a218b9
--- /dev/null
+++ b/third_party/rust/aho-corasick/rustfmt.toml
@@ -0,0 +1,2 @@
+max_width = 79
+use_small_heuristics = "max"
diff --git a/third_party/rust/aho-corasick/src/ahocorasick.rs b/third_party/rust/aho-corasick/src/ahocorasick.rs
new file mode 100644
index 0000000000..2947627704
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/ahocorasick.rs
@@ -0,0 +1,2789 @@
+use core::{
+    fmt::Debug,
+    panic::{RefUnwindSafe, UnwindSafe},
+};
+
+use alloc::{string::String, sync::Arc, vec::Vec};
+
+use crate::{
+    automaton::{self, Automaton, OverlappingState},
+    dfa,
+    nfa::{contiguous, noncontiguous},
+    util::{
+        error::{BuildError, MatchError},
+        prefilter::Prefilter,
+        primitives::{PatternID, StateID},
+        search::{Anchored, Input, Match, MatchKind, StartKind},
+    },
+};
+
+/// An automaton for searching multiple strings in linear time.
+///
+/// The `AhoCorasick` type supports a few basic ways of constructing an
+/// automaton, with the default being [`AhoCorasick::new`]. However, there
+/// are a fair number of configurable options that can be set by using
+/// [`AhoCorasickBuilder`] instead. Such options include, but are not limited
+/// to, how matches are determined, simple case insensitivity, whether to use a
+/// DFA or not and various knobs for controlling the space-vs-time trade offs
+/// taken when building the automaton.
+///
+/// # Resource usage
+///
+/// Aho-Corasick automatons are always constructed in `O(p)` time, where
+/// `p` is the combined length of all patterns being searched. With that
+/// said, building an automaton can be fairly costly because of high constant
+/// factors, particularly when enabling the [DFA](AhoCorasickKind::DFA) option
+/// with [`AhoCorasickBuilder::kind`]. For this reason, it's generally a good
+/// idea to build an automaton once and reuse it as much as possible.
+///
+/// Aho-Corasick automatons can also use a fair bit of memory. To get
+/// a concrete idea of how much memory is being used, try using the
+/// [`AhoCorasick::memory_usage`] method.
+///
+/// To give a quick idea of the differences between Aho-Corasick
+/// implementations and their resource usage, here's a sample of construction
+/// times and heap memory used after building an automaton from 100,000
+/// randomly selected titles from Wikipedia:
+///
+/// * 99MB for a [`noncontiguous::NFA`] in 240ms.
+/// * 21MB for a [`contiguous::NFA`] in 275ms.
+/// * 1.6GB for a [`dfa::DFA`] in 1.88s.
+///
+/// (Note that the memory usage above reflects the size of each automaton and
+/// not peak memory usage. For example, building a contiguous NFA requires
+/// first building a noncontiguous NFA. Once the contiguous NFA is built, the
+/// noncontiguous NFA is freed.)
+///
+/// This experiment very strongly argues that a contiguous NFA is often the
+/// best balance in terms of resource usage. It takes a little longer to build,
+/// but its memory usage is quite small. Its search speed (not listed) is
+/// also often faster than a noncontiguous NFA, but a little slower than a
+/// DFA. Indeed, when no specific [`AhoCorasickKind`] is used (which is the
+/// default), a contiguous NFA is used in most cases.
+///
+/// The only "catch" to using a contiguous NFA is that, because of its variety
+/// of compression tricks, it may not be able to support automatons as large as
+/// what the noncontiguous NFA supports. In which case, building a contiguous
+/// NFA will fail and (by default) `AhoCorasick` will automatically fall
+/// back to a noncontiguous NFA. (This typically only happens when building
+/// automatons from millions of patterns.) Otherwise, the small additional time
+/// for building a contiguous NFA is almost certainly worth it.
+///
+/// # Cloning
+///
+/// The `AhoCorasick` type uses thread safe reference counting internally. It
+/// is guaranteed that it is cheap to clone.
+///
+/// # Search configuration
+///
+/// Most of the search routines accept anything that can be cheaply converted
+/// to an [`Input`]. This includes `&[u8]`, `&str` and `Input` itself.
+///
+/// # Construction failure
+///
+/// It is generally possible for building an Aho-Corasick automaton to fail.
+/// Construction can fail in generally one way: when the inputs provided are
+/// too big. Whether that's a pattern that is too long, too many patterns
+/// or some combination of both. A first approximation for the scale at which
+/// construction can fail is somewhere around "millions of patterns."
+///
+/// For that reason, if you're building an Aho-Corasick automaton from
+/// untrusted input (or input that doesn't have any reasonable bounds on its
+/// size), then it is strongly recommended to handle the possibility of an
+/// error.
+///
+/// If you're constructing an Aho-Corasick automaton from static or trusted
+/// data, then it is likely acceptable to panic (by calling `unwrap()` or
+/// `expect()`) if construction fails.
+///
+/// # Fallibility
+///
+/// The `AhoCorasick` type provides a number of methods for searching, as one
+/// might expect. Depending on how the Aho-Corasick automaton was built and
+/// depending on the search configuration, it is possible for a search to
+/// return an error. Since an error is _never_ dependent on the actual contents
+/// of the haystack, this type provides both infallible and fallible methods
+/// for searching. The infallible methods panic if an error occurs, and can be
+/// used for convenience and when you know the search will never return an
+/// error.
+///
+/// For example, the [`AhoCorasick::find_iter`] method is the infallible
+/// version of the [`AhoCorasick::try_find_iter`] method.
+///
+/// Examples of errors that can occur:
+///
+/// * Running a search that requires [`MatchKind::Standard`] semantics (such
+/// as a stream or overlapping search) with an automaton that was built with
+/// [`MatchKind::LeftmostFirst`] or [`MatchKind::LeftmostLongest`] semantics.
+/// * Running an anchored search with an automaton that only supports
+/// unanchored searches. (By default, `AhoCorasick` only supports unanchored
+/// searches. But this can be toggled with [`AhoCorasickBuilder::start_kind`].)
+/// * Running an unanchored search with an automaton that only supports
+/// anchored searches.
+///
+/// The common thread between the different types of errors is that they are
+/// all rooted in the automaton construction and search configurations. If
+/// those configurations are a static property of your program, then it is
+/// reasonable to call infallible routines since you know an error will never
+/// occur. And if one _does_ occur, then it's a bug in your program.
+///
+/// To re-iterate, if the patterns, build or search configuration come from
+/// user or untrusted data, then you should handle errors at build or search
+/// time. If only the haystack comes from user or untrusted data, then there
+/// should be no need to handle errors anywhere and it is generally encouraged
+/// to `unwrap()` (or `expect()`) both build and search time calls.
+///
+/// # Examples
+///
+/// This example shows how to search for occurrences of multiple patterns
+/// simultaneously in a case insensitive fashion. Each match includes the
+/// pattern that matched along with the byte offsets of the match.
+///
+/// ```
+/// use aho_corasick::{AhoCorasick, PatternID};
+///
+/// let patterns = &["apple", "maple", "snapple"];
+/// let haystack = "Nobody likes maple in their apple flavored Snapple.";
+///
+/// let ac = AhoCorasick::builder()
+/// .ascii_case_insensitive(true)
+/// .build(patterns)
+/// .unwrap();
+/// let mut matches = vec![];
+/// for mat in ac.find_iter(haystack) {
+/// matches.push((mat.pattern(), mat.start(), mat.end()));
+/// }
+/// assert_eq!(matches, vec![
+/// (PatternID::must(1), 13, 18),
+/// (PatternID::must(0), 28, 33),
+/// (PatternID::must(2), 43, 50),
+/// ]);
+/// ```
+///
+/// This example shows how to replace matches with some other string:
+///
+/// ```
+/// use aho_corasick::AhoCorasick;
+///
+/// let patterns = &["fox", "brown", "quick"];
+/// let haystack = "The quick brown fox.";
+/// let replace_with = &["sloth", "grey", "slow"];
+///
+/// let ac = AhoCorasick::new(patterns).unwrap();
+/// let result = ac.replace_all(haystack, replace_with);
+/// assert_eq!(result, "The slow grey sloth.");
+/// ```
+#[derive(Clone)]
+pub struct AhoCorasick {
+ /// The underlying Aho-Corasick automaton. It's one of
+ /// nfa::noncontiguous::NFA, nfa::contiguous::NFA or dfa::DFA.
+ aut: Arc<dyn AcAutomaton>,
+ /// The specific Aho-Corasick kind chosen. This makes it possible to
+ /// inspect any `AhoCorasick` and know what kind of search strategy it
+ /// uses.
+ kind: AhoCorasickKind,
+ /// The start kind of this automaton as configured by the caller.
+ ///
+ /// We don't really *need* to put this here, since the underlying automaton
+ /// will correctly return errors if the caller requests an unsupported
+ /// search type. But we do keep this here for API behavior consistency.
+ /// Namely, the NFAs in this crate support both unanchored and anchored
+ /// searches unconditionally. There's no way to disable one or the other.
+ /// They always both work. But the DFA in this crate specifically only
+ /// supports both unanchored and anchored searches if it's configured to
+ /// do so. Why? Because for the DFA, supporting both essentially requires
+ /// two copies of the transition table: one generated by following failure
+ /// transitions from the original NFA and one generated by not following
+ /// those failure transitions.
+ ///
+ /// So why record the start kind here? Well, consider what happens
+ /// when no specific 'AhoCorasickKind' is selected by the caller and
+ /// 'StartKind::Unanchored' is used (both are the default). It *might*
+ /// result in using a DFA or it might pick an NFA. If it picks an NFA, the
+ /// caller would then be able to run anchored searches, even though the
+ /// caller only asked for support for unanchored searches. Maybe that's
+ /// fine, but what if the DFA was chosen instead? Oops, the caller would
+ /// get an error.
+ ///
+ /// Basically, it seems bad to return an error or not based on some
+ /// internal implementation choice. So we smooth things out and ensure
+ /// anchored searches *always* report an error when only unanchored support
+ /// was asked for (and vice versa), even if the underlying automaton
+ /// supports it.
+ start_kind: StartKind,
+}
+
+/// Convenience constructors for an Aho-Corasick searcher. To configure the
+/// searcher, use an [`AhoCorasickBuilder`] instead.
+impl AhoCorasick {
+ /// Create a new Aho-Corasick automaton using the default configuration.
+ ///
+ /// The default configuration optimizes for less space usage, but at the
+ /// expense of longer search times. To change the configuration, use
+ /// [`AhoCorasickBuilder`].
+ ///
+ /// This uses the default [`MatchKind::Standard`] match semantics, which
+ /// reports a match as soon as it is found. This corresponds to the
+ /// standard match semantics supported by textbook descriptions of the
+ /// Aho-Corasick algorithm.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, PatternID};
+ ///
+ /// let ac = AhoCorasick::new(&["foo", "bar", "baz"]).unwrap();
+ /// assert_eq!(
+ /// Some(PatternID::must(1)),
+ /// ac.find("xxx bar xxx").map(|m| m.pattern()),
+ /// );
+ /// ```
+ pub fn new<I, P>(patterns: I) -> Result<AhoCorasick, BuildError>
+ where
+ I: IntoIterator<Item = P>,
+ P: AsRef<[u8]>,
+ {
+ AhoCorasickBuilder::new().build(patterns)
+ }
+
+ /// A convenience method for returning a new Aho-Corasick builder.
+ ///
+ /// This usually permits one to just import the `AhoCorasick` type.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, Match, MatchKind};
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostFirst)
+ /// .build(&["samwise", "sam"])
+ /// .unwrap();
+ /// assert_eq!(Some(Match::must(0, 0..7)), ac.find("samwise"));
+ /// ```
+ pub fn builder() -> AhoCorasickBuilder {
+ AhoCorasickBuilder::new()
+ }
+}
+
+/// Infallible search routines. These APIs panic when the underlying search
+/// would otherwise fail. Infallible routines are useful because the errors are
+/// a result of both search-time configuration and what configuration is used
+/// to build the Aho-Corasick searcher. Both of these things are not usually
+/// the result of user input, and thus, an error is typically indicative of a
+/// programmer error. In cases where callers want errors instead of panics, use
+/// the corresponding `try` method in the section below.
+impl AhoCorasick {
+ /// Returns true if and only if this automaton matches the haystack at any
+ /// position.
+ ///
+ /// `input` may be any type that is cheaply convertible to an `Input`. This
+ /// includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// Aside from convenience, when `AhoCorasick` was built with
+ /// leftmost-first or leftmost-longest semantics, this might result in a
+ /// search that visits less of the haystack than [`AhoCorasick::find`]
+ /// would otherwise. (For standard semantics, matches are always
+ /// immediately returned once they are seen, so there is no way for this to
+ /// do less work in that case.)
+ ///
+ /// Note that there is no corresponding fallible routine for this method.
+ /// If you need a fallible version of this, then [`AhoCorasick::try_find`]
+ /// can be used with [`Input::earliest`] enabled.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use aho_corasick::AhoCorasick;
+ ///
+ /// let ac = AhoCorasick::new(&[
+ /// "foo", "bar", "quux", "baz",
+ /// ]).unwrap();
+ /// assert!(ac.is_match("xxx bar xxx"));
+ /// assert!(!ac.is_match("xxx qux xxx"));
+ /// ```
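+ ///
+ /// As a sketch of the fallible alternative mentioned above (using
+ /// [`AhoCorasick::try_find`] with [`Input::earliest`] enabled, since there
+ /// is no dedicated `try_is_match` method):
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, Input};
+ ///
+ /// let ac = AhoCorasick::new(&["foo", "bar", "quux", "baz"]).unwrap();
+ /// let input = Input::new("xxx bar xxx").earliest(true);
+ /// assert!(ac.try_find(input)?.is_some());
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```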
+ pub fn is_match<'h, I: Into<Input<'h>>>(&self, input: I) -> bool {
+ self.aut
+ .try_find(&input.into().earliest(true))
+ .expect("AhoCorasick::try_find is not expected to fail")
+ .is_some()
+ }
+
+ /// Returns the location of the first match according to the match
+ /// semantics that this automaton was constructed with.
+ ///
+ /// `input` may be any type that is cheaply convertible to an `Input`. This
+ /// includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// This is the infallible version of [`AhoCorasick::try_find`].
+ ///
+ /// # Panics
+ ///
+ /// This panics when [`AhoCorasick::try_find`] would return an error.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage, with standard semantics:
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, MatchKind};
+ ///
+ /// let patterns = &["b", "abc", "abcd"];
+ /// let haystack = "abcd";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::Standard) // default, not necessary
+ /// .build(patterns)
+ /// .unwrap();
+ /// let mat = ac.find(haystack).expect("should have a match");
+ /// assert_eq!("b", &haystack[mat.start()..mat.end()]);
+ /// ```
+ ///
+ /// Now with leftmost-first semantics:
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, MatchKind};
+ ///
+ /// let patterns = &["b", "abc", "abcd"];
+ /// let haystack = "abcd";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostFirst)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let mat = ac.find(haystack).expect("should have a match");
+ /// assert_eq!("abc", &haystack[mat.start()..mat.end()]);
+ /// ```
+ ///
+ /// And finally, leftmost-longest semantics:
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, MatchKind};
+ ///
+ /// let patterns = &["b", "abc", "abcd"];
+ /// let haystack = "abcd";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostLongest)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let mat = ac.find(haystack).expect("should have a match");
+ /// ```
+ ///
+ /// # Example: configuring a search
+ ///
+ /// Because this method accepts anything that can be turned into an
+ /// [`Input`], it's possible to provide an `Input` directly in order to
+ /// configure the search. In this example, we show how to use the
+ /// `earliest` option to force the search to return as soon as it knows
+ /// a match has occurred.
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, Input, MatchKind};
+ ///
+ /// let patterns = &["b", "abc", "abcd"];
+ /// let haystack = "abcd";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostLongest)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let mat = ac.find(Input::new(haystack).earliest(true))
+ /// .expect("should have a match");
+ /// // The correct leftmost-longest match here is 'abcd', but since we
+ /// // told the search to quit as soon as it knows a match has occurred,
+ /// // we get a different match back.
+ /// assert_eq!("b", &haystack[mat.start()..mat.end()]);
+ /// ```
+ pub fn find<'h, I: Into<Input<'h>>>(&self, input: I) -> Option<Match> {
+ self.try_find(input)
+ .expect("AhoCorasick::try_find is not expected to fail")
+ }
+
+ /// Returns the location of the first overlapping match in the given
+ /// input with respect to the current state of the underlying searcher.
+ ///
+ /// `input` may be any type that is cheaply convertible to an `Input`. This
+ /// includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// Overlapping searches do not report matches in their return value.
+ /// Instead, matches can be accessed via [`OverlappingState::get_match`]
+ /// after a search call.
+ ///
+ /// This is the infallible version of
+ /// [`AhoCorasick::try_find_overlapping`].
+ ///
+ /// # Panics
+ ///
+ /// This panics when [`AhoCorasick::try_find_overlapping`] would
+ /// return an error. For example, when the Aho-Corasick searcher
+ /// doesn't support overlapping searches. (Only searchers built with
+ /// [`MatchKind::Standard`] semantics support overlapping searches.)
+ ///
+ /// # Example
+ ///
+ /// This shows how we can repeatedly call an overlapping search without
+ /// ever needing to explicitly re-slice the haystack. Overlapping search
+ /// works this way because searches depend on state saved during the
+ /// previous search.
+ ///
+ /// ```
+ /// use aho_corasick::{
+ /// automaton::OverlappingState,
+ /// AhoCorasick, Input, Match,
+ /// };
+ ///
+ /// let patterns = &["append", "appendage", "app"];
+ /// let haystack = "append the app to the appendage";
+ ///
+ /// let ac = AhoCorasick::new(patterns).unwrap();
+ /// let mut state = OverlappingState::start();
+ ///
+ /// ac.find_overlapping(haystack, &mut state);
+ /// assert_eq!(Some(Match::must(2, 0..3)), state.get_match());
+ ///
+ /// ac.find_overlapping(haystack, &mut state);
+ /// assert_eq!(Some(Match::must(0, 0..6)), state.get_match());
+ ///
+ /// ac.find_overlapping(haystack, &mut state);
+ /// assert_eq!(Some(Match::must(2, 11..14)), state.get_match());
+ ///
+ /// ac.find_overlapping(haystack, &mut state);
+ /// assert_eq!(Some(Match::must(2, 22..25)), state.get_match());
+ ///
+ /// ac.find_overlapping(haystack, &mut state);
+ /// assert_eq!(Some(Match::must(0, 22..28)), state.get_match());
+ ///
+ /// ac.find_overlapping(haystack, &mut state);
+ /// assert_eq!(Some(Match::must(1, 22..31)), state.get_match());
+ ///
+ /// // No more matches to be found.
+ /// ac.find_overlapping(haystack, &mut state);
+ /// assert_eq!(None, state.get_match());
+ /// ```
+ pub fn find_overlapping<'h, I: Into<Input<'h>>>(
+ &self,
+ input: I,
+ state: &mut OverlappingState,
+ ) {
+ self.try_find_overlapping(input, state).expect(
+ "AhoCorasick::try_find_overlapping is not expected to fail",
+ )
+ }
+
+ /// Returns an iterator of non-overlapping matches, using the match
+ /// semantics that this automaton was constructed with.
+ ///
+ /// `input` may be any type that is cheaply convertible to an `Input`. This
+ /// includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// This is the infallible version of [`AhoCorasick::try_find_iter`].
+ ///
+ /// # Panics
+ ///
+ /// This panics when [`AhoCorasick::try_find_iter`] would return an error.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage, with standard semantics:
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, MatchKind, PatternID};
+ ///
+ /// let patterns = &["append", "appendage", "app"];
+ /// let haystack = "append the app to the appendage";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::Standard) // default, not necessary
+ /// .build(patterns)
+ /// .unwrap();
+ /// let matches: Vec<PatternID> = ac
+ /// .find_iter(haystack)
+ /// .map(|mat| mat.pattern())
+ /// .collect();
+ /// assert_eq!(vec![
+ /// PatternID::must(2),
+ /// PatternID::must(2),
+ /// PatternID::must(2),
+ /// ], matches);
+ /// ```
+ ///
+ /// Now with leftmost-first semantics:
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, MatchKind, PatternID};
+ ///
+ /// let patterns = &["append", "appendage", "app"];
+ /// let haystack = "append the app to the appendage";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostFirst)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let matches: Vec<PatternID> = ac
+ /// .find_iter(haystack)
+ /// .map(|mat| mat.pattern())
+ /// .collect();
+ /// assert_eq!(vec![
+ /// PatternID::must(0),
+ /// PatternID::must(2),
+ /// PatternID::must(0),
+ /// ], matches);
+ /// ```
+ ///
+ /// And finally, leftmost-longest semantics:
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, MatchKind, PatternID};
+ ///
+ /// let patterns = &["append", "appendage", "app"];
+ /// let haystack = "append the app to the appendage";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostLongest)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let matches: Vec<PatternID> = ac
+ /// .find_iter(haystack)
+ /// .map(|mat| mat.pattern())
+ /// .collect();
+ /// assert_eq!(vec![
+ /// PatternID::must(0),
+ /// PatternID::must(2),
+ /// PatternID::must(1),
+ /// ], matches);
+ /// ```
+ pub fn find_iter<'a, 'h, I: Into<Input<'h>>>(
+ &'a self,
+ input: I,
+ ) -> FindIter<'a, 'h> {
+ self.try_find_iter(input)
+ .expect("AhoCorasick::try_find_iter is not expected to fail")
+ }
+
+ /// Returns an iterator of overlapping matches. Stated differently, this
+ /// returns an iterator of all possible matches at every position.
+ ///
+ /// `input` may be any type that is cheaply convertible to an `Input`. This
+ /// includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// This is the infallible version of
+ /// [`AhoCorasick::try_find_overlapping_iter`].
+ ///
+ /// # Panics
+ ///
+ /// This panics when [`AhoCorasick::try_find_overlapping_iter`] would return
+ /// an error. For example, when the Aho-Corasick searcher is built with
+ /// either leftmost-first or leftmost-longest match semantics. Stated
+ /// differently, overlapping searches require one to build the searcher
+ /// with [`MatchKind::Standard`] (it is the default).
+ ///
+ /// # Example: basic usage
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, PatternID};
+ ///
+ /// let patterns = &["append", "appendage", "app"];
+ /// let haystack = "append the app to the appendage";
+ ///
+ /// let ac = AhoCorasick::new(patterns).unwrap();
+ /// let matches: Vec<PatternID> = ac
+ /// .find_overlapping_iter(haystack)
+ /// .map(|mat| mat.pattern())
+ /// .collect();
+ /// assert_eq!(vec![
+ /// PatternID::must(2),
+ /// PatternID::must(0),
+ /// PatternID::must(2),
+ /// PatternID::must(2),
+ /// PatternID::must(0),
+ /// PatternID::must(1),
+ /// ], matches);
+ /// ```
+ pub fn find_overlapping_iter<'a, 'h, I: Into<Input<'h>>>(
+ &'a self,
+ input: I,
+ ) -> FindOverlappingIter<'a, 'h> {
+ self.try_find_overlapping_iter(input).expect(
+ "AhoCorasick::try_find_overlapping_iter is not expected to fail",
+ )
+ }
+
+ /// Replace all matches with a corresponding value in the `replace_with`
+ /// slice given. Matches correspond to the same matches as reported by
+ /// [`AhoCorasick::find_iter`].
+ ///
+ /// Replacements are determined by the index of the matching pattern.
+ /// For example, if the pattern with index `2` is found, then it is
+ /// replaced by `replace_with[2]`.
+ ///
+ /// This is the infallible version of [`AhoCorasick::try_replace_all`].
+ ///
+ /// # Panics
+ ///
+ /// This panics when [`AhoCorasick::try_replace_all`] would return an
+ /// error.
+ ///
+ /// This also panics when `replace_with.len()` does not equal
+ /// [`AhoCorasick::patterns_len`].
+ ///
+ /// # Example: basic usage
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, MatchKind};
+ ///
+ /// let patterns = &["append", "appendage", "app"];
+ /// let haystack = "append the app to the appendage";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostFirst)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let result = ac.replace_all(haystack, &["x", "y", "z"]);
+ /// assert_eq!("x the z to the xage", result);
+ /// ```
+ pub fn replace_all<B>(&self, haystack: &str, replace_with: &[B]) -> String
+ where
+ B: AsRef<str>,
+ {
+ self.try_replace_all(haystack, replace_with)
+ .expect("AhoCorasick::try_replace_all is not expected to fail")
+ }
+
+ /// Replace all matches using raw bytes with a corresponding value in the
+ /// `replace_with` slice given. Matches correspond to the same matches as
+ /// reported by [`AhoCorasick::find_iter`].
+ ///
+ /// Replacements are determined by the index of the matching pattern.
+ /// For example, if the pattern with index `2` is found, then it is
+ /// replaced by `replace_with[2]`.
+ ///
+ /// This is the infallible version of
+ /// [`AhoCorasick::try_replace_all_bytes`].
+ ///
+ /// # Panics
+ ///
+ /// This panics when [`AhoCorasick::try_replace_all_bytes`] would return an
+ /// error.
+ ///
+ /// This also panics when `replace_with.len()` does not equal
+ /// [`AhoCorasick::patterns_len`].
+ ///
+ /// # Example: basic usage
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, MatchKind};
+ ///
+ /// let patterns = &["append", "appendage", "app"];
+ /// let haystack = b"append the app to the appendage";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostFirst)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let result = ac.replace_all_bytes(haystack, &["x", "y", "z"]);
+ /// assert_eq!(b"x the z to the xage".to_vec(), result);
+ /// ```
+ pub fn replace_all_bytes<B>(
+ &self,
+ haystack: &[u8],
+ replace_with: &[B],
+ ) -> Vec<u8>
+ where
+ B: AsRef<[u8]>,
+ {
+ self.try_replace_all_bytes(haystack, replace_with)
+ .expect("AhoCorasick::try_replace_all_bytes should not fail")
+ }
+
+ /// Replace all matches using a closure called on each match.
+ /// Matches correspond to the same matches as reported by
+ /// [`AhoCorasick::find_iter`].
+ ///
+ /// The closure accepts three parameters: the match found, the text of
+ /// the match and a string buffer with which to write the replaced text
+ /// (if any). If the closure returns `true`, then it continues to the next
+ /// match. If the closure returns `false`, then searching is stopped.
+ ///
+ /// Note that any matches with boundaries that don't fall on a valid UTF-8
+ /// boundary are silently skipped.
+ ///
+ /// This is the infallible version of
+ /// [`AhoCorasick::try_replace_all_with`].
+ ///
+ /// # Panics
+ ///
+ /// This panics when [`AhoCorasick::try_replace_all_with`] would return an
+ /// error.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, MatchKind};
+ ///
+ /// let patterns = &["append", "appendage", "app"];
+ /// let haystack = "append the app to the appendage";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostFirst)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let mut result = String::new();
+ /// ac.replace_all_with(haystack, &mut result, |mat, _, dst| {
+ /// dst.push_str(&mat.pattern().as_usize().to_string());
+ /// true
+ /// });
+ /// assert_eq!("0 the 2 to the 0age", result);
+ /// ```
+ ///
+ /// Stopping the replacement by returning `false` (continued from the
+ /// example above):
+ ///
+ /// ```
+ /// # use aho_corasick::{AhoCorasick, MatchKind, PatternID};
+ /// # let patterns = &["append", "appendage", "app"];
+ /// # let haystack = "append the app to the appendage";
+ /// # let ac = AhoCorasick::builder()
+ /// # .match_kind(MatchKind::LeftmostFirst)
+ /// # .build(patterns)
+ /// # .unwrap();
+ /// let mut result = String::new();
+ /// ac.replace_all_with(haystack, &mut result, |mat, _, dst| {
+ /// dst.push_str(&mat.pattern().as_usize().to_string());
+ /// mat.pattern() != PatternID::must(2)
+ /// });
+ /// assert_eq!("0 the 2 to the appendage", result);
+ /// ```
+ pub fn replace_all_with<F>(
+ &self,
+ haystack: &str,
+ dst: &mut String,
+ replace_with: F,
+ ) where
+ F: FnMut(&Match, &str, &mut String) -> bool,
+ {
+ self.try_replace_all_with(haystack, dst, replace_with)
+ .expect("AhoCorasick::try_replace_all_with should not fail")
+ }
+
+ /// Replace all matches using raw bytes with a closure called on each
+ /// match. Matches correspond to the same matches as reported by
+ /// [`AhoCorasick::find_iter`].
+ ///
+ /// The closure accepts three parameters: the match found, the text of
+ /// the match and a byte buffer with which to write the replaced text
+ /// (if any). If the closure returns `true`, then it continues to the next
+ /// match. If the closure returns `false`, then searching is stopped.
+ ///
+ /// This is the infallible version of
+ /// [`AhoCorasick::try_replace_all_with_bytes`].
+ ///
+ /// # Panics
+ ///
+ /// This panics when [`AhoCorasick::try_replace_all_with_bytes`] would
+ /// return an error.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, MatchKind};
+ ///
+ /// let patterns = &["append", "appendage", "app"];
+ /// let haystack = b"append the app to the appendage";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostFirst)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let mut result = vec![];
+ /// ac.replace_all_with_bytes(haystack, &mut result, |mat, _, dst| {
+ /// dst.extend(mat.pattern().as_usize().to_string().bytes());
+ /// true
+ /// });
+ /// assert_eq!(b"0 the 2 to the 0age".to_vec(), result);
+ /// ```
+ ///
+ /// Stopping the replacement by returning `false` (continued from the
+ /// example above):
+ ///
+ /// ```
+ /// # use aho_corasick::{AhoCorasick, MatchKind, PatternID};
+ /// # let patterns = &["append", "appendage", "app"];
+ /// # let haystack = b"append the app to the appendage";
+ /// # let ac = AhoCorasick::builder()
+ /// # .match_kind(MatchKind::LeftmostFirst)
+ /// # .build(patterns)
+ /// # .unwrap();
+ /// let mut result = vec![];
+ /// ac.replace_all_with_bytes(haystack, &mut result, |mat, _, dst| {
+ /// dst.extend(mat.pattern().as_usize().to_string().bytes());
+ /// mat.pattern() != PatternID::must(2)
+ /// });
+ /// assert_eq!(b"0 the 2 to the appendage".to_vec(), result);
+ /// ```
+ pub fn replace_all_with_bytes<F>(
+ &self,
+ haystack: &[u8],
+ dst: &mut Vec<u8>,
+ replace_with: F,
+ ) where
+ F: FnMut(&Match, &[u8], &mut Vec<u8>) -> bool,
+ {
+ self.try_replace_all_with_bytes(haystack, dst, replace_with)
+ .expect("AhoCorasick::try_replace_all_with_bytes should not fail")
+ }
+
+ /// Returns an iterator of non-overlapping matches in the given
+ /// stream. Matches correspond to the same matches as reported by
+ /// [`AhoCorasick::find_iter`].
+ ///
+ /// The matches yielded by this iterator use absolute position offsets in
+ /// the stream given, where the first byte has index `0`. Matches are
+ /// yielded until the stream is exhausted.
+ ///
+ /// Each item yielded by the iterator is a `Result<Match,
+ /// std::io::Error>`, where an error is yielded if there was a problem
+ /// reading from the reader given.
+ ///
+ /// When searching a stream, an internal buffer is used. Therefore, callers
+ /// should avoid providing a buffered reader, if possible.
+ ///
+ /// This is the infallible version of
+ /// [`AhoCorasick::try_stream_find_iter`]. Note that both methods return
+ /// iterators that produce `Result` values. The difference is that this
+ /// routine panics if _construction_ of the iterator failed. The `Result`
+ /// values yielded by the iterator come from whether the given reader returns
+ /// an error or not during the search.
+ ///
+ /// # Memory usage
+ ///
+ /// In general, searching streams will use a constant amount of memory for
+ /// its internal buffer. The one requirement is that the internal buffer
+ /// must be at least the size of the longest possible match. In most use
+ /// cases, the default buffer size will be much larger than any individual
+ /// match.
+ ///
+ /// # Panics
+ ///
+ /// This panics when [`AhoCorasick::try_stream_find_iter`] would return
+ /// an error. For example, when the Aho-Corasick searcher doesn't support
+ /// stream searches. (Only searchers built with [`MatchKind::Standard`]
+ /// semantics support stream searches.)
+ ///
+ /// # Example: basic usage
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, PatternID};
+ ///
+ /// let patterns = &["append", "appendage", "app"];
+ /// let haystack = "append the app to the appendage";
+ ///
+ /// let ac = AhoCorasick::new(patterns).unwrap();
+ /// let mut matches = vec![];
+ /// for result in ac.stream_find_iter(haystack.as_bytes()) {
+ /// let mat = result?;
+ /// matches.push(mat.pattern());
+ /// }
+ /// assert_eq!(vec![
+ /// PatternID::must(2),
+ /// PatternID::must(2),
+ /// PatternID::must(2),
+ /// ], matches);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ #[cfg(feature = "std")]
+ pub fn stream_find_iter<'a, R: std::io::Read>(
+ &'a self,
+ rdr: R,
+ ) -> StreamFindIter<'a, R> {
+ self.try_stream_find_iter(rdr)
+ .expect("AhoCorasick::try_stream_find_iter should not fail")
+ }
+}
+
+/// Fallible search routines. These APIs return an error in cases where the
+/// infallible routines would panic.
+impl AhoCorasick {
+ /// Returns the location of the first match according to the match
+ /// semantics that this automaton was constructed with, and according
+ /// to the given `Input` configuration.
+ ///
+ /// This is the fallible version of [`AhoCorasick::find`].
+ ///
+ /// # Errors
+ ///
+ /// This returns an error when this Aho-Corasick searcher does not support
+ /// the given `Input` configuration.
+ ///
+ /// For example, if the Aho-Corasick searcher only supports anchored
+ /// searches or only supports unanchored searches, then providing an
+ /// `Input` that requests an anchored (or unanchored) search when it isn't
+ /// supported would result in an error.
+ ///
+ /// # Example: leftmost-first searching
+ ///
+ /// Basic usage with leftmost-first semantics:
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, MatchKind, Input};
+ ///
+ /// let patterns = &["b", "abc", "abcd"];
+ /// let haystack = "foo abcd";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostFirst)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let mat = ac.try_find(haystack)?.expect("should have a match");
+ /// assert_eq!("abc", &haystack[mat.span()]);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ ///
+ /// # Example: anchored leftmost-first searching
+ ///
+ /// This shows how to anchor the search, so that even if the haystack
+ /// contains a match somewhere, a match won't be reported unless one can
+ /// be found that starts at the beginning of the search:
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, Anchored, Input, MatchKind, StartKind};
+ ///
+ /// let patterns = &["b", "abc", "abcd"];
+ /// let haystack = "foo abcd";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostFirst)
+ /// .start_kind(StartKind::Anchored)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let input = Input::new(haystack).anchored(Anchored::Yes);
+ /// assert_eq!(None, ac.try_find(input)?);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ ///
+ /// If the beginning of the search is changed to where a match begins, then
+ /// it will be found:
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, Anchored, Input, MatchKind, StartKind};
+ ///
+ /// let patterns = &["b", "abc", "abcd"];
+ /// let haystack = "foo abcd";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostFirst)
+ /// .start_kind(StartKind::Anchored)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let input = Input::new(haystack).range(4..).anchored(Anchored::Yes);
+ /// let mat = ac.try_find(input)?.expect("should have a match");
+ /// assert_eq!("abc", &haystack[mat.span()]);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ ///
+ /// # Example: earliest leftmost-first searching
+ ///
+ /// This shows how to run an "earliest" search even when the Aho-Corasick
+ /// searcher was compiled with leftmost-first match semantics. In this
+ /// case, the search is stopped as soon as it is known that a match has
+ /// occurred, even if it doesn't correspond to the leftmost-first match.
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, Input, MatchKind};
+ ///
+ /// let patterns = &["b", "abc", "abcd"];
+ /// let haystack = "foo abcd";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostFirst)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let input = Input::new(haystack).earliest(true);
+ /// let mat = ac.try_find(input)?.expect("should have a match");
+ /// assert_eq!("b", &haystack[mat.span()]);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn try_find<'h, I: Into<Input<'h>>>(
+ &self,
+ input: I,
+ ) -> Result<Option<Match>, MatchError> {
+ let input = input.into();
+ enforce_anchored_consistency(self.start_kind, input.get_anchored())?;
+ self.aut.try_find(&input)
+ }
+
+ /// Returns the location of the first overlapping match in the given
+ /// input with respect to the current state of the underlying searcher.
+ ///
+ /// Overlapping searches do not report matches in their return value.
+ /// Instead, matches can be accessed via [`OverlappingState::get_match`]
+ /// after a search call.
+ ///
+ /// This is the fallible version of [`AhoCorasick::find_overlapping`].
+ ///
+ /// # Errors
+ ///
+ /// This returns an error when this Aho-Corasick searcher does not support
+ /// the given `Input` configuration or if overlapping search is not
+ /// supported.
+ ///
+ /// One example is that only Aho-Corasick searchers built with
+ /// [`MatchKind::Standard`] semantics support overlapping searches. Using
+ /// any other match semantics will result in this returning an error.
+ ///
+ /// # Example: basic usage
+ ///
+ /// This shows how we can repeatedly call an overlapping search without
+ /// ever needing to explicitly re-slice the haystack. Overlapping search
+ /// works this way because searches depend on state saved during the
+ /// previous search.
+ ///
+ /// ```
+ /// use aho_corasick::{
+ /// automaton::OverlappingState,
+ /// AhoCorasick, Input, Match,
+ /// };
+ ///
+ /// let patterns = &["append", "appendage", "app"];
+ /// let haystack = "append the app to the appendage";
+ ///
+ /// let ac = AhoCorasick::new(patterns).unwrap();
+ /// let mut state = OverlappingState::start();
+ ///
+ /// ac.try_find_overlapping(haystack, &mut state)?;
+ /// assert_eq!(Some(Match::must(2, 0..3)), state.get_match());
+ ///
+ /// ac.try_find_overlapping(haystack, &mut state)?;
+ /// assert_eq!(Some(Match::must(0, 0..6)), state.get_match());
+ ///
+ /// ac.try_find_overlapping(haystack, &mut state)?;
+ /// assert_eq!(Some(Match::must(2, 11..14)), state.get_match());
+ ///
+ /// ac.try_find_overlapping(haystack, &mut state)?;
+ /// assert_eq!(Some(Match::must(2, 22..25)), state.get_match());
+ ///
+ /// ac.try_find_overlapping(haystack, &mut state)?;
+ /// assert_eq!(Some(Match::must(0, 22..28)), state.get_match());
+ ///
+ /// ac.try_find_overlapping(haystack, &mut state)?;
+ /// assert_eq!(Some(Match::must(1, 22..31)), state.get_match());
+ ///
+ /// // No more matches to be found.
+ /// ac.try_find_overlapping(haystack, &mut state)?;
+ /// assert_eq!(None, state.get_match());
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ ///
+ /// # Example: implementing your own overlapping iteration
+ ///
+ /// The previous example can be easily adapted to implement your own
+ /// iteration by repeatedly calling `try_find_overlapping` until either
+ /// an error occurs or no more matches are reported.
+ ///
+ /// This is effectively equivalent to the iterator returned by
+ /// [`AhoCorasick::try_find_overlapping_iter`], with the only difference
+ /// being that the iterator checks for errors before construction and
+ /// absolves the caller of needing to check for errors on every search
+ /// call. (Indeed, if the first `try_find_overlapping` call succeeds and
+ /// the same `Input` is given to subsequent calls, then all subsequent
+ /// calls are guaranteed to succeed.)
+ ///
+ /// ```
+ /// use aho_corasick::{
+ /// automaton::OverlappingState,
+ /// AhoCorasick, Input, Match,
+ /// };
+ ///
+ /// let patterns = &["append", "appendage", "app"];
+ /// let haystack = "append the app to the appendage";
+ ///
+ /// let ac = AhoCorasick::new(patterns).unwrap();
+ /// let mut state = OverlappingState::start();
+ /// let mut matches = vec![];
+ ///
+ /// loop {
+ /// ac.try_find_overlapping(haystack, &mut state)?;
+ /// let mat = match state.get_match() {
+ /// None => break,
+ /// Some(mat) => mat,
+ /// };
+ /// matches.push(mat);
+ /// }
+ /// let expected = vec![
+ /// Match::must(2, 0..3),
+ /// Match::must(0, 0..6),
+ /// Match::must(2, 11..14),
+ /// Match::must(2, 22..25),
+ /// Match::must(0, 22..28),
+ /// Match::must(1, 22..31),
+ /// ];
+ /// assert_eq!(expected, matches);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ ///
+ /// # Example: anchored iteration
+ ///
+ /// The previous example can also be adapted to implement
+ /// iteration over all anchored matches. In particular,
+ /// [`AhoCorasick::try_find_overlapping_iter`] does not support this
+ /// because it isn't totally clear what the match semantics ought to be.
+ ///
+ /// In this example, we will find all overlapping matches that start at
+ /// the beginning of our search.
+ ///
+ /// ```
+ /// use aho_corasick::{
+ /// automaton::OverlappingState,
+ /// AhoCorasick, Anchored, Input, Match, StartKind,
+ /// };
+ ///
+ /// let patterns = &["append", "appendage", "app"];
+ /// let haystack = "append the app to the appendage";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .start_kind(StartKind::Anchored)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let input = Input::new(haystack).anchored(Anchored::Yes);
+ /// let mut state = OverlappingState::start();
+ /// let mut matches = vec![];
+ ///
+ /// loop {
+ /// ac.try_find_overlapping(input.clone(), &mut state)?;
+ /// let mat = match state.get_match() {
+ /// None => break,
+ /// Some(mat) => mat,
+ /// };
+ /// matches.push(mat);
+ /// }
+ /// let expected = vec![
+ /// Match::must(2, 0..3),
+ /// Match::must(0, 0..6),
+ /// ];
+ /// assert_eq!(expected, matches);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn try_find_overlapping<'h, I: Into<Input<'h>>>(
+ &self,
+ input: I,
+ state: &mut OverlappingState,
+ ) -> Result<(), MatchError> {
+ let input = input.into();
+ enforce_anchored_consistency(self.start_kind, input.get_anchored())?;
+ self.aut.try_find_overlapping(&input, state)
+ }
+
+ /// Returns an iterator of non-overlapping matches, using the match
+ /// semantics that this automaton was constructed with.
+ ///
+ /// This is the fallible version of [`AhoCorasick::find_iter`].
+ ///
+ /// Note that the error returned by this method occurs during construction
+ /// of the iterator. The iterator itself yields `Match` values. That is,
+ /// once the iterator is constructed, the iteration itself will never
+ /// report an error.
+ ///
+ /// # Errors
+ ///
+ /// This returns an error when this Aho-Corasick searcher does not support
+ /// the given `Input` configuration.
+ ///
+ /// For example, if the Aho-Corasick searcher only supports anchored
+ /// searches or only supports unanchored searches, then providing an
+ /// `Input` that requests an anchored (or unanchored) search when it isn't
+ /// supported would result in an error.
+ ///
+ /// # Example: leftmost-first searching
+ ///
+ /// Basic usage with leftmost-first semantics:
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, Input, MatchKind, PatternID};
+ ///
+ /// let patterns = &["append", "appendage", "app"];
+ /// let haystack = "append the app to the appendage";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostFirst)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let matches: Vec<PatternID> = ac
+ /// .try_find_iter(Input::new(haystack))?
+ /// .map(|mat| mat.pattern())
+ /// .collect();
+ /// assert_eq!(vec![
+ /// PatternID::must(0),
+ /// PatternID::must(2),
+ /// PatternID::must(0),
+ /// ], matches);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ ///
+ /// # Example: anchored leftmost-first searching
+ ///
+ /// This shows how to anchor the search, such that all matches must begin
+ /// at the starting location of the search. For an iterator, an anchored
+ /// search implies that all matches are adjacent.
+ ///
+ /// ```
+ /// use aho_corasick::{
+ /// AhoCorasick, Anchored, Input, MatchKind, PatternID, StartKind,
+ /// };
+ ///
+ /// let patterns = &["foo", "bar", "quux"];
+ /// let haystack = "fooquuxbar foo";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostFirst)
+ /// .start_kind(StartKind::Anchored)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let matches: Vec<PatternID> = ac
+ /// .try_find_iter(Input::new(haystack).anchored(Anchored::Yes))?
+ /// .map(|mat| mat.pattern())
+ /// .collect();
+ /// assert_eq!(vec![
+ /// PatternID::must(0),
+ /// PatternID::must(2),
+ /// PatternID::must(1),
+ /// // The final 'foo' is not found because it is not adjacent to the
+ /// // 'bar' match. It needs to be adjacent because our search is
+ /// // anchored.
+ /// ], matches);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn try_find_iter<'a, 'h, I: Into<Input<'h>>>(
+ &'a self,
+ input: I,
+ ) -> Result<FindIter<'a, 'h>, MatchError> {
+ let input = input.into();
+ enforce_anchored_consistency(self.start_kind, input.get_anchored())?;
+ Ok(FindIter(self.aut.try_find_iter(input)?))
+ }
+
+ /// Returns an iterator of overlapping matches.
+ ///
+ /// This is the fallible version of [`AhoCorasick::find_overlapping_iter`].
+ ///
+ /// Note that the error returned by this method occurs during construction
+ /// of the iterator. The iterator itself yields `Match` values. That is,
+ /// once the iterator is constructed, the iteration itself will never
+ /// report an error.
+ ///
+ /// # Errors
+ ///
+ /// This returns an error when this Aho-Corasick searcher does not support
+ /// the given `Input` configuration or does not support overlapping
+ /// searches.
+ ///
+ /// One example is that only Aho-Corasick searchers built with
+ /// [`MatchKind::Standard`] semantics support overlapping searches. Using
+ /// any other match semantics will result in this returning an error.
+ ///
+ /// # Example: basic usage
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, Input, PatternID};
+ ///
+ /// let patterns = &["append", "appendage", "app"];
+ /// let haystack = "append the app to the appendage";
+ ///
+ /// let ac = AhoCorasick::new(patterns).unwrap();
+ /// let matches: Vec<PatternID> = ac
+ /// .try_find_overlapping_iter(Input::new(haystack))?
+ /// .map(|mat| mat.pattern())
+ /// .collect();
+ /// assert_eq!(vec![
+ /// PatternID::must(2),
+ /// PatternID::must(0),
+ /// PatternID::must(2),
+ /// PatternID::must(2),
+ /// PatternID::must(0),
+ /// PatternID::must(1),
+ /// ], matches);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ ///
+ /// # Example: anchored overlapping search returns an error
+ ///
+ /// It isn't clear what the match semantics for anchored overlapping
+ /// iterators *ought* to be, so currently an error is returned. Callers
+ /// may use [`AhoCorasick::try_find_overlapping`] to implement their own
+ /// semantics if desired.
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, Anchored, Input, StartKind};
+ ///
+ /// let patterns = &["append", "appendage", "app"];
+ /// let haystack = "appendappendage app";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .start_kind(StartKind::Anchored)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let input = Input::new(haystack).anchored(Anchored::Yes);
+ /// assert!(ac.try_find_overlapping_iter(input).is_err());
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn try_find_overlapping_iter<'a, 'h, I: Into<Input<'h>>>(
+ &'a self,
+ input: I,
+ ) -> Result<FindOverlappingIter<'a, 'h>, MatchError> {
+ let input = input.into();
+ enforce_anchored_consistency(self.start_kind, input.get_anchored())?;
+ Ok(FindOverlappingIter(self.aut.try_find_overlapping_iter(input)?))
+ }
+
+ /// Replace all matches with a corresponding value in the `replace_with`
+ /// slice given. Matches correspond to the same matches as reported by
+ /// [`AhoCorasick::try_find_iter`].
+ ///
+ /// Replacements are determined by the index of the matching pattern.
+ /// For example, if the pattern with index `2` is found, then it is
+ /// replaced by `replace_with[2]`.
+ ///
+ /// # Panics
+ ///
+ /// This panics when `replace_with.len()` does not equal
+ /// [`AhoCorasick::patterns_len`].
+ ///
+ /// # Errors
+ ///
+ /// This returns an error when this Aho-Corasick searcher does not support
+ /// the default `Input` configuration. More specifically, this occurs only
+ /// when the Aho-Corasick searcher does not support unanchored searches
+ /// since this replacement routine always does an unanchored search.
+ ///
+ /// # Example: basic usage
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, MatchKind};
+ ///
+ /// let patterns = &["append", "appendage", "app"];
+ /// let haystack = "append the app to the appendage";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostFirst)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let result = ac.try_replace_all(haystack, &["x", "y", "z"])?;
+ /// assert_eq!("x the z to the xage", result);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn try_replace_all<B>(
+ &self,
+ haystack: &str,
+ replace_with: &[B],
+ ) -> Result<String, MatchError>
+ where
+ B: AsRef<str>,
+ {
+ enforce_anchored_consistency(self.start_kind, Anchored::No)?;
+ self.aut.try_replace_all(haystack, replace_with)
+ }
+
+ /// Replace all matches using raw bytes with a corresponding value in the
+ /// `replace_with` slice given. Matches correspond to the same matches as
+ /// reported by [`AhoCorasick::try_find_iter`].
+ ///
+ /// Replacements are determined by the index of the matching pattern.
+ /// For example, if the pattern with index `2` is found, then it is
+ /// replaced by `replace_with[2]`.
+ ///
+ /// This is the fallible version of [`AhoCorasick::replace_all_bytes`].
+ ///
+ /// # Panics
+ ///
+ /// This panics when `replace_with.len()` does not equal
+ /// [`AhoCorasick::patterns_len`].
+ ///
+ /// # Errors
+ ///
+ /// This returns an error when this Aho-Corasick searcher does not support
+ /// the default `Input` configuration. More specifically, this occurs only
+ /// when the Aho-Corasick searcher does not support unanchored searches
+ /// since this replacement routine always does an unanchored search.
+ ///
+ /// # Example: basic usage
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, MatchKind};
+ ///
+ /// let patterns = &["append", "appendage", "app"];
+ /// let haystack = b"append the app to the appendage";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostFirst)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let result = ac.try_replace_all_bytes(haystack, &["x", "y", "z"])?;
+ /// assert_eq!(b"x the z to the xage".to_vec(), result);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn try_replace_all_bytes<B>(
+ &self,
+ haystack: &[u8],
+ replace_with: &[B],
+ ) -> Result<Vec<u8>, MatchError>
+ where
+ B: AsRef<[u8]>,
+ {
+ enforce_anchored_consistency(self.start_kind, Anchored::No)?;
+ self.aut.try_replace_all_bytes(haystack, replace_with)
+ }
+
+ /// Replace all matches using a closure called on each match.
+ /// Matches correspond to the same matches as reported by
+ /// [`AhoCorasick::try_find_iter`].
+ ///
+ /// The closure accepts three parameters: the match found, the text of
+ /// the match and a string buffer with which to write the replaced text
+ /// (if any). If the closure returns `true`, then it continues to the next
+ /// match. If the closure returns `false`, then searching is stopped.
+ ///
+ /// Note that any matches with boundaries that don't fall on a valid UTF-8
+ /// boundary are silently skipped.
+ ///
+ /// This is the fallible version of [`AhoCorasick::replace_all_with`].
+ ///
+ /// # Errors
+ ///
+ /// This returns an error when this Aho-Corasick searcher does not support
+ /// the default `Input` configuration. More specifically, this occurs only
+ /// when the Aho-Corasick searcher does not support unanchored searches
+ /// since this replacement routine always does an unanchored search.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, MatchKind};
+ ///
+ /// let patterns = &["append", "appendage", "app"];
+ /// let haystack = "append the app to the appendage";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostFirst)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let mut result = String::new();
+ /// ac.try_replace_all_with(haystack, &mut result, |mat, _, dst| {
+ /// dst.push_str(&mat.pattern().as_usize().to_string());
+ /// true
+ /// })?;
+ /// assert_eq!("0 the 2 to the 0age", result);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ ///
+ /// Stopping the replacement by returning `false` (continued from the
+ /// example above):
+ ///
+ /// ```
+ /// # use aho_corasick::{AhoCorasick, MatchKind, PatternID};
+ /// # let patterns = &["append", "appendage", "app"];
+ /// # let haystack = "append the app to the appendage";
+ /// # let ac = AhoCorasick::builder()
+ /// # .match_kind(MatchKind::LeftmostFirst)
+ /// # .build(patterns)
+ /// # .unwrap();
+ /// let mut result = String::new();
+ /// ac.try_replace_all_with(haystack, &mut result, |mat, _, dst| {
+ /// dst.push_str(&mat.pattern().as_usize().to_string());
+ /// mat.pattern() != PatternID::must(2)
+ /// })?;
+ /// assert_eq!("0 the 2 to the appendage", result);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn try_replace_all_with<F>(
+ &self,
+ haystack: &str,
+ dst: &mut String,
+ replace_with: F,
+ ) -> Result<(), MatchError>
+ where
+ F: FnMut(&Match, &str, &mut String) -> bool,
+ {
+ enforce_anchored_consistency(self.start_kind, Anchored::No)?;
+ self.aut.try_replace_all_with(haystack, dst, replace_with)
+ }
+
+ /// Replace all matches using raw bytes with a closure called on each
+ /// match. Matches correspond to the same matches as reported by
+ /// [`AhoCorasick::try_find_iter`].
+ ///
+ /// The closure accepts three parameters: the match found, the text of
+ /// the match and a byte buffer with which to write the replaced text
+ /// (if any). If the closure returns `true`, then it continues to the next
+ /// match. If the closure returns `false`, then searching is stopped.
+ ///
+ /// This is the fallible version of
+ /// [`AhoCorasick::replace_all_with_bytes`].
+ ///
+ /// # Errors
+ ///
+ /// This returns an error when this Aho-Corasick searcher does not support
+ /// the default `Input` configuration. More specifically, this occurs only
+ /// when the Aho-Corasick searcher does not support unanchored searches
+ /// since this replacement routine always does an unanchored search.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, MatchKind};
+ ///
+ /// let patterns = &["append", "appendage", "app"];
+ /// let haystack = b"append the app to the appendage";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostFirst)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let mut result = vec![];
+ /// ac.try_replace_all_with_bytes(haystack, &mut result, |mat, _, dst| {
+ /// dst.extend(mat.pattern().as_usize().to_string().bytes());
+ /// true
+ /// })?;
+ /// assert_eq!(b"0 the 2 to the 0age".to_vec(), result);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ ///
+ /// Stopping the replacement by returning `false` (continued from the
+ /// example above):
+ ///
+ /// ```
+ /// # use aho_corasick::{AhoCorasick, MatchKind, PatternID};
+ /// # let patterns = &["append", "appendage", "app"];
+ /// # let haystack = b"append the app to the appendage";
+ /// # let ac = AhoCorasick::builder()
+ /// # .match_kind(MatchKind::LeftmostFirst)
+ /// # .build(patterns)
+ /// # .unwrap();
+ /// let mut result = vec![];
+ /// ac.try_replace_all_with_bytes(haystack, &mut result, |mat, _, dst| {
+ /// dst.extend(mat.pattern().as_usize().to_string().bytes());
+ /// mat.pattern() != PatternID::must(2)
+ /// })?;
+ /// assert_eq!(b"0 the 2 to the appendage".to_vec(), result);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn try_replace_all_with_bytes<F>(
+ &self,
+ haystack: &[u8],
+ dst: &mut Vec<u8>,
+ replace_with: F,
+ ) -> Result<(), MatchError>
+ where
+ F: FnMut(&Match, &[u8], &mut Vec<u8>) -> bool,
+ {
+ enforce_anchored_consistency(self.start_kind, Anchored::No)?;
+ self.aut.try_replace_all_with_bytes(haystack, dst, replace_with)
+ }
+
+ /// Returns an iterator of non-overlapping matches in the given
+ /// stream. Matches correspond to the same matches as reported by
+ /// [`AhoCorasick::try_find_iter`].
+ ///
+ /// The matches yielded by this iterator use absolute position offsets in
+ /// the stream given, where the first byte has index `0`. Matches are
+ /// yielded until the stream is exhausted.
+ ///
+ /// Each item yielded by the iterator is a `Result<Match,
+ /// std::io::Error>`, where an error is yielded if there was a problem
+ /// reading from the reader given.
+ ///
+ /// When searching a stream, an internal buffer is used. Therefore, callers
+ /// should avoid providing a buffered reader, if possible.
+ ///
+ /// This is the fallible version of [`AhoCorasick::stream_find_iter`].
+ /// Note that both methods return iterators that produce `Result` values.
+ /// The difference is that this routine returns an error if _construction_
+ /// of the iterator failed. The `Result` values yielded by the iterator
+ /// come from whether the given reader returns an error or not during the
+ /// search.
+ ///
+ /// # Memory usage
+ ///
+ /// In general, searching streams will use a constant amount of memory for
+ /// its internal buffer. The one requirement is that the internal buffer
+ /// must be at least the size of the longest possible match. In most use
+ /// cases, the default buffer size will be much larger than any individual
+ /// match.
+ ///
+ /// # Errors
+ ///
+ /// This returns an error when this Aho-Corasick searcher does not support
+ /// the default `Input` configuration. More specifically, this occurs only
+ /// when the Aho-Corasick searcher does not support unanchored searches
+ /// since this stream searching routine always does an unanchored search.
+ ///
+ /// This also returns an error if the searcher does not support stream
+ /// searches. Only searchers built with [`MatchKind::Standard`] semantics
+ /// support stream searches.
+ ///
+ /// # Example: basic usage
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, PatternID};
+ ///
+ /// let patterns = &["append", "appendage", "app"];
+ /// let haystack = "append the app to the appendage";
+ ///
+ /// let ac = AhoCorasick::new(patterns).unwrap();
+ /// let mut matches = vec![];
+ /// for result in ac.try_stream_find_iter(haystack.as_bytes())? {
+ /// let mat = result?;
+ /// matches.push(mat.pattern());
+ /// }
+ /// assert_eq!(vec![
+ /// PatternID::must(2),
+ /// PatternID::must(2),
+ /// PatternID::must(2),
+ /// ], matches);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ #[cfg(feature = "std")]
+ pub fn try_stream_find_iter<'a, R: std::io::Read>(
+ &'a self,
+ rdr: R,
+ ) -> Result<StreamFindIter<'a, R>, MatchError> {
+ enforce_anchored_consistency(self.start_kind, Anchored::No)?;
+ self.aut.try_stream_find_iter(rdr).map(StreamFindIter)
+ }
+
+ /// Search for and replace all matches of this automaton in
+ /// the given reader, and write the replacements to the given
+ /// writer. Matches correspond to the same matches as reported by
+ /// [`AhoCorasick::try_find_iter`].
+ ///
+ /// Replacements are determined by the index of the matching pattern. For
+ /// example, if the pattern with index `2` is found, then it is replaced by
+ /// `replace_with[2]`.
+ ///
+ /// After all matches are replaced, the writer is _not_ flushed.
+ ///
+ /// If there was a problem reading from the given reader or writing to the
+ /// given writer, then the corresponding `io::Error` is returned and all
+ /// replacement is stopped.
+ ///
+ /// When searching a stream, an internal buffer is used. Therefore, callers
+ /// should avoid providing a buffered reader, if possible. However,
+ /// callers may want to provide a buffered writer.
+ ///
+ /// Note that there is currently no infallible version of this routine.
+ ///
+ /// # Memory usage
+ ///
+ /// In general, searching streams will use a constant amount of memory for
+ /// its internal buffer. The one requirement is that the internal buffer
+ /// must be at least the size of the longest possible match. In most use
+ /// cases, the default buffer size will be much larger than any individual
+ /// match.
+ ///
+ /// # Panics
+ ///
+ /// This panics when `replace_with.len()` does not equal
+ /// [`AhoCorasick::patterns_len`].
+ ///
+ /// # Errors
+ ///
+ /// This returns an error when this Aho-Corasick searcher does not support
+ /// the default `Input` configuration. More specifically, this occurs only
+ /// when the Aho-Corasick searcher does not support unanchored searches
+ /// since this stream searching routine always does an unanchored search.
+ ///
+ /// This also returns an error if the searcher does not support stream
+ /// searches. Only searchers built with [`MatchKind::Standard`] semantics
+ /// support stream searches.
+ ///
+ /// # Example: basic usage
+ ///
+ /// ```
+ /// use aho_corasick::AhoCorasick;
+ ///
+ /// let patterns = &["fox", "brown", "quick"];
+ /// let haystack = "The quick brown fox.";
+ /// let replace_with = &["sloth", "grey", "slow"];
+ ///
+ /// let ac = AhoCorasick::new(patterns).unwrap();
+ /// let mut result = vec![];
+ /// ac.try_stream_replace_all(
+ /// haystack.as_bytes(),
+ /// &mut result,
+ /// replace_with,
+ /// )?;
+ /// assert_eq!(b"The slow grey sloth.".to_vec(), result);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ #[cfg(feature = "std")]
+ pub fn try_stream_replace_all<R, W, B>(
+ &self,
+ rdr: R,
+ wtr: W,
+ replace_with: &[B],
+ ) -> Result<(), std::io::Error>
+ where
+ R: std::io::Read,
+ W: std::io::Write,
+ B: AsRef<[u8]>,
+ {
+ enforce_anchored_consistency(self.start_kind, Anchored::No)
+ .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
+ self.aut.try_stream_replace_all(rdr, wtr, replace_with)
+ }
+
+ /// Search the given reader and replace all matches of this automaton
+ /// using the given closure. The result is written to the given
+ /// writer. Matches correspond to the same matches as reported by
+ /// [`AhoCorasick::try_find_iter`].
+ ///
+ /// The closure accepts three parameters: the match found, the text of
+ /// the match and the writer with which to write the replaced text (if any).
+ ///
+ /// After all matches are replaced, the writer is _not_ flushed.
+ ///
+ /// If there was a problem reading from the given reader or writing to the
+ /// given writer, then the corresponding `io::Error` is returned and all
+ /// replacement is stopped.
+ ///
+ /// When searching a stream, an internal buffer is used. Therefore, callers
+ /// should avoid providing a buffered reader, if possible. However,
+ /// callers may want to provide a buffered writer.
+ ///
+ /// Note that there is currently no infallible version of this routine.
+ ///
+ /// # Memory usage
+ ///
+ /// In general, searching streams will use a constant amount of memory for
+ /// its internal buffer. The one requirement is that the internal buffer
+ /// must be at least the size of the longest possible match. In most use
+ /// cases, the default buffer size will be much larger than any individual
+ /// match.
+ ///
+ /// # Errors
+ ///
+ /// This returns an error when this Aho-Corasick searcher does not support
+ /// the default `Input` configuration. More specifically, this occurs only
+ /// when the Aho-Corasick searcher does not support unanchored searches
+ /// since this stream searching routine always does an unanchored search.
+ ///
+ /// This also returns an error if the searcher does not support stream
+ /// searches. Only searchers built with [`MatchKind::Standard`] semantics
+ /// support stream searches.
+ ///
+ /// # Example: basic usage
+ ///
+ /// ```
+ /// use std::io::Write;
+ /// use aho_corasick::AhoCorasick;
+ ///
+ /// let patterns = &["fox", "brown", "quick"];
+ /// let haystack = "The quick brown fox.";
+ ///
+ /// let ac = AhoCorasick::new(patterns).unwrap();
+ /// let mut result = vec![];
+ /// ac.try_stream_replace_all_with(
+ /// haystack.as_bytes(),
+ /// &mut result,
+ /// |mat, _, wtr| {
+ /// wtr.write_all(mat.pattern().as_usize().to_string().as_bytes())
+ /// },
+ /// )?;
+ /// assert_eq!(b"The 2 1 0.".to_vec(), result);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ #[cfg(feature = "std")]
+ pub fn try_stream_replace_all_with<R, W, F>(
+ &self,
+ rdr: R,
+ wtr: W,
+ replace_with: F,
+ ) -> Result<(), std::io::Error>
+ where
+ R: std::io::Read,
+ W: std::io::Write,
+ F: FnMut(&Match, &[u8], &mut W) -> Result<(), std::io::Error>,
+ {
+ enforce_anchored_consistency(self.start_kind, Anchored::No)
+ .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
+ self.aut.try_stream_replace_all_with(rdr, wtr, replace_with)
+ }
+}
+
+/// Routines for querying information about the Aho-Corasick automaton.
+impl AhoCorasick {
+ /// Returns the kind of the Aho-Corasick automaton used by this searcher.
+ ///
+ /// Knowing the Aho-Corasick kind is principally useful for diagnostic
+ /// purposes. In particular, if no specific kind was given to
+ /// [`AhoCorasickBuilder::kind`], then one is automatically chosen and
+ /// this routine will report which one.
+ ///
+ /// Note that the heuristics used for choosing which `AhoCorasickKind` to
+ /// use may be changed in a semver-compatible release.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, AhoCorasickKind};
+ ///
+ /// let ac = AhoCorasick::new(&["foo", "bar", "quux", "baz"]).unwrap();
+ /// // The specific Aho-Corasick kind chosen is not guaranteed!
+ /// assert_eq!(AhoCorasickKind::DFA, ac.kind());
+ /// ```
+ pub fn kind(&self) -> AhoCorasickKind {
+ self.kind
+ }
+
+ /// Returns the type of starting search configuration supported by this
+ /// Aho-Corasick automaton.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, StartKind};
+ ///
+ /// let ac = AhoCorasick::new(&["foo", "bar", "quux", "baz"]).unwrap();
+ /// assert_eq!(StartKind::Unanchored, ac.start_kind());
+ /// ```
+ pub fn start_kind(&self) -> StartKind {
+ self.start_kind
+ }
+
+ /// Returns the match kind used by this automaton.
+ ///
+ /// The match kind is important because it determines what kinds of
+ /// matches are returned. Also, some operations (such as overlapping
+ /// search and stream searching) are only supported when using the
+ /// [`MatchKind::Standard`] match kind.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, MatchKind};
+ ///
+ /// let ac = AhoCorasick::new(&["foo", "bar", "quux", "baz"]).unwrap();
+ /// assert_eq!(MatchKind::Standard, ac.match_kind());
+ /// ```
+ pub fn match_kind(&self) -> MatchKind {
+ self.aut.match_kind()
+ }
+
+ /// Returns the length of the shortest pattern matched by this automaton.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use aho_corasick::AhoCorasick;
+ ///
+ /// let ac = AhoCorasick::new(&["foo", "bar", "quux", "baz"]).unwrap();
+ /// assert_eq!(3, ac.min_pattern_len());
+ /// ```
+ ///
+ /// Note that an `AhoCorasick` automaton has a minimum length of `0` if
+ /// and only if it can match the empty string:
+ ///
+ /// ```
+ /// use aho_corasick::AhoCorasick;
+ ///
+ /// let ac = AhoCorasick::new(&["foo", "", "quux", "baz"]).unwrap();
+ /// assert_eq!(0, ac.min_pattern_len());
+ /// ```
+ pub fn min_pattern_len(&self) -> usize {
+ self.aut.min_pattern_len()
+ }
+
+ /// Returns the length of the longest pattern matched by this automaton.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use aho_corasick::AhoCorasick;
+ ///
+ /// let ac = AhoCorasick::new(&["foo", "bar", "quux", "baz"]).unwrap();
+ /// assert_eq!(4, ac.max_pattern_len());
+ /// ```
+ pub fn max_pattern_len(&self) -> usize {
+ self.aut.max_pattern_len()
+ }
+
+ /// Return the total number of patterns matched by this automaton.
+ ///
+ /// This includes patterns that may never participate in a match. For
+ /// example, if [`MatchKind::LeftmostFirst`] match semantics are used, and
+ /// the patterns `Sam` and `Samwise` were used to build the automaton (in
+ /// that order), then `Samwise` can never participate in a match because
+ /// `Sam` will always take priority.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use aho_corasick::AhoCorasick;
+ ///
+ /// let ac = AhoCorasick::new(&["foo", "bar", "baz"]).unwrap();
+ /// assert_eq!(3, ac.patterns_len());
+ /// ```
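+ ///
+ /// As a sketch of the `Sam`/`Samwise` point above, the count includes a
+ /// pattern even when leftmost-first priority prevents it from ever
+ /// participating in a match:
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, MatchKind, PatternID};
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostFirst)
+ /// .build(&["Sam", "Samwise"])
+ /// .unwrap();
+ /// assert_eq!(2, ac.patterns_len());
+ /// // 'Sam' always takes priority, so 'Samwise' never matches.
+ /// assert_eq!(
+ /// Some(PatternID::must(0)),
+ /// ac.find("Samwise").map(|m| m.pattern()),
+ /// );
+ /// ```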
+ pub fn patterns_len(&self) -> usize {
+ self.aut.patterns_len()
+ }
+
+ /// Returns the approximate total amount of heap used by this automaton, in
+ /// units of bytes.
+ ///
+ /// # Examples
+ ///
+ /// This example shows the difference in heap usage between a few
+ /// configurations:
+ ///
+ /// ```
+ /// # if !cfg!(target_pointer_width = "64") { return; }
+ /// use aho_corasick::{AhoCorasick, AhoCorasickKind, MatchKind};
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .kind(None) // default
+ /// .build(&["foobar", "bruce", "triskaidekaphobia", "springsteen"])
+ /// .unwrap();
+ /// assert_eq!(5_632, ac.memory_usage());
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .kind(None) // default
+ /// .ascii_case_insensitive(true)
+ /// .build(&["foobar", "bruce", "triskaidekaphobia", "springsteen"])
+ /// .unwrap();
+ /// assert_eq!(11_136, ac.memory_usage());
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .kind(Some(AhoCorasickKind::NoncontiguousNFA))
+ /// .ascii_case_insensitive(true)
+ /// .build(&["foobar", "bruce", "triskaidekaphobia", "springsteen"])
+ /// .unwrap();
+ /// assert_eq!(10_879, ac.memory_usage());
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .kind(Some(AhoCorasickKind::ContiguousNFA))
+ /// .ascii_case_insensitive(true)
+ /// .build(&["foobar", "bruce", "triskaidekaphobia", "springsteen"])
+ /// .unwrap();
+ /// assert_eq!(2_584, ac.memory_usage());
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .kind(Some(AhoCorasickKind::DFA))
+ /// .ascii_case_insensitive(true)
+ /// .build(&["foobar", "bruce", "triskaidekaphobia", "springsteen"])
+ /// .unwrap();
+ /// // While this shows the DFA being the biggest here by a small margin,
+ /// // don't let the difference fool you. With such a small number of
+ /// // patterns, the difference is small, but a bigger number of patterns
+ /// // will reveal that the rate of growth of the DFA is far bigger than
+ /// // the NFAs above. For a large number of patterns, it is easy for the
+ /// // DFA to take an order of magnitude more heap space (or more!).
+ /// assert_eq!(11_136, ac.memory_usage());
+ /// ```
+ pub fn memory_usage(&self) -> usize {
+ self.aut.memory_usage()
+ }
+}
+
+// We provide a manual debug impl so that we don't include the 'start_kind',
+// principally because it's kind of weird to do so and because it screws with
+// the carefully curated debug output for the underlying automaton.
+impl core::fmt::Debug for AhoCorasick {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ f.debug_tuple("AhoCorasick").field(&self.aut).finish()
+ }
+}
+
+/// An iterator of non-overlapping matches in a particular haystack.
+///
+/// This iterator yields matches according to the [`MatchKind`] used by this
+/// automaton.
+///
+/// This iterator is constructed via the [`AhoCorasick::find_iter`] and
+/// [`AhoCorasick::try_find_iter`] methods.
+///
+/// The lifetime `'a` refers to the lifetime of the `AhoCorasick` automaton.
+///
+/// The lifetime `'h` refers to the lifetime of the haystack being searched.
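+///
+/// # Example
+///
+/// A brief usage sketch (the patterns and haystack here are only
+/// illustrative; see [`AhoCorasick::find_iter`] for the full examples):
+///
+/// ```
+/// use aho_corasick::{AhoCorasick, PatternID};
+///
+/// let ac = AhoCorasick::new(&["apple", "maple"]).unwrap();
+/// let pattern_ids: Vec<PatternID> = ac
+///     .find_iter("Nobody likes maple in their apple flavored Snapple.")
+///     .map(|m| m.pattern())
+///     .collect();
+/// assert_eq!(pattern_ids, vec![
+///     PatternID::must(1),
+///     PatternID::must(0),
+///     PatternID::must(0),
+/// ]);
+/// ```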
+#[derive(Debug)]
+pub struct FindIter<'a, 'h>(automaton::FindIter<'a, 'h, Arc<dyn AcAutomaton>>);
+
+impl<'a, 'h> Iterator for FindIter<'a, 'h> {
+ type Item = Match;
+
+ #[inline]
+ fn next(&mut self) -> Option<Match> {
+ self.0.next()
+ }
+}
+
+/// An iterator of overlapping matches in a particular haystack.
+///
+/// This iterator will report all possible matches in a particular haystack,
+/// even when the matches overlap.
+///
+/// This iterator is constructed via the [`AhoCorasick::find_overlapping_iter`]
+/// and [`AhoCorasick::try_find_overlapping_iter`] methods.
+///
+/// The lifetime `'a` refers to the lifetime of the `AhoCorasick` automaton.
+///
+/// The lifetime `'h` refers to the lifetime of the haystack being searched.
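+///
+/// # Example
+///
+/// A short sketch of collecting every overlapping match (the pattern set
+/// below is purely illustrative):
+///
+/// ```
+/// use aho_corasick::AhoCorasick;
+///
+/// let ac = AhoCorasick::new(&["append", "appendage", "app"]).unwrap();
+/// let matched: Vec<usize> = ac
+///     .find_overlapping_iter("append the app to the appendage")
+///     .map(|m| m.pattern().as_usize())
+///     .collect();
+/// assert_eq!(vec![2, 0, 2, 2, 0, 1], matched);
+/// ```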
+#[derive(Debug)]
+pub struct FindOverlappingIter<'a, 'h>(
+ automaton::FindOverlappingIter<'a, 'h, Arc<dyn AcAutomaton>>,
+);
+
+impl<'a, 'h> Iterator for FindOverlappingIter<'a, 'h> {
+ type Item = Match;
+
+ #[inline]
+ fn next(&mut self) -> Option<Match> {
+ self.0.next()
+ }
+}
+
+/// An iterator that reports Aho-Corasick matches in a stream.
+///
+/// This iterator yields elements of type `Result<Match, std::io::Error>`,
+/// where an error is reported if there was a problem reading from the
+/// underlying stream. The iterator terminates only when the underlying stream
+/// reaches `EOF`.
+///
+/// This iterator is constructed via the [`AhoCorasick::stream_find_iter`] and
+/// [`AhoCorasick::try_stream_find_iter`] methods.
+///
+/// The type variable `R` refers to the `io::Read` stream that is being read
+/// from.
+///
+/// The lifetime `'a` refers to the lifetime of the corresponding
+/// [`AhoCorasick`] searcher.
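+///
+/// # Example
+///
+/// A minimal sketch using an in-memory `&[u8]` as the reader; any
+/// `std::io::Read` implementation works the same way:
+///
+/// ```
+/// use aho_corasick::AhoCorasick;
+///
+/// let ac = AhoCorasick::new(&["foo", "bar"]).unwrap();
+/// let mut count = 0;
+/// for result in ac.stream_find_iter(&b"foo bar baz foo"[..]) {
+///     let _mat = result?;
+///     count += 1;
+/// }
+/// assert_eq!(3, count);
+/// # Ok::<(), std::io::Error>(())
+/// ```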
+#[cfg(feature = "std")]
+#[derive(Debug)]
+pub struct StreamFindIter<'a, R>(
+ automaton::StreamFindIter<'a, Arc<dyn AcAutomaton>, R>,
+);
+
+#[cfg(feature = "std")]
+impl<'a, R: std::io::Read> Iterator for StreamFindIter<'a, R> {
+ type Item = Result<Match, std::io::Error>;
+
+ fn next(&mut self) -> Option<Result<Match, std::io::Error>> {
+ self.0.next()
+ }
+}
+
+/// A builder for configuring an Aho-Corasick automaton.
+///
+/// # Quick advice
+///
+/// * Use [`AhoCorasickBuilder::match_kind`] to configure your searcher
+/// with [`MatchKind::LeftmostFirst`] if you want to match how backtracking
+/// regex engines execute searches for `pat1|pat2|..|patN`. Use
+/// [`MatchKind::LeftmostLongest`] if you want to match how POSIX regex engines
+/// do it.
+/// * If you need an anchored search, use [`AhoCorasickBuilder::start_kind`] to
+/// set the [`StartKind::Anchored`] mode since [`StartKind::Unanchored`] is the
+/// default. Or just use [`StartKind::Both`] to support both types of searches.
+/// * You might want to use [`AhoCorasickBuilder::kind`] to set your searcher
+/// to always use a [`AhoCorasickKind::DFA`] if search speed is critical and
+/// memory usage isn't a concern. Otherwise, not setting a kind will probably
+/// make the right choice for you. Beware that if you use [`StartKind::Both`]
+/// to build a searcher that supports both unanchored and anchored searches
+/// _and_ you set [`AhoCorasickKind::DFA`], then the DFA will essentially be
+/// duplicated to support both simultaneously. This results in very high memory
+/// usage.
+/// * For all other options, their defaults are almost certainly what you want.
+#[derive(Clone, Debug, Default)]
+pub struct AhoCorasickBuilder {
+ nfa_noncontiguous: noncontiguous::Builder,
+ nfa_contiguous: contiguous::Builder,
+ dfa: dfa::Builder,
+ kind: Option<AhoCorasickKind>,
+ start_kind: StartKind,
+}
+
+impl AhoCorasickBuilder {
+ /// Create a new builder for configuring an Aho-Corasick automaton.
+ ///
+ /// The builder provides a way to configure a number of things, including
+ /// ASCII case insensitivity and what kind of match semantics are used.
+ pub fn new() -> AhoCorasickBuilder {
+ AhoCorasickBuilder::default()
+ }
+
+ /// Build an Aho-Corasick automaton using the configuration set on this
+ /// builder.
+ ///
+ /// A builder may be reused to create more automatons.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasickBuilder, PatternID};
+ ///
+ /// let patterns = &["foo", "bar", "baz"];
+ /// let ac = AhoCorasickBuilder::new().build(patterns).unwrap();
+ /// assert_eq!(
+ /// Some(PatternID::must(1)),
+ /// ac.find("xxx bar xxx").map(|m| m.pattern()),
+ /// );
+ /// ```
+ pub fn build<I, P>(&self, patterns: I) -> Result<AhoCorasick, BuildError>
+ where
+ I: IntoIterator<Item = P>,
+ P: AsRef<[u8]>,
+ {
+ let nfa = self.nfa_noncontiguous.build(patterns)?;
+ let (aut, kind): (Arc<dyn AcAutomaton>, AhoCorasickKind) =
+ match self.kind {
+ None => {
+ debug!(
+ "asked for automatic Aho-Corasick implementation, \
+ criteria: <patterns: {:?}, max pattern len: {:?}, \
+ start kind: {:?}>",
+ nfa.patterns_len(),
+ nfa.max_pattern_len(),
+ self.start_kind,
+ );
+ self.build_auto(nfa)
+ }
+ Some(AhoCorasickKind::NoncontiguousNFA) => {
+ debug!("forcefully chose noncontiguous NFA");
+ (Arc::new(nfa), AhoCorasickKind::NoncontiguousNFA)
+ }
+ Some(AhoCorasickKind::ContiguousNFA) => {
+ debug!("forcefully chose contiguous NFA");
+ let cnfa =
+ self.nfa_contiguous.build_from_noncontiguous(&nfa)?;
+ (Arc::new(cnfa), AhoCorasickKind::ContiguousNFA)
+ }
+ Some(AhoCorasickKind::DFA) => {
+ debug!("forcefully chose DFA");
+ let dfa = self.dfa.build_from_noncontiguous(&nfa)?;
+ (Arc::new(dfa), AhoCorasickKind::DFA)
+ }
+ };
+ Ok(AhoCorasick { aut, kind, start_kind: self.start_kind })
+ }
+
+ /// Implements the automatic selection logic for the Aho-Corasick
+ /// implementation to use. Since all Aho-Corasick automatons are built
+ /// from a non-contiguous NFA, the caller is responsible for building
+ /// that first.
+ fn build_auto(
+ &self,
+ nfa: noncontiguous::NFA,
+ ) -> (Arc<dyn AcAutomaton>, AhoCorasickKind) {
+ // We try to build a DFA if we have a very small number of patterns,
+ // otherwise the memory usage just gets too crazy. We also only do it
+ // when the start kind is unanchored or anchored, but not both, because
+ // both implies two full copies of the transition table.
+ let try_dfa = !matches!(self.start_kind, StartKind::Both)
+ && nfa.patterns_len() <= 100;
+ if try_dfa {
+ match self.dfa.build_from_noncontiguous(&nfa) {
+ Ok(dfa) => {
+ debug!("chose a DFA");
+ return (Arc::new(dfa), AhoCorasickKind::DFA);
+ }
+ Err(_err) => {
+ debug!(
+ "failed to build DFA, trying something else: {}",
+ _err
+ );
+ }
+ }
+ }
+ // We basically always want a contiguous NFA when the limited
+ // circumstances in which we use a DFA do not apply. It is quite fast
+ // and has excellent memory usage. The only way we don't use it is if
+ // there are so many states that it can't fit in a contiguous NFA.
+ // And the only way to know that is to try to build it. Building a
+ // contiguous NFA is mostly just reshuffling data from a noncontiguous
+ // NFA, so it isn't too expensive, especially relative to building a
+ // noncontiguous NFA in the first place.
+ match self.nfa_contiguous.build_from_noncontiguous(&nfa) {
+ Ok(nfa) => {
+ debug!("chose contiguous NFA");
+ return (Arc::new(nfa), AhoCorasickKind::ContiguousNFA);
+ }
+ #[allow(unused_variables)] // unused when 'logging' is disabled
+ Err(_err) => {
+ debug!(
+ "failed to build contiguous NFA, \
+ trying something else: {}",
+ _err
+ );
+ }
+ }
+ debug!("chose non-contiguous NFA");
+ (Arc::new(nfa), AhoCorasickKind::NoncontiguousNFA)
+ }
+
+ /// Set the desired match semantics.
+ ///
+ /// The default is [`MatchKind::Standard`], which corresponds to the match
+ /// semantics supported by the standard textbook description of the
+ /// Aho-Corasick algorithm. Namely, matches are reported as soon as they
+ /// are found. Moreover, this is the only way to get overlapping matches
+ /// or do stream searching.
+ ///
+ /// The other kinds of match semantics that are supported are
+ /// [`MatchKind::LeftmostFirst`] and [`MatchKind::LeftmostLongest`]. The
+ /// former corresponds to the match you would get if you were to try to
+ /// match each pattern at each position in the haystack in the same order
+ /// that you give to the automaton. That is, it returns the leftmost match
+ /// corresponding to the earliest pattern given to the automaton. The
+ /// latter corresponds to finding the longest possible match among all
+ /// leftmost matches.
+ ///
+ /// For more details on match semantics, see the [documentation for
+ /// `MatchKind`](MatchKind).
+ ///
+ /// Note that setting this to [`MatchKind::LeftmostFirst`] or
+ /// [`MatchKind::LeftmostLongest`] will cause some search routines on
+ /// [`AhoCorasick`] to return an error (or panic if you're using the
+ /// infallible API). Notably, this includes stream and overlapping
+ /// searches.
+ ///
+ /// # Examples
+ ///
+ /// In these examples, we demonstrate the differences between match
+ /// semantics for a particular set of patterns in a specific order:
+ /// `b`, `abc`, `abcd`.
+ ///
+ /// Standard semantics:
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, MatchKind};
+ ///
+ /// let patterns = &["b", "abc", "abcd"];
+ /// let haystack = "abcd";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::Standard) // default, not necessary
+ /// .build(patterns)
+ /// .unwrap();
+ /// let mat = ac.find(haystack).expect("should have a match");
+ /// assert_eq!("b", &haystack[mat.start()..mat.end()]);
+ /// ```
+ ///
+ /// Leftmost-first semantics:
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, MatchKind};
+ ///
+ /// let patterns = &["b", "abc", "abcd"];
+ /// let haystack = "abcd";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostFirst)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let mat = ac.find(haystack).expect("should have a match");
+ /// assert_eq!("abc", &haystack[mat.start()..mat.end()]);
+ /// ```
+ ///
+ /// Leftmost-longest semantics:
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, MatchKind};
+ ///
+ /// let patterns = &["b", "abc", "abcd"];
+ /// let haystack = "abcd";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostLongest)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let mat = ac.find(haystack).expect("should have a match");
+ /// assert_eq!("abcd", &haystack[mat.start()..mat.end()]);
+ /// ```
+ pub fn match_kind(&mut self, kind: MatchKind) -> &mut AhoCorasickBuilder {
+ self.nfa_noncontiguous.match_kind(kind);
+ self.nfa_contiguous.match_kind(kind);
+ self.dfa.match_kind(kind);
+ self
+ }
+
+ /// Sets the starting state configuration for the automaton.
+ ///
+ /// Every Aho-Corasick automaton is capable of having two start states: one
+ /// that is used for unanchored searches and one that is used for anchored
+ /// searches. Some automatons, like the NFAs, support this with almost zero
+ /// additional cost. Other automatons, like the DFA, require two copies of
+ /// the underlying transition table to support both simultaneously.
+ ///
+ /// Because there may be an added non-trivial cost to supporting both, it
+ /// is possible to configure which starting state configuration is needed.
+ ///
+ /// Indeed, since anchored searches tend to be somewhat more rare,
+ /// _only_ unanchored searches are supported by default. Thus,
+ /// [`StartKind::Unanchored`] is the default.
+ ///
+ /// Note that when this is set to [`StartKind::Unanchored`], then
+ /// running an anchored search will result in an error (or a panic
+ /// if using the infallible APIs). Similarly, when this is set to
+ /// [`StartKind::Anchored`], then running an unanchored search will
+ /// result in an error (or a panic if using the infallible APIs). When
+ /// [`StartKind::Both`] is used, then both unanchored and anchored searches
+ /// are always supported.
+ ///
+ /// Also note that even if an `AhoCorasick` searcher is using an NFA
+ /// internally (which always supports both unanchored and anchored
+ /// searches), an error will still be reported for a search that isn't
+ /// supported by the configuration set via this method. This means,
+ /// for example, that an error is never dependent on which internal
+ /// implementation of Aho-Corasick is used.
+ ///
+ /// # Example: anchored search
+ ///
+ /// This shows how to build a searcher that only supports anchored
+ /// searches:
+ ///
+ /// ```
+ /// use aho_corasick::{
+ /// AhoCorasick, Anchored, Input, Match, MatchKind, StartKind,
+ /// };
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostFirst)
+ /// .start_kind(StartKind::Anchored)
+ /// .build(&["b", "abc", "abcd"])
+ /// .unwrap();
+ ///
+ /// // An unanchored search is not supported! An error here is guaranteed
+ /// // given the configuration above regardless of which kind of
+ /// // Aho-Corasick implementation ends up being used internally.
+ /// let input = Input::new("foo abcd").anchored(Anchored::No);
+ /// assert!(ac.try_find(input).is_err());
+ ///
+ /// let input = Input::new("foo abcd").anchored(Anchored::Yes);
+ /// assert_eq!(None, ac.try_find(input)?);
+ ///
+ /// let input = Input::new("abcd").anchored(Anchored::Yes);
+ /// assert_eq!(Some(Match::must(1, 0..3)), ac.try_find(input)?);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ ///
+ /// # Example: unanchored and anchored searches
+ ///
+ /// This shows how to build a searcher that supports both unanchored and
+ /// anchored searches:
+ ///
+ /// ```
+ /// use aho_corasick::{
+ /// AhoCorasick, Anchored, Input, Match, MatchKind, StartKind,
+ /// };
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostFirst)
+ /// .start_kind(StartKind::Both)
+ /// .build(&["b", "abc", "abcd"])
+ /// .unwrap();
+ ///
+ /// let input = Input::new("foo abcd").anchored(Anchored::No);
+ /// assert_eq!(Some(Match::must(1, 4..7)), ac.try_find(input)?);
+ ///
+ /// let input = Input::new("foo abcd").anchored(Anchored::Yes);
+ /// assert_eq!(None, ac.try_find(input)?);
+ ///
+ /// let input = Input::new("abcd").anchored(Anchored::Yes);
+ /// assert_eq!(Some(Match::must(1, 0..3)), ac.try_find(input)?);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ pub fn start_kind(&mut self, kind: StartKind) -> &mut AhoCorasickBuilder {
+ self.dfa.start_kind(kind);
+ self.start_kind = kind;
+ self
+ }
+
+ /// Enable ASCII-aware case insensitive matching.
+ ///
+ /// When this option is enabled, searching will be performed without
+ /// respect to case for ASCII letters (`a-z` and `A-Z`) only.
+ ///
+ /// Enabling this option does not change the search algorithm, but it may
+ /// increase the size of the automaton.
+ ///
+ /// **NOTE:** It is unlikely that support for Unicode case folding will
+ /// be added in the future. The ASCII case works via a simple hack to the
+ /// underlying automaton, but full Unicode handling requires a fair bit of
+ /// sophistication. If you do need Unicode handling, you might consider
+ /// using the [`regex` crate](https://docs.rs/regex) or the lower level
+ /// [`regex-automata` crate](https://docs.rs/regex-automata).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use aho_corasick::AhoCorasick;
+ ///
+ /// let patterns = &["FOO", "bAr", "BaZ"];
+ /// let haystack = "foo bar baz";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .ascii_case_insensitive(true)
+ /// .build(patterns)
+ /// .unwrap();
+ /// assert_eq!(3, ac.find_iter(haystack).count());
+ /// ```
+ pub fn ascii_case_insensitive(
+ &mut self,
+ yes: bool,
+ ) -> &mut AhoCorasickBuilder {
+ self.nfa_noncontiguous.ascii_case_insensitive(yes);
+ self.nfa_contiguous.ascii_case_insensitive(yes);
+ self.dfa.ascii_case_insensitive(yes);
+ self
+ }
+
+ /// Choose the type of underlying automaton to use.
+ ///
+ /// Currently, there are four choices:
+ ///
+ /// * [`AhoCorasickKind::NoncontiguousNFA`] instructs the searcher to
+ /// use a [`noncontiguous::NFA`]. A noncontiguous NFA is the fastest to
+ /// be built, has moderate memory usage and is typically the slowest to
+ /// execute a search.
+ /// * [`AhoCorasickKind::ContiguousNFA`] instructs the searcher to use a
+ /// [`contiguous::NFA`]. A contiguous NFA is a little slower to build than
+ /// a noncontiguous NFA, has excellent memory usage and is typically a
+ /// little slower than a DFA for a search.
+ /// * [`AhoCorasickKind::DFA`] instructs the searcher to use a
+ /// [`dfa::DFA`]. A DFA is very slow to build, uses exorbitant amounts of
+ /// memory, but will typically execute searches the fastest.
+ /// * `None` (the default) instructs the searcher to choose the "best"
+ /// Aho-Corasick implementation. This choice is typically based primarily
+ /// on the number of patterns.
+ ///
+ /// Setting this configuration does not change the time complexity for
+ /// constructing the Aho-Corasick automaton (which is `O(p)` where `p`
+ /// is the total number of patterns being compiled). Setting this to
+ /// [`AhoCorasickKind::DFA`] does however reduce the time complexity of
+ /// non-overlapping searches from `O(n + p)` to `O(n)`, where `n` is the
+ /// length of the haystack.
+ ///
+ /// In general, you should probably stick to the default unless you have
+ /// some kind of reason to use a specific Aho-Corasick implementation. For
+ /// example, you might choose `AhoCorasickKind::DFA` if you don't care
+ /// about memory usage and want the fastest possible search times.
+ ///
+ /// Setting this guarantees that the searcher returned uses the chosen
+ /// implementation. If that implementation could not be constructed, then
+ /// an error will be returned. In contrast, when `None` is used, it is
+ /// possible for it to attempt to construct, for example, a contiguous
+ /// NFA and have it fail. In which case, it will fall back to using a
+ /// noncontiguous NFA.
+ ///
+ /// If `None` is given, then one may use [`AhoCorasick::kind`] to determine
+ /// which Aho-Corasick implementation was chosen.
+ ///
+ /// Note that the heuristics used for choosing which `AhoCorasickKind`
+ /// to use may be changed in a semver compatible release.
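+ ///
+ /// # Examples
+ ///
+ /// A short sketch of forcing a particular implementation. The assertion
+ /// below only holds because a kind was explicitly requested; with `None`,
+ /// the reported kind is not guaranteed:
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, AhoCorasickKind};
+ ///
+ /// let ac = AhoCorasick::builder()
+ ///     .kind(Some(AhoCorasickKind::ContiguousNFA))
+ ///     .build(&["foo", "bar", "baz"])
+ ///     .unwrap();
+ /// assert_eq!(AhoCorasickKind::ContiguousNFA, ac.kind());
+ /// ```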
+ pub fn kind(
+ &mut self,
+ kind: Option<AhoCorasickKind>,
+ ) -> &mut AhoCorasickBuilder {
+ self.kind = kind;
+ self
+ }
+
+ /// Enable heuristic prefilter optimizations.
+ ///
+ /// When enabled, searching will attempt to quickly skip to match
+ /// candidates using specialized literal search routines. A prefilter
+ /// cannot always be used, and is generally treated as a heuristic. It
+ /// can be useful to disable this if the prefilter is observed to be
+ /// sub-optimal for a particular workload.
+ ///
+ /// Currently, prefilters are typically only active when building searchers
+ /// with a small (less than 100) number of patterns.
+ ///
+ /// This is enabled by default.
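+ ///
+ /// # Examples
+ ///
+ /// A minimal sketch of disabling the prefilter. Search results are
+ /// unchanged; only the potential for acceleration is given up:
+ ///
+ /// ```
+ /// use aho_corasick::AhoCorasick;
+ ///
+ /// let ac = AhoCorasick::builder()
+ ///     .prefilter(false)
+ ///     .build(&["foo", "bar", "baz"])
+ ///     .unwrap();
+ /// assert_eq!(1, ac.find_iter("xxx bar xxx").count());
+ /// ```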
+ pub fn prefilter(&mut self, yes: bool) -> &mut AhoCorasickBuilder {
+ self.nfa_noncontiguous.prefilter(yes);
+ self.nfa_contiguous.prefilter(yes);
+ self.dfa.prefilter(yes);
+ self
+ }
+
+ /// Set the limit on how many states use a dense representation for their
+ /// transitions. Other states will generally use a sparse representation.
+ ///
+ /// A dense representation uses more memory but is generally faster, since
+ /// the next transition in a dense representation can be computed in a
+ /// constant number of instructions. A sparse representation uses less
+ /// memory but is generally slower, since the next transition in a sparse
+ /// representation requires executing a variable number of instructions.
+ ///
+ /// This setting is only used when an Aho-Corasick implementation is used
+ /// that supports the dense versus sparse representation trade-off. Not all
+ /// do.
+ ///
+ /// This limit is expressed in terms of the depth of a state, i.e., the
+ /// number of transitions from the starting state of the automaton. The
+ /// idea is that most of the time searching will be spent near the starting
+ /// state of the automaton, so states near the start state should use a
+ /// dense representation. States further away from the start state would
+ /// then use a sparse representation.
+ ///
+ /// By default, this is set to a low but non-zero number. Setting this to
+ /// `0` is almost never what you want, since it is likely to make searches
+ /// very slow due to the start state itself being forced to use a sparse
+ /// representation. However, it is unlikely that increasing this number
+ /// will help things much, since the most active states have a small depth.
+ /// More to the point, the memory usage increases superlinearly as this
+ /// number increases.
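+ ///
+ /// # Examples
+ ///
+ /// An illustrative sketch of lowering the dense depth. The exact impact on
+ /// memory usage and search speed depends on the patterns and on which
+ /// implementation is chosen:
+ ///
+ /// ```
+ /// use aho_corasick::AhoCorasick;
+ ///
+ /// let ac = AhoCorasick::builder()
+ ///     .dense_depth(2)
+ ///     .build(&["foo", "bar", "baz"])
+ ///     .unwrap();
+ /// assert_eq!(1, ac.find_iter("xxx bar xxx").count());
+ /// ```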
+ pub fn dense_depth(&mut self, depth: usize) -> &mut AhoCorasickBuilder {
+ self.nfa_noncontiguous.dense_depth(depth);
+ self.nfa_contiguous.dense_depth(depth);
+ self
+ }
+
+ /// A debug setting for whether to attempt to shrink the size of the
+ /// automaton's alphabet or not.
+ ///
+ /// This option is enabled by default and should never be disabled unless
+ /// one is debugging the underlying automaton.
+ ///
+ /// When enabled, some (but not all) Aho-Corasick automatons will use a map
+ /// from all possible bytes to their corresponding equivalence class. Each
+ /// equivalence class represents a set of bytes that does not discriminate
+ /// between a match and a non-match in the automaton.
+ ///
+ /// The advantage of this map is that the size of the transition table can
+ /// be reduced drastically from `#states * 256 * sizeof(u32)` to
+ /// `#states * k * sizeof(u32)` where `k` is the number of equivalence
+ /// classes (rounded up to the nearest power of 2). As a result, total
+ /// space usage can decrease substantially. Moreover, since a smaller
+ /// alphabet is used, automaton compilation becomes faster as well.
+ ///
+ /// **WARNING:** This is only useful for debugging automatons. Disabling
+ /// this does not yield any speed advantages. Namely, even when this is
+ /// disabled, a byte class map is still used while searching. The only
+ /// difference is that every byte will be forced into its own distinct
+ /// equivalence class. This is useful for debugging the actual generated
+ /// transitions because it lets one see the transitions defined on actual
+ /// bytes instead of the equivalence classes.
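+ ///
+ /// # Examples
+ ///
+ /// A sketch of disabling byte classes, which is only interesting when
+ /// inspecting the `Debug` output of the automaton. Search results are
+ /// identical either way:
+ ///
+ /// ```
+ /// use aho_corasick::AhoCorasick;
+ ///
+ /// let ac = AhoCorasick::builder()
+ ///     .byte_classes(false)
+ ///     .build(&["foo", "bar", "baz"])
+ ///     .unwrap();
+ /// assert_eq!(1, ac.find_iter("xxx bar xxx").count());
+ /// ```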
+ pub fn byte_classes(&mut self, yes: bool) -> &mut AhoCorasickBuilder {
+ self.nfa_contiguous.byte_classes(yes);
+ self.dfa.byte_classes(yes);
+ self
+ }
+}
+
+/// The type of Aho-Corasick implementation to use in an [`AhoCorasick`]
+/// searcher.
+///
+/// This is principally used as an input to the
+/// [`AhoCorasickBuilder::kind`] method. Its documentation goes into more
+/// detail about each choice.
+#[non_exhaustive]
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum AhoCorasickKind {
+ /// Use a noncontiguous NFA.
+ NoncontiguousNFA,
+ /// Use a contiguous NFA.
+ ContiguousNFA,
+ /// Use a DFA. Warning: DFAs typically use a large amount of memory.
+ DFA,
+}
+
+/// A trait that effectively gives us practical dynamic dispatch over anything
+/// that impls `Automaton`, but without needing to add a bunch of bounds to
+/// the core `Automaton` trait. Basically, we provide all of the marker traits
+/// that our automatons have, in addition to `Debug` impls and requiring that
+/// there is no borrowed data. Without these, the main `AhoCorasick` type would
+/// not be able to meaningfully impl `Debug` or the marker traits without also
+/// requiring that all impls of `Automaton` do so, which would not be great.
+trait AcAutomaton:
+ Automaton + Debug + Send + Sync + UnwindSafe + RefUnwindSafe + 'static
+{
+}
+
+impl<A> AcAutomaton for A where
+ A: Automaton + Debug + Send + Sync + UnwindSafe + RefUnwindSafe + 'static
+{
+}
+
+impl crate::automaton::private::Sealed for Arc<dyn AcAutomaton> {}
+
+// I'm not sure why this trait impl shows up in the docs, as the AcAutomaton
+// trait is not exported. So we forcefully hide it.
+//
+// SAFETY: This just defers to the underlying 'AcAutomaton' and thus inherits
+// its safety properties.
+#[doc(hidden)]
+unsafe impl Automaton for Arc<dyn AcAutomaton> {
+ #[inline(always)]
+ fn start_state(&self, anchored: Anchored) -> Result<StateID, MatchError> {
+ (**self).start_state(anchored)
+ }
+
+ #[inline(always)]
+ fn next_state(
+ &self,
+ anchored: Anchored,
+ sid: StateID,
+ byte: u8,
+ ) -> StateID {
+ (**self).next_state(anchored, sid, byte)
+ }
+
+ #[inline(always)]
+ fn is_special(&self, sid: StateID) -> bool {
+ (**self).is_special(sid)
+ }
+
+ #[inline(always)]
+ fn is_dead(&self, sid: StateID) -> bool {
+ (**self).is_dead(sid)
+ }
+
+ #[inline(always)]
+ fn is_match(&self, sid: StateID) -> bool {
+ (**self).is_match(sid)
+ }
+
+ #[inline(always)]
+ fn is_start(&self, sid: StateID) -> bool {
+ (**self).is_start(sid)
+ }
+
+ #[inline(always)]
+ fn match_kind(&self) -> MatchKind {
+ (**self).match_kind()
+ }
+
+ #[inline(always)]
+ fn match_len(&self, sid: StateID) -> usize {
+ (**self).match_len(sid)
+ }
+
+ #[inline(always)]
+ fn match_pattern(&self, sid: StateID, index: usize) -> PatternID {
+ (**self).match_pattern(sid, index)
+ }
+
+ #[inline(always)]
+ fn patterns_len(&self) -> usize {
+ (**self).patterns_len()
+ }
+
+ #[inline(always)]
+ fn pattern_len(&self, pid: PatternID) -> usize {
+ (**self).pattern_len(pid)
+ }
+
+ #[inline(always)]
+ fn min_pattern_len(&self) -> usize {
+ (**self).min_pattern_len()
+ }
+
+ #[inline(always)]
+ fn max_pattern_len(&self) -> usize {
+ (**self).max_pattern_len()
+ }
+
+ #[inline(always)]
+ fn memory_usage(&self) -> usize {
+ (**self).memory_usage()
+ }
+
+ #[inline(always)]
+ fn prefilter(&self) -> Option<&Prefilter> {
+ (**self).prefilter()
+ }
+
+ // Even though 'try_find' and 'try_find_overlapping' each have their
+ // own default impls, we explicitly define them here to fix a perf bug.
+ // Without these explicit definitions, the default impl will wind up using
+ // dynamic dispatch for all 'Automaton' method calls, including things like
+ // 'next_state' that absolutely must get inlined or else perf is trashed.
+ // Defining them explicitly here like this still requires dynamic dispatch
+ // to call 'try_find' itself, but all uses of 'Automaton' within 'try_find'
+ // are monomorphized.
+ //
+ // We don't need to explicitly impl any other methods, I think, because
+ // they are all implemented themselves in terms of 'try_find' and
+ // 'try_find_overlapping'. We still might wind up with an extra virtual
+ // call here or there, but that's okay since it's outside of any perf
+ // critical areas.
+
+ #[inline(always)]
+ fn try_find(
+ &self,
+ input: &Input<'_>,
+ ) -> Result<Option<Match>, MatchError> {
+ (**self).try_find(input)
+ }
+
+ #[inline(always)]
+ fn try_find_overlapping(
+ &self,
+ input: &Input<'_>,
+ state: &mut OverlappingState,
+ ) -> Result<(), MatchError> {
+ (**self).try_find_overlapping(input, state)
+ }
+}
+
+/// Returns an error if the start state configuration does not support the
+/// desired search configuration. See the internal 'AhoCorasick::start_kind'
+/// field docs for more details.
+fn enforce_anchored_consistency(
+ have: StartKind,
+ want: Anchored,
+) -> Result<(), MatchError> {
+ match have {
+ StartKind::Both => Ok(()),
+ StartKind::Unanchored if !want.is_anchored() => Ok(()),
+ StartKind::Unanchored => Err(MatchError::invalid_input_anchored()),
+ StartKind::Anchored if want.is_anchored() => Ok(()),
+ StartKind::Anchored => Err(MatchError::invalid_input_unanchored()),
+ }
+}
diff --git a/third_party/rust/aho-corasick/src/automaton.rs b/third_party/rust/aho-corasick/src/automaton.rs
new file mode 100644
index 0000000000..c41dc6e1db
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/automaton.rs
@@ -0,0 +1,1608 @@
+/*!
+Provides the [`Automaton`] trait for abstracting over Aho-Corasick automata.
+
+The `Automaton` trait provides a way to write generic code over any
+Aho-Corasick automaton. It also provides access to lower level APIs that
+permit walking the state transitions of an Aho-Corasick automaton manually.
+*/
+
+use alloc::{string::String, vec::Vec};
+
+use crate::util::{
+ error::MatchError,
+ primitives::PatternID,
+ search::{Anchored, Input, Match, MatchKind, Span},
+};
+
+pub use crate::util::{
+ prefilter::{Candidate, Prefilter},
+ primitives::{StateID, StateIDError},
+};
+
+/// We seal the `Automaton` trait for now. It's a big trait, and it's
+/// conceivable that I might want to add new required methods, and sealing the
+/// trait permits doing that in a backwards compatible fashion. On the other
+/// hand, if you have a solid use case for implementing the trait yourself,
+/// please file an issue and we can discuss it. This was *mostly* done as a
+/// conservative step.
+pub(crate) mod private {
+ pub trait Sealed {}
+}
+impl private::Sealed for crate::nfa::noncontiguous::NFA {}
+impl private::Sealed for crate::nfa::contiguous::NFA {}
+impl private::Sealed for crate::dfa::DFA {}
+
+impl<'a, T: private::Sealed + ?Sized> private::Sealed for &'a T {}
+
+/// A trait that abstracts over Aho-Corasick automata.
+///
+/// This trait primarily exists for niche use cases such as:
+///
+/// * Using an NFA or DFA directly, bypassing the top-level
+/// [`AhoCorasick`](crate::AhoCorasick) searcher. Currently, these include
+/// [`noncontiguous::NFA`](crate::nfa::noncontiguous::NFA),
+/// [`contiguous::NFA`](crate::nfa::contiguous::NFA) and
+/// [`dfa::DFA`](crate::dfa::DFA).
+/// * Implementing your own custom search routine by walking the automaton
+/// yourself. This might be useful for implementing search on non-contiguous
+/// strings or streams.
+///
+/// For most use cases, it is not expected that users will need
+/// to use or even know about this trait. Indeed, the top level
+/// [`AhoCorasick`](crate::AhoCorasick) searcher does not expose any details
+/// about this trait, nor does it implement it itself.
+///
+/// Note that this trait defines a number of default methods, such as
+/// [`Automaton::try_find`] and [`Automaton::try_find_iter`], which implement
+/// higher level search routines in terms of the lower level automata API.
+///
+/// # Sealed
+///
+/// Currently, this trait is sealed. That means users of this crate can write
+/// generic routines over this trait but cannot implement it themselves. This
+/// restriction may be lifted in the future, but sealing the trait permits
+/// adding new required methods in a backwards compatible fashion.
+///
+/// # Special states
+///
+/// This trait encodes a notion of "special" states in an automaton. Namely,
+/// a state is treated as special if it is a dead, match or start state:
+///
+/// * A dead state is a state that cannot be left once entered. All transitions
+/// on a dead state lead back to itself. The dead state is meant to be treated
+/// as a sentinel indicating that the search should stop and return a match if
+/// one has been found, and nothing otherwise.
+/// * A match state is a state that indicates one or more patterns have
+/// matched. Depending on the [`MatchKind`] of the automaton, a search may
+/// stop once a match is seen, or it may continue looking for matches until
+/// it enters a dead state or sees the end of the haystack.
+/// * A start state is a state that a search begins in. It is useful to know
+/// when a search enters a start state because it may mean that a prefilter can
+/// be used to skip ahead and quickly look for candidate matches. Unlike dead
+/// and match states, it is never necessary to explicitly handle start states
+/// for correctness. Indeed, in this crate, implementations of `Automaton`
+/// will only treat start states as "special" when a prefilter is enabled and
+/// active. Otherwise, treating them as special has no purpose and winds up
+/// slowing down the overall search because it results in ping-ponging between
+/// the main state transition and the "special" state logic.
+///
+/// Since checking whether a state is special by doing three different
+/// checks would be too expensive inside a fast search loop, the
+/// [`Automaton::is_special`] method is provided for quickly checking whether
+/// the state is special. The `Automaton::is_dead`, `Automaton::is_match` and
+/// `Automaton::is_start` predicates can then be used to determine which kind
+/// of special state it is.
+///
+/// # Panics
+///
+/// Most of the APIs on this trait may panic or give incorrect results
+/// if invalid inputs are given to them. For example, `Automaton::next_state`
+/// has unspecified behavior if the state ID given to it is not a valid
+/// state ID for the underlying automaton. Valid state IDs can only be
+/// retrieved in one of two ways: calling `Automaton::start_state` or calling
+/// `Automaton::next_state` with a valid state ID.
+///
+/// # Safety
+///
+/// This trait is not safe to implement so that code may rely on the
+/// correctness of implementations of this trait to avoid undefined behavior.
+/// The primary correctness guarantees are:
+///
+/// * `Automaton::start_state` always returns a valid state ID or an error or
+/// panics.
+/// * `Automaton::next_state`, when given a valid state ID, always returns
+/// a valid state ID for all values of `anchored` and `byte`, or otherwise
+/// panics.
+///
+/// In general, the rest of the methods on `Automaton` need to uphold their
+/// contracts as well. For example, `Automaton::is_dead` should only return
+/// true if the given state ID is actually a dead state.
+///
+/// Note that currently this crate does not rely on the safety property defined
+/// here to avoid undefined behavior. Instead, this was done to make it
+/// _possible_ to do in the future.
+///
+/// # Example
+///
+/// This example shows how one might implement a basic but correct search
+/// routine. We keep things simple by not using prefilters or worrying about
+/// anchored searches, but do make sure our search is correct for all possible
+/// [`MatchKind`] semantics. (The comments in the code below note the parts
+/// that are needed to support certain `MatchKind` semantics.)
+///
+/// ```
+/// use aho_corasick::{
+/// automaton::Automaton,
+/// nfa::noncontiguous::NFA,
+/// Anchored, Match, MatchError, MatchKind,
+/// };
+///
+/// // Run an unanchored search for 'aut' in 'haystack'. Return the first match
+/// // seen according to the automaton's match semantics. This returns an error
+/// // if the given automaton does not support unanchored searches.
+/// fn find<A: Automaton>(
+/// aut: A,
+/// haystack: &[u8],
+/// ) -> Result<Option<Match>, MatchError> {
+/// let mut sid = aut.start_state(Anchored::No)?;
+/// let mut at = 0;
+/// let mut mat = None;
+/// let get_match = |sid, at| {
+/// let pid = aut.match_pattern(sid, 0);
+/// let len = aut.pattern_len(pid);
+/// Match::new(pid, (at - len)..at)
+/// };
+/// // Start states can be match states!
+/// if aut.is_match(sid) {
+/// mat = Some(get_match(sid, at));
+/// // Standard semantics require matches to be reported as soon as
+/// // they're seen. Otherwise, we continue until we see a dead state
+/// // or the end of the haystack.
+/// if matches!(aut.match_kind(), MatchKind::Standard) {
+/// return Ok(mat);
+/// }
+/// }
+/// while at < haystack.len() {
+/// sid = aut.next_state(Anchored::No, sid, haystack[at]);
+/// if aut.is_special(sid) {
+/// if aut.is_dead(sid) {
+/// return Ok(mat);
+/// } else if aut.is_match(sid) {
+/// mat = Some(get_match(sid, at + 1));
+/// // As above, standard semantics require that we return
+/// // immediately once a match is found.
+/// if matches!(aut.match_kind(), MatchKind::Standard) {
+/// return Ok(mat);
+/// }
+/// }
+/// }
+/// at += 1;
+/// }
+/// Ok(mat)
+/// }
+///
+/// // Show that it works for standard searches.
+/// let nfa = NFA::new(&["samwise", "sam"]).unwrap();
+/// assert_eq!(Some(Match::must(1, 0..3)), find(&nfa, b"samwise")?);
+///
+/// // But also works when using leftmost-first. Notice how the match result
+/// // has changed!
+/// let nfa = NFA::builder()
+/// .match_kind(MatchKind::LeftmostFirst)
+/// .build(&["samwise", "sam"])
+/// .unwrap();
+/// assert_eq!(Some(Match::must(0, 0..7)), find(&nfa, b"samwise")?);
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+pub unsafe trait Automaton: private::Sealed {
+ /// Returns the starting state for the given anchor mode.
+ ///
+ /// Upon success, the state ID returned is guaranteed to be valid for
+ /// this automaton.
+ ///
+ /// # Errors
+ ///
+ /// This returns an error when the given search configuration is not
+ /// supported by the underlying automaton. For example, if the underlying
+ /// automaton only supports unanchored searches but the given configuration
+ /// was set to an anchored search, then this must return an error.
+ fn start_state(&self, anchored: Anchored) -> Result<StateID, MatchError>;
+
+ /// Performs a state transition from `sid` for `byte` and returns the next
+ /// state.
+ ///
+ /// `anchored` should be [`Anchored::Yes`] when executing an anchored
+ /// search and [`Anchored::No`] otherwise. For some implementations of
+ /// `Automaton`, it is required to know whether the search is anchored
+ /// or not in order to avoid following failure transitions. Other
+ /// implementations may ignore `anchored` altogether and depend on
+ /// `Automaton::start_state` returning a state that walks a different path
+ /// through the automaton depending on whether the search is anchored or
+ /// not.
+ ///
+ /// # Panics
+ ///
+ /// This routine may panic or return incorrect results when the given state
+ /// ID is invalid. A state ID is valid if and only if:
+ ///
+ /// 1. It came from a call to `Automaton::start_state`, or
+ /// 2. It came from a previous call to `Automaton::next_state` with a
+ /// valid state ID.
+ ///
+ /// Implementations must treat all possible values of `byte` as valid.
+ ///
+ /// Implementations may panic on unsupported values of `anchored`, but are
+ /// not required to do so.
+ fn next_state(
+ &self,
+ anchored: Anchored,
+ sid: StateID,
+ byte: u8,
+ ) -> StateID;
+
+ /// Returns true if the given ID represents a "special" state. A special
+ /// state is a dead, match or start state.
+ ///
+ /// Note that implementations may choose to return false when the given ID
+/// corresponds to a start state. Namely, it is always correct to treat start
+ /// states as non-special. Implementations must return true for states that
+ /// are dead or contain matches.
+ ///
+ /// This has unspecified behavior when given an invalid state ID.
+ fn is_special(&self, sid: StateID) -> bool;
+
+ /// Returns true if the given ID represents a dead state.
+ ///
+ /// A dead state is a type of "sink" in a finite state machine. It
+ /// corresponds to a state whose transitions all loop back to itself. That
+ /// is, once entered, it can never be left. In practice, it serves as a
+ /// sentinel indicating that the search should terminate.
+ ///
+ /// This has unspecified behavior when given an invalid state ID.
+ fn is_dead(&self, sid: StateID) -> bool;
+
+ /// Returns true if the given ID represents a match state.
+ ///
+ /// A match state is always associated with one or more pattern IDs that
+ /// matched at the position in the haystack when the match state was
+ /// entered. When a match state is entered, the match semantics dictate
+ /// whether it should be returned immediately (for `MatchKind::Standard`)
+ /// or if the search should continue (for `MatchKind::LeftmostFirst` and
+ /// `MatchKind::LeftmostLongest`) until a dead state is seen or the end of
+ /// the haystack has been reached.
+ ///
+ /// This has unspecified behavior when given an invalid state ID.
+ fn is_match(&self, sid: StateID) -> bool;
+
+ /// Returns true if the given ID represents a start state.
+ ///
+ /// While it is never incorrect to ignore start states during a search
+ /// (except for the start of the search of course), knowing whether one has
+ /// entered a start state can be useful for certain classes of performance
+ /// optimizations. For example, if one is in a start state, it may be legal
+ /// to try to skip ahead and look for match candidates more quickly than
+ /// would otherwise be accomplished by walking the automaton.
+ ///
+ /// Implementations of `Automaton` in this crate "unspecialize" start
+ /// states when a prefilter is not active or enabled. In this case, it
+ /// is possible for `Automaton::is_special(sid)` to return false while
+ /// `Automaton::is_start(sid)` returns true.
+ ///
+ /// This has unspecified behavior when given an invalid state ID.
+ fn is_start(&self, sid: StateID) -> bool;
+
+ /// Returns the match semantics that this automaton was built with.
+ fn match_kind(&self) -> MatchKind;
+
+ /// Returns the total number of matches for the given state ID.
+ ///
+ /// This has unspecified behavior if the given ID does not refer to a match
+ /// state.
+ fn match_len(&self, sid: StateID) -> usize;
+
+ /// Returns the pattern ID for the match state given by `sid` at the
+ /// `index` given.
+ ///
+ /// Typically, `index` is only ever greater than `0` when implementing an
+ /// overlapping search. Otherwise, it's likely that your search only cares
+ /// about reporting the first pattern ID in a match state.
+ ///
+ /// This has unspecified behavior if the given ID does not refer to a match
+ /// state, or if the index is greater than or equal to the total number of
+ /// matches in this match state.
+ fn match_pattern(&self, sid: StateID, index: usize) -> PatternID;
+
+ /// Returns the total number of patterns compiled into this automaton.
+ fn patterns_len(&self) -> usize;
+
+ /// Returns the length of the pattern for the given ID.
+ ///
+ /// This has unspecified behavior when given an invalid pattern
+ /// ID. A pattern ID is valid if and only if it is less than
+ /// `Automaton::patterns_len`.
+ fn pattern_len(&self, pid: PatternID) -> usize;
+
+ /// Returns the length, in bytes, of the shortest pattern in this
+ /// automaton.
+ fn min_pattern_len(&self) -> usize;
+
+ /// Returns the length, in bytes, of the longest pattern in this automaton.
+ fn max_pattern_len(&self) -> usize;
+
+ /// Returns the heap memory usage, in bytes, used by this automaton.
+ fn memory_usage(&self) -> usize;
+
+ /// Returns a prefilter, if available, that can be used to accelerate
+ /// searches for this automaton.
+ ///
+ /// The typical way this is used is when the start state is entered during
+ /// a search. When that happens, one can use a prefilter to skip ahead and
+ /// look for candidate matches without having to walk the automaton on the
+ /// bytes between candidates.
+ ///
+ /// Typically a prefilter is only available when there are a small (<100)
+ /// number of patterns built into the automaton.
+ fn prefilter(&self) -> Option<&Prefilter>;
+
+ /// Executes a non-overlapping search with this automaton using the given
+ /// configuration.
+ ///
+ /// See
+ /// [`AhoCorasick::try_find`](crate::AhoCorasick::try_find)
+ /// for more documentation and examples.
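+ ///
+ /// # Example
+ ///
+ /// A small sketch of calling this directly on one of the concrete
+ /// automatons in this crate (a noncontiguous NFA with the default
+ /// standard match semantics):
+ ///
+ /// ```
+ /// use aho_corasick::{
+ ///     automaton::Automaton, nfa::noncontiguous::NFA, Input, Match,
+ /// };
+ ///
+ /// let nfa = NFA::new(&["samwise", "sam"]).unwrap();
+ /// let mat = nfa.try_find(&Input::new("samwise"))?;
+ /// assert_eq!(Some(Match::must(1, 0..3)), mat);
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```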
+ fn try_find(
+ &self,
+ input: &Input<'_>,
+ ) -> Result<Option<Match>, MatchError> {
+ try_find_fwd(&self, input)
+ }
+
+ /// Executes an overlapping search with this automaton using the given
+ /// configuration.
+ ///
+ /// See
+ /// [`AhoCorasick::try_find_overlapping`](crate::AhoCorasick::try_find_overlapping)
+ /// for more documentation and examples.
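+ ///
+ /// # Example
+ ///
+ /// A rough sketch of driving an overlapping search by hand with a
+ /// noncontiguous NFA; the pattern set here is purely illustrative:
+ ///
+ /// ```
+ /// use aho_corasick::{
+ ///     automaton::{Automaton, OverlappingState},
+ ///     nfa::noncontiguous::NFA,
+ ///     Input,
+ /// };
+ ///
+ /// let nfa = NFA::new(&["app", "append"]).unwrap();
+ /// let input = Input::new("append");
+ /// let mut state = OverlappingState::start();
+ /// let mut count = 0;
+ /// loop {
+ ///     nfa.try_find_overlapping(&input, &mut state)?;
+ ///     if state.get_match().is_none() {
+ ///         break;
+ ///     }
+ ///     count += 1;
+ /// }
+ /// // "app" at 0..3 and "append" at 0..6 both match.
+ /// assert_eq!(2, count);
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```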
+ fn try_find_overlapping(
+ &self,
+ input: &Input<'_>,
+ state: &mut OverlappingState,
+ ) -> Result<(), MatchError> {
+ try_find_overlapping_fwd(&self, input, state)
+ }
+
+ /// Returns an iterator of non-overlapping matches with this automaton
+ /// using the given configuration.
+ ///
+ /// See
+ /// [`AhoCorasick::try_find_iter`](crate::AhoCorasick::try_find_iter)
+ /// for more documentation and examples.
+ fn try_find_iter<'a, 'h>(
+ &'a self,
+ input: Input<'h>,
+ ) -> Result<FindIter<'a, 'h, Self>, MatchError>
+ where
+ Self: Sized,
+ {
+ FindIter::new(self, input)
+ }
+
+ /// Returns an iterator of overlapping matches with this automaton
+ /// using the given configuration.
+ ///
+ /// See
+ /// [`AhoCorasick::try_find_overlapping_iter`](crate::AhoCorasick::try_find_overlapping_iter)
+ /// for more documentation and examples.
+ fn try_find_overlapping_iter<'a, 'h>(
+ &'a self,
+ input: Input<'h>,
+ ) -> Result<FindOverlappingIter<'a, 'h, Self>, MatchError>
+ where
+ Self: Sized,
+ {
+ if !self.match_kind().is_standard() {
+ return Err(MatchError::unsupported_overlapping(
+ self.match_kind(),
+ ));
+ }
+ // We might consider lifting this restriction. The reason why I added
+ // it was to ban the combination of "anchored search" and "overlapping
+ // iteration." The match semantics aren't totally clear in that case.
+ // Should we allow *any* matches that are adjacent to *any* previous
+ // match? Or only following the most recent one? Or only matches
+ // that start at the beginning of the search? We might also elect to
+ // just keep this restriction in place, as callers should be able to
+ // implement it themselves if they want to.
+ if input.get_anchored().is_anchored() {
+ return Err(MatchError::invalid_input_anchored());
+ }
+ let _ = self.start_state(input.get_anchored())?;
+ let state = OverlappingState::start();
+ Ok(FindOverlappingIter { aut: self, input, state })
+ }
+
+ /// Replaces all non-overlapping matches in `haystack` with
+ /// strings from `replace_with` depending on the pattern that
+ /// matched. The `replace_with` slice must have length equal to
+ /// `Automaton::patterns_len`.
+ ///
+ /// See
+ /// [`AhoCorasick::try_replace_all`](crate::AhoCorasick::try_replace_all)
+ /// for more documentation and examples.
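+ ///
+ /// # Example
+ ///
+ /// A small sketch using a noncontiguous NFA directly; the patterns and
+ /// replacements are illustrative only:
+ ///
+ /// ```
+ /// use aho_corasick::{automaton::Automaton, nfa::noncontiguous::NFA};
+ ///
+ /// let nfa = NFA::new(&["fox", "dog"]).unwrap();
+ /// let result = nfa.try_replace_all("the quick brown fox", &["cat", "wolf"])?;
+ /// assert_eq!("the quick brown cat", result);
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```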
+ fn try_replace_all<B>(
+ &self,
+ haystack: &str,
+ replace_with: &[B],
+ ) -> Result<String, MatchError>
+ where
+ Self: Sized,
+ B: AsRef<str>,
+ {
+ assert_eq!(
+ replace_with.len(),
+ self.patterns_len(),
+ "replace_all requires a replacement for every pattern \
+ in the automaton"
+ );
+ let mut dst = String::with_capacity(haystack.len());
+ self.try_replace_all_with(haystack, &mut dst, |mat, _, dst| {
+ dst.push_str(replace_with[mat.pattern()].as_ref());
+ true
+ })?;
+ Ok(dst)
+ }
+
+ /// Replaces all non-overlapping matches in `haystack` with
+ /// strings from `replace_with` depending on the pattern that
+ /// matched. The `replace_with` slice must have length equal to
+ /// `Automaton::patterns_len`.
+ ///
+ /// See
+ /// [`AhoCorasick::try_replace_all_bytes`](crate::AhoCorasick::try_replace_all_bytes)
+ /// for more documentation and examples.
+ fn try_replace_all_bytes<B>(
+ &self,
+ haystack: &[u8],
+ replace_with: &[B],
+ ) -> Result<Vec<u8>, MatchError>
+ where
+ Self: Sized,
+ B: AsRef<[u8]>,
+ {
+ assert_eq!(
+ replace_with.len(),
+ self.patterns_len(),
+ "replace_all requires a replacement for every pattern \
+ in the automaton"
+ );
+ let mut dst = Vec::with_capacity(haystack.len());
+ self.try_replace_all_with_bytes(haystack, &mut dst, |mat, _, dst| {
+ dst.extend(replace_with[mat.pattern()].as_ref());
+ true
+ })?;
+ Ok(dst)
+ }
+
+ /// Replaces all non-overlapping matches in `haystack` by calling the
+ /// `replace_with` closure given.
+ ///
+ /// See
+ /// [`AhoCorasick::try_replace_all_with`](crate::AhoCorasick::try_replace_all_with)
+ /// for more documentation and examples.
+ fn try_replace_all_with<F>(
+ &self,
+ haystack: &str,
+ dst: &mut String,
+ mut replace_with: F,
+ ) -> Result<(), MatchError>
+ where
+ Self: Sized,
+ F: FnMut(&Match, &str, &mut String) -> bool,
+ {
+ let mut last_match = 0;
+ for m in self.try_find_iter(Input::new(haystack))? {
+ // Since there are no restrictions on what kinds of patterns are
+ // in an Aho-Corasick automaton, we might get matches that split
+ // a codepoint, or even matches of a partial codepoint. When that
+ // happens, we just skip the match.
+ if !haystack.is_char_boundary(m.start())
+ || !haystack.is_char_boundary(m.end())
+ {
+ continue;
+ }
+ dst.push_str(&haystack[last_match..m.start()]);
+ last_match = m.end();
+ if !replace_with(&m, &haystack[m.start()..m.end()], dst) {
+ break;
+ };
+ }
+ dst.push_str(&haystack[last_match..]);
+ Ok(())
+ }
+
+ /// Replaces all non-overlapping matches in `haystack` by calling the
+ /// `replace_with` closure given.
+ ///
+ /// See
+ /// [`AhoCorasick::try_replace_all_with_bytes`](crate::AhoCorasick::try_replace_all_with_bytes)
+ /// for more documentation and examples.
+ fn try_replace_all_with_bytes<F>(
+ &self,
+ haystack: &[u8],
+ dst: &mut Vec<u8>,
+ mut replace_with: F,
+ ) -> Result<(), MatchError>
+ where
+ Self: Sized,
+ F: FnMut(&Match, &[u8], &mut Vec<u8>) -> bool,
+ {
+ let mut last_match = 0;
+ for m in self.try_find_iter(Input::new(haystack))? {
+ dst.extend(&haystack[last_match..m.start()]);
+ last_match = m.end();
+ if !replace_with(&m, &haystack[m.start()..m.end()], dst) {
+ break;
+ };
+ }
+ dst.extend(&haystack[last_match..]);
+ Ok(())
+ }
+
+ /// Returns an iterator of non-overlapping matches with this automaton
+ /// from the stream given.
+ ///
+ /// See
+ /// [`AhoCorasick::try_stream_find_iter`](crate::AhoCorasick::try_stream_find_iter)
+ /// for more documentation and examples.
+ #[cfg(feature = "std")]
+ fn try_stream_find_iter<'a, R: std::io::Read>(
+ &'a self,
+ rdr: R,
+ ) -> Result<StreamFindIter<'a, Self, R>, MatchError>
+ where
+ Self: Sized,
+ {
+ Ok(StreamFindIter { it: StreamChunkIter::new(self, rdr)? })
+ }
+
+ /// Replaces all non-overlapping matches in `rdr` with strings from
+ /// `replace_with` depending on the pattern that matched, and writes the
+ /// result to `wtr`. The `replace_with` slice must have length equal to
+ /// `Automaton::patterns_len`.
+ ///
+ /// See
+ /// [`AhoCorasick::try_stream_replace_all`](crate::AhoCorasick::try_stream_replace_all)
+ /// for more documentation and examples.
+ #[cfg(feature = "std")]
+ fn try_stream_replace_all<R, W, B>(
+ &self,
+ rdr: R,
+ wtr: W,
+ replace_with: &[B],
+ ) -> std::io::Result<()>
+ where
+ Self: Sized,
+ R: std::io::Read,
+ W: std::io::Write,
+ B: AsRef<[u8]>,
+ {
+ assert_eq!(
+ replace_with.len(),
+ self.patterns_len(),
+ "streaming replace_all requires a replacement for every pattern \
+ in the automaton",
+ );
+ self.try_stream_replace_all_with(rdr, wtr, |mat, _, wtr| {
+ wtr.write_all(replace_with[mat.pattern()].as_ref())
+ })
+ }
+
+ /// Replaces all non-overlapping matches in `rdr` by calling the
+ /// `replace_with` closure given and writing the result to `wtr`.
+ ///
+ /// See
+ /// [`AhoCorasick::try_stream_replace_all_with`](crate::AhoCorasick::try_stream_replace_all_with)
+ /// for more documentation and examples.
+ #[cfg(feature = "std")]
+ fn try_stream_replace_all_with<R, W, F>(
+ &self,
+ rdr: R,
+ mut wtr: W,
+ mut replace_with: F,
+ ) -> std::io::Result<()>
+ where
+ Self: Sized,
+ R: std::io::Read,
+ W: std::io::Write,
+ F: FnMut(&Match, &[u8], &mut W) -> std::io::Result<()>,
+ {
+ let mut it = StreamChunkIter::new(self, rdr).map_err(|e| {
+ let kind = std::io::ErrorKind::Other;
+ std::io::Error::new(kind, e)
+ })?;
+ while let Some(result) = it.next() {
+ let chunk = result?;
+ match chunk {
+ StreamChunk::NonMatch { bytes, .. } => {
+ wtr.write_all(bytes)?;
+ }
+ StreamChunk::Match { bytes, mat } => {
+ replace_with(&mat, bytes, &mut wtr)?;
+ }
+ }
+ }
+ Ok(())
+ }
+}
+
+// SAFETY: This just defers to the underlying 'AcAutomaton' and thus inherits
+// its safety properties.
+unsafe impl<'a, A: Automaton + ?Sized> Automaton for &'a A {
+ #[inline(always)]
+ fn start_state(&self, anchored: Anchored) -> Result<StateID, MatchError> {
+ (**self).start_state(anchored)
+ }
+
+ #[inline(always)]
+ fn next_state(
+ &self,
+ anchored: Anchored,
+ sid: StateID,
+ byte: u8,
+ ) -> StateID {
+ (**self).next_state(anchored, sid, byte)
+ }
+
+ #[inline(always)]
+ fn is_special(&self, sid: StateID) -> bool {
+ (**self).is_special(sid)
+ }
+
+ #[inline(always)]
+ fn is_dead(&self, sid: StateID) -> bool {
+ (**self).is_dead(sid)
+ }
+
+ #[inline(always)]
+ fn is_match(&self, sid: StateID) -> bool {
+ (**self).is_match(sid)
+ }
+
+ #[inline(always)]
+ fn is_start(&self, sid: StateID) -> bool {
+ (**self).is_start(sid)
+ }
+
+ #[inline(always)]
+ fn match_kind(&self) -> MatchKind {
+ (**self).match_kind()
+ }
+
+ #[inline(always)]
+ fn match_len(&self, sid: StateID) -> usize {
+ (**self).match_len(sid)
+ }
+
+ #[inline(always)]
+ fn match_pattern(&self, sid: StateID, index: usize) -> PatternID {
+ (**self).match_pattern(sid, index)
+ }
+
+ #[inline(always)]
+ fn patterns_len(&self) -> usize {
+ (**self).patterns_len()
+ }
+
+ #[inline(always)]
+ fn pattern_len(&self, pid: PatternID) -> usize {
+ (**self).pattern_len(pid)
+ }
+
+ #[inline(always)]
+ fn min_pattern_len(&self) -> usize {
+ (**self).min_pattern_len()
+ }
+
+ #[inline(always)]
+ fn max_pattern_len(&self) -> usize {
+ (**self).max_pattern_len()
+ }
+
+ #[inline(always)]
+ fn memory_usage(&self) -> usize {
+ (**self).memory_usage()
+ }
+
+ #[inline(always)]
+ fn prefilter(&self) -> Option<&Prefilter> {
+ (**self).prefilter()
+ }
+}
+
+/// Represents the current state of an overlapping search.
+///
+/// This is used for overlapping searches since they need to know something
+/// about the previous search. For example, when multiple patterns match at the
+/// same position, this state tracks the last reported pattern so that the next
+/// search knows whether to report another matching pattern or continue with
+/// the search at the next position. Additionally, it also tracks which state
+/// the last search call terminated in and the current offset of the search
+/// in the haystack.
+///
+/// This type provides limited introspection capabilities. The only thing a
+/// caller can do is construct it and pass it around to permit search routines
+/// to use it to track state, and to ask whether a match has been found.
+///
+/// Callers should always provide a fresh state constructed via
+/// [`OverlappingState::start`] when starting a new search. That same state
+/// should be reused for subsequent searches on the same `Input`. The state
+/// given will advance through the haystack itself. Callers can detect the end
+/// of a search when neither an error nor a match is returned.
+///
+/// # Example
+///
+/// This example shows how to manually iterate over all overlapping matches. If
+/// you need this, you might consider using
+/// [`AhoCorasick::find_overlapping_iter`](crate::AhoCorasick::find_overlapping_iter)
+/// instead, but this shows how to correctly use an `OverlappingState`.
+///
+/// ```
+/// use aho_corasick::{
+/// automaton::OverlappingState,
+/// AhoCorasick, Input, Match,
+/// };
+///
+/// let patterns = &["append", "appendage", "app"];
+/// let haystack = "append the app to the appendage";
+///
+/// let ac = AhoCorasick::new(patterns).unwrap();
+/// let mut state = OverlappingState::start();
+/// let mut matches = vec![];
+///
+/// loop {
+/// ac.find_overlapping(haystack, &mut state);
+/// let mat = match state.get_match() {
+/// None => break,
+/// Some(mat) => mat,
+/// };
+/// matches.push(mat);
+/// }
+/// let expected = vec![
+/// Match::must(2, 0..3),
+/// Match::must(0, 0..6),
+/// Match::must(2, 11..14),
+/// Match::must(2, 22..25),
+/// Match::must(0, 22..28),
+/// Match::must(1, 22..31),
+/// ];
+/// assert_eq!(expected, matches);
+/// ```
+#[derive(Clone, Debug)]
+pub struct OverlappingState {
+ /// The match reported by the most recent overlapping search to use this
+ /// state.
+ ///
+ /// If a search does not find any matches, then it is expected to clear
+ /// this value.
+ mat: Option<Match>,
+ /// The state ID of the state at which the search was in when the call
+ /// terminated. When this is a match state, `last_match` must be set to a
+ /// non-None value.
+ ///
+ /// A `None` value indicates the start state of the corresponding
+ /// automaton. We cannot use the actual ID, since any one automaton may
+ /// have many start states, and which one is in use depends on search-time
+ /// factors (such as whether the search is anchored or not).
+ id: Option<StateID>,
+ /// The position of the search.
+ ///
+ /// When `id` is None (i.e., we are starting a search), this is set to
+ /// the beginning of the search as given by the caller regardless of its
+ /// current value. Subsequent calls to an overlapping search pick up at
+ /// this offset.
+ at: usize,
+ /// The index into the matching patterns of the next match to report if the
+ /// current state is a match state. Note that this may be 1 greater than
+ /// the total number of matches to report for the current match state. (In
+ /// which case, no more matches should be reported at the current position
+ /// and the search should advance to the next position.)
+ next_match_index: Option<usize>,
+}
+
+impl OverlappingState {
+ /// Create a new overlapping state that begins at the start state.
+ pub fn start() -> OverlappingState {
+ OverlappingState { mat: None, id: None, at: 0, next_match_index: None }
+ }
+
+ /// Return the match result of the most recent search to execute with this
+ /// state.
+ ///
+ /// Every search will clear this result automatically, such that if no
+ /// match is found, this will always correctly report `None`.
+ pub fn get_match(&self) -> Option<Match> {
+ self.mat
+ }
+}
+
+/// An iterator of non-overlapping matches in a particular haystack.
+///
+/// This iterator yields matches according to the [`MatchKind`] used by this
+/// automaton.
+///
+/// This iterator is constructed via the [`Automaton::try_find_iter`] method.
+///
+/// The type variable `A` refers to the implementation of the [`Automaton`]
+/// trait used to execute the search.
+///
+/// The lifetime `'a` refers to the lifetime of the [`Automaton`]
+/// implementation.
+///
+/// The lifetime `'h` refers to the lifetime of the haystack being searched.
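+///
+/// # Example
+///
+/// An illustrative sketch (the patterns and haystack here are made up) of
+/// driving this iterator through the lower level [`Automaton`] API with a
+/// [`DFA`](crate::dfa::DFA):
+///
+/// ```
+/// use aho_corasick::{automaton::Automaton, dfa::DFA, Input, Match};
+///
+/// let dfa = DFA::new(&["foo", "bar"]).unwrap();
+/// let matches: Vec<Match> =
+///     dfa.try_find_iter(Input::new("foo bar foo"))?.collect();
+/// assert_eq!(matches, vec![
+///     Match::must(0, 0..3),
+///     Match::must(1, 4..7),
+///     Match::must(0, 8..11),
+/// ]);
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```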
+#[derive(Debug)]
+pub struct FindIter<'a, 'h, A> {
+ /// The automaton used to drive the search.
+ aut: &'a A,
+ /// The input parameters to give to each search call.
+ ///
+ /// The start position of the search is mutated during iteration.
+ input: Input<'h>,
+ /// Records the end offset of the most recent match. This is necessary to
+ /// handle a corner case for preventing empty matches from overlapping with
+ /// the ending bounds of a prior match.
+ last_match_end: Option<usize>,
+}
+
+impl<'a, 'h, A: Automaton> FindIter<'a, 'h, A> {
+ /// Creates a new non-overlapping iterator. If the given automaton would
+ /// return an error on a search with the given input configuration, then
+ /// that error is returned here.
+ fn new(
+ aut: &'a A,
+ input: Input<'h>,
+ ) -> Result<FindIter<'a, 'h, A>, MatchError> {
+        // The only way this search can fail is if we cannot retrieve the
+        // start state, e.g., asking for an anchored search when only
+        // unanchored searches are supported.
+ let _ = aut.start_state(input.get_anchored())?;
+ Ok(FindIter { aut, input, last_match_end: None })
+ }
+
+ /// Executes a search and returns a match if one is found.
+ ///
+ /// This does not advance the input forward. It just executes a search
+ /// based on the current configuration/offsets.
+ fn search(&self) -> Option<Match> {
+ // The unwrap is OK here because we check at iterator construction time
+ // that no subsequent search call (using the same configuration) will
+ // ever return an error.
+ self.aut
+ .try_find(&self.input)
+ .expect("already checked that no match error can occur")
+ }
+
+ /// Handles the special case of an empty match by ensuring that 1) the
+ /// iterator always advances and 2) empty matches never overlap with other
+ /// matches.
+ ///
+ /// (1) is necessary because we principally make progress by setting the
+ /// starting location of the next search to the ending location of the last
+ /// match. But if a match is empty, then this results in a search that does
+ /// not advance and thus does not terminate.
+ ///
+ /// (2) is not strictly necessary, but makes intuitive sense and matches
+    /// the prevailing behavior of most general purpose regex engines.
+ /// (Obviously this crate isn't a regex engine, but we choose to match
+ /// their semantics.) The "intuitive sense" here is that we want to report
+ /// NON-overlapping matches. So for example, given the patterns 'a' and
+ /// '' (an empty string) against the haystack 'a', without the special
+ /// handling, you'd get the matches [0, 1) and [1, 1), where the latter
+ /// overlaps with the end bounds of the former.
+ ///
+ /// Note that we mark this cold and forcefully prevent inlining because
+ /// handling empty matches like this is extremely rare and does require
+ /// quite a bit of code, comparatively. Keeping this code out of the main
+ /// iterator function keeps it smaller and more amenable to inlining
+ /// itself.
+ #[cold]
+ #[inline(never)]
+ fn handle_overlapping_empty_match(
+ &mut self,
+ mut m: Match,
+ ) -> Option<Match> {
+ assert!(m.is_empty());
+ if Some(m.end()) == self.last_match_end {
+ self.input.set_start(self.input.start().checked_add(1).unwrap());
+ m = self.search()?;
+ }
+ Some(m)
+ }
+}
+
+impl<'a, 'h, A: Automaton> Iterator for FindIter<'a, 'h, A> {
+ type Item = Match;
+
+ #[inline(always)]
+ fn next(&mut self) -> Option<Match> {
+ let mut m = self.search()?;
+ if m.is_empty() {
+ m = self.handle_overlapping_empty_match(m)?;
+ }
+ self.input.set_start(m.end());
+ self.last_match_end = Some(m.end());
+ Some(m)
+ }
+}
+
+/// An iterator of overlapping matches in a particular haystack.
+///
+/// This iterator will report all possible matches in a particular haystack,
+/// even when the matches overlap.
+///
+/// This iterator is constructed via the
+/// [`Automaton::try_find_overlapping_iter`] method.
+///
+/// The type variable `A` refers to the implementation of the [`Automaton`]
+/// trait used to execute the search.
+///
+/// The lifetime `'a` refers to the lifetime of the [`Automaton`]
+/// implementation.
+///
+/// The lifetime `'h` refers to the lifetime of the haystack being searched.
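+///
+/// # Example
+///
+/// An illustrative sketch (the patterns and haystack are made up) using
+/// [`Automaton::try_find_overlapping_iter`] directly; the higher level
+/// [`AhoCorasick::find_overlapping_iter`](crate::AhoCorasick::find_overlapping_iter)
+/// is usually more convenient:
+///
+/// ```
+/// use aho_corasick::{automaton::Automaton, dfa::DFA, Input, Match};
+///
+/// let dfa = DFA::new(&["abcd", "b", "bc"]).unwrap();
+/// let matches: Vec<Match> = dfa
+///     .try_find_overlapping_iter(Input::new("abcd"))?
+///     .collect();
+/// assert_eq!(matches, vec![
+///     Match::must(1, 1..2),
+///     Match::must(2, 1..3),
+///     Match::must(0, 0..4),
+/// ]);
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```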
+#[derive(Debug)]
+pub struct FindOverlappingIter<'a, 'h, A> {
+ aut: &'a A,
+ input: Input<'h>,
+ state: OverlappingState,
+}
+
+impl<'a, 'h, A: Automaton> Iterator for FindOverlappingIter<'a, 'h, A> {
+ type Item = Match;
+
+ #[inline(always)]
+ fn next(&mut self) -> Option<Match> {
+ self.aut
+ .try_find_overlapping(&self.input, &mut self.state)
+ .expect("already checked that no match error can occur here");
+ self.state.get_match()
+ }
+}
+
+/// An iterator that reports matches in a stream.
+///
+/// This iterator yields elements of type `io::Result<Match>`, where an error
+/// is reported if there was a problem reading from the underlying stream.
+/// The iterator terminates only when the underlying stream reaches `EOF`.
+///
+/// This iterator is constructed via the [`Automaton::try_stream_find_iter`]
+/// method.
+///
+/// The type variable `A` refers to the implementation of the [`Automaton`]
+/// trait used to execute the search.
+///
+/// The type variable `R` refers to the `io::Read` stream that is being read
+/// from.
+///
+/// The lifetime `'a` refers to the lifetime of the [`Automaton`]
+/// implementation.
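+///
+/// # Example
+///
+/// An illustrative sketch (the pattern and the in-memory "stream" are made
+/// up); any `std::io::Read` implementation may be used in place of the byte
+/// slice:
+///
+/// ```
+/// use aho_corasick::{automaton::Automaton, dfa::DFA, Match};
+///
+/// let dfa = DFA::new(&["foo"]).unwrap();
+/// let rdr = "foo bar foo".as_bytes();
+/// let mut matches = vec![];
+/// for result in dfa.try_stream_find_iter(rdr)? {
+///     matches.push(result?);
+/// }
+/// assert_eq!(matches, vec![Match::must(0, 0..3), Match::must(0, 8..11)]);
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```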
+#[cfg(feature = "std")]
+#[derive(Debug)]
+pub struct StreamFindIter<'a, A, R> {
+ it: StreamChunkIter<'a, A, R>,
+}
+
+#[cfg(feature = "std")]
+impl<'a, A: Automaton, R: std::io::Read> Iterator
+ for StreamFindIter<'a, A, R>
+{
+ type Item = std::io::Result<Match>;
+
+ fn next(&mut self) -> Option<std::io::Result<Match>> {
+ loop {
+ match self.it.next() {
+ None => return None,
+ Some(Err(err)) => return Some(Err(err)),
+ Some(Ok(StreamChunk::NonMatch { .. })) => {}
+ Some(Ok(StreamChunk::Match { mat, .. })) => {
+ return Some(Ok(mat));
+ }
+ }
+ }
+ }
+}
+
+/// An iterator that reports matches in a stream.
+///
+/// (This doesn't actually implement the `Iterator` trait because it returns
+/// something with a lifetime attached to a buffer it owns, but that's OK. It
+/// still has a `next` method and is iterator-like enough to be fine.)
+///
+/// This iterator yields elements of type `io::Result<StreamChunk>`, where
+/// an error is reported if there was a problem reading from the underlying
+/// stream. The iterator terminates only when the underlying stream reaches
+/// `EOF`.
+///
+/// The idea here is that each chunk represents either a match or a non-match,
+/// and if you concatenated all of the chunks together, you'd reproduce the
+/// entire contents of the stream, byte-for-byte.
+///
+/// This chunk machinery is a bit complicated and it isn't strictly required
+/// for a stream searcher that just reports matches. But we do need something
+/// like this to deal with the "replacement" API, which needs to know which
+/// chunks it can copy and which it needs to replace.
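+///
+/// As an illustrative sketch: searching the stream `xfooy` for the single
+/// pattern `foo` yields three chunks in order: a `NonMatch` for `x`, a
+/// `Match` for `foo` and a `NonMatch` for `y`. Concatenating the chunk
+/// bytes reproduces `xfooy` exactly.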
+#[cfg(feature = "std")]
+#[derive(Debug)]
+struct StreamChunkIter<'a, A, R> {
+ /// The underlying automaton to do the search.
+ aut: &'a A,
+ /// The source of bytes we read from.
+ rdr: R,
+ /// A roll buffer for managing bytes from `rdr`. Basically, this is used
+ /// to handle the case of a match that is split by two different
+ /// calls to `rdr.read()`. This isn't strictly needed if all we needed to
+ /// do was report matches, but here we are reporting chunks of non-matches
+ /// and matches and in order to do that, we really just cannot treat our
+ /// stream as non-overlapping blocks of bytes. We need to permit some
+ /// overlap while we retain bytes from a previous `read` call in memory.
+ buf: crate::util::buffer::Buffer,
+ /// The unanchored starting state of this automaton.
+ start: StateID,
+ /// The state of the automaton.
+ sid: StateID,
+ /// The absolute position over the entire stream.
+ absolute_pos: usize,
+ /// The position we're currently at within `buf`.
+ buffer_pos: usize,
+ /// The buffer position of the end of the bytes that we last returned
+ /// to the caller. Basically, whenever we find a match, we look to see if
+ /// there is a difference between where the match started and the position
+ /// of the last byte we returned to the caller. If there's a difference,
+ /// then we need to return a 'NonMatch' chunk.
+ buffer_reported_pos: usize,
+}
+
+#[cfg(feature = "std")]
+impl<'a, A: Automaton, R: std::io::Read> StreamChunkIter<'a, A, R> {
+ fn new(
+ aut: &'a A,
+ rdr: R,
+ ) -> Result<StreamChunkIter<'a, A, R>, MatchError> {
+ // This restriction is a carry-over from older versions of this crate.
+ // I didn't have the bandwidth to think through how to handle, say,
+ // leftmost-first or leftmost-longest matching, but... it should be
+ // possible? The main problem is that once you see a match state in
+ // leftmost-first semantics, you can't just stop at that point and
+ // report a match. You have to keep going until you either hit a dead
+ // state or EOF. So how do you know when you'll hit a dead state? Well,
+ // you don't. With Aho-Corasick, I believe you can put a bound on it
+ // and say, "once a match has been seen, you'll need to scan forward at
+ // most N bytes" where N=aut.max_pattern_len().
+ //
+ // Which is fine, but it does mean that state about whether we're still
+ // looking for a dead state or not needs to persist across buffer
+ // refills. Which this code doesn't really handle. It does preserve
+ // *some* state across buffer refills, basically ensuring that a match
+ // span is always in memory.
+ if !aut.match_kind().is_standard() {
+ return Err(MatchError::unsupported_stream(aut.match_kind()));
+ }
+ // This is kind of a cop-out, but empty matches are SUPER annoying.
+ // If we know they can't happen (which is what we enforce here), then
+ // it makes a lot of logic much simpler. With that said, I'm open to
+ // supporting this case, but we need to define proper semantics for it
+ // first. It wasn't totally clear to me what it should do at the time
+ // of writing, so I decided to just be conservative.
+ //
+ // It also seems like a very weird case to support anyway. Why search a
+ // stream if you're just going to get a match at every position?
+ //
+ // ¯\_(ツ)_/¯
+ if aut.min_pattern_len() == 0 {
+ return Err(MatchError::unsupported_empty());
+ }
+ let start = aut.start_state(Anchored::No)?;
+ Ok(StreamChunkIter {
+ aut,
+ rdr,
+ buf: crate::util::buffer::Buffer::new(aut.max_pattern_len()),
+ start,
+ sid: start,
+ absolute_pos: 0,
+ buffer_pos: 0,
+ buffer_reported_pos: 0,
+ })
+ }
+
+ fn next(&mut self) -> Option<std::io::Result<StreamChunk>> {
+ // This code is pretty gnarly. It IS simpler than the equivalent code
+ // in the previous aho-corasick release, in part because we inline
+ // automaton traversal here and also in part because we have abdicated
+ // support for automatons that contain an empty pattern.
+ //
+ // I suspect this code could be made a bit simpler by designing a
+ // better buffer abstraction.
+ //
+ // But in general, this code is basically write-only. So you'll need
+ // to go through it step-by-step to grok it. One of the key bits of
+ // complexity is tracking a few different offsets. 'buffer_pos' is
+ // where we are in the buffer for search. 'buffer_reported_pos' is the
+ // position immediately following the last byte in the buffer that
+ // we've returned to the caller. And 'absolute_pos' is the overall
+ // current absolute position of the search in the entire stream, and
+ // this is what match spans are reported in terms of.
+ loop {
+ if self.aut.is_match(self.sid) {
+ let mat = self.get_match();
+ if let Some(r) = self.get_non_match_chunk(mat) {
+ self.buffer_reported_pos += r.len();
+ let bytes = &self.buf.buffer()[r];
+ return Some(Ok(StreamChunk::NonMatch { bytes }));
+ }
+ self.sid = self.start;
+ let r = self.get_match_chunk(mat);
+ self.buffer_reported_pos += r.len();
+ let bytes = &self.buf.buffer()[r];
+ return Some(Ok(StreamChunk::Match { bytes, mat }));
+ }
+ if self.buffer_pos >= self.buf.buffer().len() {
+ if let Some(r) = self.get_pre_roll_non_match_chunk() {
+ self.buffer_reported_pos += r.len();
+ let bytes = &self.buf.buffer()[r];
+ return Some(Ok(StreamChunk::NonMatch { bytes }));
+ }
+ if self.buf.buffer().len() >= self.buf.min_buffer_len() {
+ self.buffer_pos = self.buf.min_buffer_len();
+ self.buffer_reported_pos -=
+ self.buf.buffer().len() - self.buf.min_buffer_len();
+ self.buf.roll();
+ }
+ match self.buf.fill(&mut self.rdr) {
+ Err(err) => return Some(Err(err)),
+ Ok(true) => {}
+ Ok(false) => {
+ // We've hit EOF, but if there are still some
+ // unreported bytes remaining, return them now.
+ if let Some(r) = self.get_eof_non_match_chunk() {
+ self.buffer_reported_pos += r.len();
+ let bytes = &self.buf.buffer()[r];
+ return Some(Ok(StreamChunk::NonMatch { bytes }));
+ }
+ // We've reported everything!
+ return None;
+ }
+ }
+ }
+ let start = self.absolute_pos;
+ for &byte in self.buf.buffer()[self.buffer_pos..].iter() {
+ self.sid = self.aut.next_state(Anchored::No, self.sid, byte);
+ self.absolute_pos += 1;
+ if self.aut.is_match(self.sid) {
+ break;
+ }
+ }
+ self.buffer_pos += self.absolute_pos - start;
+ }
+ }
+
+ /// Return a match chunk for the given match. It is assumed that the match
+ /// ends at the current `buffer_pos`.
+ fn get_match_chunk(&self, mat: Match) -> core::ops::Range<usize> {
+ let start = self.buffer_pos - mat.len();
+ let end = self.buffer_pos;
+ start..end
+ }
+
+ /// Return a non-match chunk, if necessary, just before reporting a match.
+ /// This returns `None` if there is nothing to report. Otherwise, this
+ /// assumes that the given match ends at the current `buffer_pos`.
+ fn get_non_match_chunk(
+ &self,
+ mat: Match,
+ ) -> Option<core::ops::Range<usize>> {
+ let buffer_mat_start = self.buffer_pos - mat.len();
+ if buffer_mat_start > self.buffer_reported_pos {
+ let start = self.buffer_reported_pos;
+ let end = buffer_mat_start;
+ return Some(start..end);
+ }
+ None
+ }
+
+ /// Look for any bytes that should be reported as a non-match just before
+ /// rolling the buffer.
+ ///
+ /// Note that this only reports bytes up to `buffer.len() -
+ /// min_buffer_len`, as it's not possible to know whether the bytes
+ /// following that will participate in a match or not.
+ fn get_pre_roll_non_match_chunk(&self) -> Option<core::ops::Range<usize>> {
+ let end =
+ self.buf.buffer().len().saturating_sub(self.buf.min_buffer_len());
+ if self.buffer_reported_pos < end {
+ return Some(self.buffer_reported_pos..end);
+ }
+ None
+ }
+
+ /// Return any unreported bytes as a non-match up to the end of the buffer.
+ ///
+ /// This should only be called when the entire contents of the buffer have
+ /// been searched and EOF has been hit when trying to fill the buffer.
+ fn get_eof_non_match_chunk(&self) -> Option<core::ops::Range<usize>> {
+ if self.buffer_reported_pos < self.buf.buffer().len() {
+ return Some(self.buffer_reported_pos..self.buf.buffer().len());
+ }
+ None
+ }
+
+ /// Return the match at the current position for the current state.
+ ///
+ /// This panics if `self.aut.is_match(self.sid)` isn't true.
+ fn get_match(&self) -> Match {
+ get_match(self.aut, self.sid, 0, self.absolute_pos)
+ }
+}
+
+/// A single chunk yielded by the stream chunk iterator.
+///
+/// The `'r` lifetime refers to the lifetime of the stream chunk iterator.
+#[cfg(feature = "std")]
+#[derive(Debug)]
+enum StreamChunk<'r> {
+ /// A chunk that does not contain any matches.
+ NonMatch { bytes: &'r [u8] },
+ /// A chunk that precisely contains a match.
+ Match { bytes: &'r [u8], mat: Match },
+}
+
+#[inline(never)]
+pub(crate) fn try_find_fwd<A: Automaton + ?Sized>(
+ aut: &A,
+ input: &Input<'_>,
+) -> Result<Option<Match>, MatchError> {
+ if input.is_done() {
+ return Ok(None);
+ }
+ let earliest = aut.match_kind().is_standard() || input.get_earliest();
+ if input.get_anchored().is_anchored() {
+ try_find_fwd_imp(aut, input, None, Anchored::Yes, earliest)
+ } else if let Some(pre) = aut.prefilter() {
+ if earliest {
+ try_find_fwd_imp(aut, input, Some(pre), Anchored::No, true)
+ } else {
+ try_find_fwd_imp(aut, input, Some(pre), Anchored::No, false)
+ }
+ } else {
+ if earliest {
+ try_find_fwd_imp(aut, input, None, Anchored::No, true)
+ } else {
+ try_find_fwd_imp(aut, input, None, Anchored::No, false)
+ }
+ }
+}
+
+#[inline(always)]
+fn try_find_fwd_imp<A: Automaton + ?Sized>(
+ aut: &A,
+ input: &Input<'_>,
+ pre: Option<&Prefilter>,
+ anchored: Anchored,
+ earliest: bool,
+) -> Result<Option<Match>, MatchError> {
+ let mut sid = aut.start_state(input.get_anchored())?;
+ let mut at = input.start();
+ let mut mat = None;
+ if aut.is_match(sid) {
+ mat = Some(get_match(aut, sid, 0, at));
+ if earliest {
+ return Ok(mat);
+ }
+ }
+ if let Some(pre) = pre {
+ match pre.find_in(input.haystack(), input.get_span()) {
+ Candidate::None => return Ok(None),
+ Candidate::Match(m) => return Ok(Some(m)),
+ Candidate::PossibleStartOfMatch(i) => {
+ at = i;
+ }
+ }
+ }
+ while at < input.end() {
+ // I've tried unrolling this loop and eliding bounds checks, but no
+ // matter what I did, I could not observe a consistent improvement on
+ // any benchmark I could devise. (If someone wants to re-litigate this,
+        // the way to do it is to add a 'next_state_unchecked' method to the
+        // 'Automaton' trait with a default impl that uses 'next_state'. Then
+        // use 'aut.next_state_unchecked' here and implement it on DFA using
+        // unchecked slice index access.)
+ sid = aut.next_state(anchored, sid, input.haystack()[at]);
+ if aut.is_special(sid) {
+ if aut.is_dead(sid) {
+ return Ok(mat);
+ } else if aut.is_match(sid) {
+ // We use 'at + 1' here because the match state is entered
+ // at the last byte of the pattern. Since we use half-open
+ // intervals, the end of the range of the match is one past the
+ // last byte.
+ let m = get_match(aut, sid, 0, at + 1);
+ // For the automata in this crate, we make a size trade off
+ // where we reuse the same automaton for both anchored and
+ // unanchored searches. We achieve this, principally, by simply
+ // not following failure transitions while computing the next
+ // state. Instead, if we fail to find the next state, we return
+ // a dead state, which instructs the search to stop. (This
+ // is why 'next_state' needs to know whether the search is
+ // anchored or not.) In addition, we have different start
+ // states for anchored and unanchored searches. The latter has
+                // a self-loop whereas the former does not.
+ //
+ // In this way, we can use the same trie to execute both
+ // anchored and unanchored searches. There is a catch though.
+ // When building an Aho-Corasick automaton for unanchored
+ // searches, we copy matches from match states to other states
+ // (which would otherwise not be match states) if they are
+ // reachable via a failure transition. In the case of an
+ // anchored search, we *specifically* do not want to report
+ // these matches because they represent matches that start past
+ // the beginning of the search.
+ //
+ // Now we could tweak the automaton somehow to differentiate
+ // anchored from unanchored match states, but this would make
+ // 'aut.is_match' and potentially 'aut.is_special' slower. And
+ // also make the automaton itself more complex.
+ //
+ // Instead, we insert a special hack: if the search is
+ // anchored, we simply ignore matches that don't begin at
+ // the start of the search. This is not quite ideal, but we
+ // do specialize this function in such a way that unanchored
+ // searches don't pay for this additional branch. While this
+ // might cause a search to continue on for more than it
+ // otherwise optimally would, it will be no more than the
+ // longest pattern in the automaton. The reason for this is
+ // that we ensure we don't follow failure transitions during
+ // an anchored search. Combined with using a different anchored
+ // starting state with no self-loop, we guarantee that we'll
+ // at worst move through a number of transitions equal to the
+ // longest pattern.
+ //
+ // Now for DFAs, the whole point of them is to eliminate
+ // failure transitions entirely. So there is no way to say "if
+ // it's an anchored search don't follow failure transitions."
+ // Instead, we actually have to build two entirely separate
+ // automatons into the transition table. One with failure
+ // transitions built into it and another that is effectively
+ // just an encoding of the base trie into a transition table.
+ // DFAs still need this check though, because the match states
+ // still carry matches only reachable via a failure transition.
+ // Why? Because removing them seems difficult, although I
+ // haven't given it a lot of thought.
+ if !(anchored.is_anchored() && m.start() > input.start()) {
+ mat = Some(m);
+ if earliest {
+ return Ok(mat);
+ }
+ }
+ } else if let Some(pre) = pre {
+ // If we're here, we know it's a special state that is not a
+ // dead or a match state AND that a prefilter is active. Thus,
+ // it must be a start state.
+ debug_assert!(aut.is_start(sid));
+ // We don't care about 'Candidate::Match' here because if such
+ // a match were possible, it would have been returned above
+ // when we run the prefilter before walking the automaton.
+ let span = Span::from(at..input.end());
+ match pre.find_in(input.haystack(), span).into_option() {
+ None => return Ok(None),
+ Some(i) => {
+ if i > at {
+ at = i;
+ continue;
+ }
+ }
+ }
+ } else {
+ // When pre.is_none(), then starting states should not be
+ // treated as special. That is, without a prefilter, is_special
+ // should only return true when the state is a dead or a match
+ // state.
+ //
+ // It is possible to execute a search without a prefilter even
+ // when the underlying searcher has one: an anchored search.
+ // But in this case, the automaton makes it impossible to move
+ // back to the start state by construction, and thus, we should
+ // never reach this branch.
+ debug_assert!(false, "unreachable");
+ }
+ }
+ at += 1;
+ }
+ Ok(mat)
+}
+
+#[inline(never)]
+fn try_find_overlapping_fwd<A: Automaton + ?Sized>(
+ aut: &A,
+ input: &Input<'_>,
+ state: &mut OverlappingState,
+) -> Result<(), MatchError> {
+ state.mat = None;
+ if input.is_done() {
+ return Ok(());
+ }
+ // Searching with a pattern ID is always anchored, so we should only ever
+ // use a prefilter when no pattern ID is given.
+ if aut.prefilter().is_some() && !input.get_anchored().is_anchored() {
+ let pre = aut.prefilter().unwrap();
+ try_find_overlapping_fwd_imp(aut, input, Some(pre), state)
+ } else {
+ try_find_overlapping_fwd_imp(aut, input, None, state)
+ }
+}
+
+#[inline(always)]
+fn try_find_overlapping_fwd_imp<A: Automaton + ?Sized>(
+ aut: &A,
+ input: &Input<'_>,
+ pre: Option<&Prefilter>,
+ state: &mut OverlappingState,
+) -> Result<(), MatchError> {
+ let mut sid = match state.id {
+ None => {
+ let sid = aut.start_state(input.get_anchored())?;
+ // Handle the case where the start state is a match state. That is,
+ // the empty string is in our automaton. We report every match we
+ // can here before moving on and updating 'state.at' and 'state.id'
+ // to find more matches in other parts of the haystack.
+ if aut.is_match(sid) {
+ let i = state.next_match_index.unwrap_or(0);
+ let len = aut.match_len(sid);
+ if i < len {
+ state.next_match_index = Some(i + 1);
+ state.mat = Some(get_match(aut, sid, i, input.start()));
+ return Ok(());
+ }
+ }
+ state.at = input.start();
+ state.id = Some(sid);
+ state.next_match_index = None;
+ state.mat = None;
+ sid
+ }
+ Some(sid) => {
+ // If we still have matches left to report in this state then
+ // report them until we've exhausted them. Only after that do we
+ // advance to the next offset in the haystack.
+ if let Some(i) = state.next_match_index {
+ let len = aut.match_len(sid);
+ if i < len {
+ state.next_match_index = Some(i + 1);
+ state.mat = Some(get_match(aut, sid, i, state.at + 1));
+ return Ok(());
+ }
+ // Once we've reported all matches at a given position, we need
+ // to advance the search to the next position.
+ state.at += 1;
+ state.next_match_index = None;
+ state.mat = None;
+ }
+ sid
+ }
+ };
+ while state.at < input.end() {
+ sid = aut.next_state(
+ input.get_anchored(),
+ sid,
+ input.haystack()[state.at],
+ );
+ if aut.is_special(sid) {
+ state.id = Some(sid);
+ if aut.is_dead(sid) {
+ return Ok(());
+ } else if aut.is_match(sid) {
+ state.next_match_index = Some(1);
+ state.mat = Some(get_match(aut, sid, 0, state.at + 1));
+ return Ok(());
+ } else if let Some(pre) = pre {
+ // If we're here, we know it's a special state that is not a
+ // dead or a match state AND that a prefilter is active. Thus,
+ // it must be a start state.
+ debug_assert!(aut.is_start(sid));
+ let span = Span::from(state.at..input.end());
+ match pre.find_in(input.haystack(), span).into_option() {
+ None => return Ok(()),
+ Some(i) => {
+ if i > state.at {
+ state.at = i;
+ continue;
+ }
+ }
+ }
+ } else {
+ // When pre.is_none(), then starting states should not be
+ // treated as special. That is, without a prefilter, is_special
+ // should only return true when the state is a dead or a match
+ // state.
+ //
+ // ... except for one special case: in stream searching, we
+ // currently call overlapping search with a 'None' prefilter,
+ // regardless of whether one exists or not, because stream
+ // searching can't currently deal with prefilters correctly in
+ // all cases.
+ }
+ }
+ state.at += 1;
+ }
+ state.id = Some(sid);
+ Ok(())
+}
+
+#[inline(always)]
+fn get_match<A: Automaton + ?Sized>(
+ aut: &A,
+ sid: StateID,
+ index: usize,
+ at: usize,
+) -> Match {
+ let pid = aut.match_pattern(sid, index);
+ let len = aut.pattern_len(pid);
+ Match::new(pid, (at - len)..at)
+}
+
+/// Write a prefix "state" indicator for fmt::Debug impls. It always writes
+/// exactly two printable bytes to the given formatter.
+///
+/// Specifically, this tries to succinctly distinguish the different types of
+/// states: dead states, start states and match states. It even accounts for
+/// the possible overlappings of different state types. (The only possible
+/// overlapping is that of match and start states.)
+pub(crate) fn fmt_state_indicator<A: Automaton>(
+ f: &mut core::fmt::Formatter<'_>,
+ aut: A,
+ id: StateID,
+) -> core::fmt::Result {
+ if aut.is_dead(id) {
+ write!(f, "D ")?;
+ } else if aut.is_match(id) {
+ if aut.is_start(id) {
+ write!(f, "*>")?;
+ } else {
+ write!(f, "* ")?;
+ }
+ } else if aut.is_start(id) {
+ write!(f, " >")?;
+ } else {
+ write!(f, " ")?;
+ }
+ Ok(())
+}
+
+/// Return an iterator of transitions in a sparse format given an iterator
+/// of all explicitly defined transitions. The iterator yields ranges of
+/// transitions, such that any adjacent transitions mapped to the same
+/// state are combined into a single range.
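+///
+/// For example (an illustrative sketch): given the explicit transitions
+/// `(b'a', S1), (b'b', S1), (b'd', S2)`, this yields the ranges
+/// `(b'a', b'b', S1)` and `(b'd', b'd', S2)`.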
+pub(crate) fn sparse_transitions<'a>(
+ mut it: impl Iterator<Item = (u8, StateID)> + 'a,
+) -> impl Iterator<Item = (u8, u8, StateID)> + 'a {
+ let mut cur: Option<(u8, u8, StateID)> = None;
+ core::iter::from_fn(move || {
+ while let Some((class, next)) = it.next() {
+ let (prev_start, prev_end, prev_next) = match cur {
+ Some(x) => x,
+ None => {
+ cur = Some((class, class, next));
+ continue;
+ }
+ };
+ if prev_next == next {
+ cur = Some((prev_start, class, prev_next));
+ } else {
+ cur = Some((class, class, next));
+ return Some((prev_start, prev_end, prev_next));
+ }
+ }
+ if let Some((start, end, next)) = cur.take() {
+ return Some((start, end, next));
+ }
+ None
+ })
+}
diff --git a/third_party/rust/aho-corasick/src/dfa.rs b/third_party/rust/aho-corasick/src/dfa.rs
new file mode 100644
index 0000000000..f0370a6168
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/dfa.rs
@@ -0,0 +1,814 @@
+/*!
+Provides direct access to a DFA implementation of Aho-Corasick.
+
+This is a low-level API that generally only needs to be used in niche
+circumstances. When possible, prefer using [`AhoCorasick`](crate::AhoCorasick)
+instead of a DFA directly. Using a `DFA` directly is typically only necessary
+when one needs access to the [`Automaton`] trait implementation.
+*/
+
+use alloc::{vec, vec::Vec};
+
+use crate::{
+ automaton::Automaton,
+ nfa::noncontiguous,
+ util::{
+ alphabet::ByteClasses,
+ error::{BuildError, MatchError},
+ int::{Usize, U32},
+ prefilter::Prefilter,
+ primitives::{IteratorIndexExt, PatternID, SmallIndex, StateID},
+ search::{Anchored, MatchKind, StartKind},
+ special::Special,
+ },
+};
+
+/// A DFA implementation of Aho-Corasick.
+///
+/// When possible, prefer using [`AhoCorasick`](crate::AhoCorasick) instead of
+/// this type directly. Using a `DFA` directly is typically only necessary when
+/// one needs access to the [`Automaton`] trait implementation.
+///
+/// This DFA can only be built by first constructing a [`noncontiguous::NFA`].
+/// Both [`DFA::new`] and [`Builder::build`] do this for you automatically, but
+/// [`Builder::build_from_noncontiguous`] permits doing it explicitly.
+///
+/// A DFA provides the best possible search performance (in this crate) via two
+/// mechanisms:
+///
+/// * All states use a dense representation for their transitions.
+/// * All failure transitions are pre-computed such that they are never
+/// explicitly handled at search time.
+///
+/// These two facts combined mean that every state transition is performed
+/// using a constant number of instructions. However, this comes at
+/// great cost. The memory usage of a DFA can be quite exorbitant.
+/// It is potentially multiple orders of magnitude greater than a
+/// [`contiguous::NFA`](crate::nfa::contiguous::NFA) for example. In exchange,
+/// a DFA will typically have better search speed than a `contiguous::NFA`, but
+/// not by orders of magnitude.
+///
+/// Unless you have a small number of patterns or memory usage is not a concern
+/// and search performance is critical, a DFA is usually not the best choice.
+///
+/// Moreover, unlike the NFAs in this crate, it is costly for a DFA to
+/// support both anchored and unanchored search configurations. Namely,
+/// since failure transitions are pre-computed, supporting both anchored
+/// and unanchored searches requires a duplication of the transition table,
+/// making the memory usage of such a DFA ever bigger. (The NFAs in this crate
+/// unconditionally support both anchored and unanchored searches because there
+/// is essentially no added cost for doing so.) It is for this reason that
+/// a DFA's support for anchored and unanchored searches can be configured
+/// via [`Builder::start_kind`]. By default, a DFA only supports unanchored
+/// searches.
+///
+/// # Example
+///
+/// This example shows how to build a `DFA` directly and use it to execute
+/// [`Automaton::try_find`]:
+///
+/// ```
+/// use aho_corasick::{
+/// automaton::Automaton,
+/// dfa::DFA,
+/// Input, Match,
+/// };
+///
+/// let patterns = &["b", "abc", "abcd"];
+/// let haystack = "abcd";
+///
+/// let dfa = DFA::new(patterns).unwrap();
+/// assert_eq!(
+///     Some(Match::must(0, 1..2)),
+///     dfa.try_find(&Input::new(haystack))?,
+/// );
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+///
+/// It is also possible to implement your own version of `try_find`. See the
+/// [`Automaton`] documentation for an example.
+#[derive(Clone)]
+pub struct DFA {
+ /// The DFA transition table. IDs in this table are pre-multiplied. So
+ /// instead of the IDs being 0, 1, 2, 3, ..., they are 0*stride, 1*stride,
+ /// 2*stride, 3*stride, ...
+ trans: Vec<StateID>,
+ /// The matches for every match state in this DFA. This is first indexed by
+ /// state index (so that's `sid >> stride2`) and then by order in which the
+ /// matches are meant to occur.
+ matches: Vec<Vec<PatternID>>,
+ /// The amount of heap memory used, in bytes, by the inner Vecs of
+ /// 'matches'.
+ matches_memory_usage: usize,
+ /// The length of each pattern. This is used to compute the start offset
+ /// of a match.
+ pattern_lens: Vec<SmallIndex>,
+ /// A prefilter for accelerating searches, if one exists.
+ prefilter: Option<Prefilter>,
+ /// The match semantics built into this DFA.
+ match_kind: MatchKind,
+ /// The total number of states in this DFA.
+ state_len: usize,
+ /// The alphabet size, or total number of equivalence classes, for this
+ /// DFA. Note that the actual number of transitions in each state is
+ /// stride=2^stride2, where stride is the smallest power of 2 greater than
+ /// or equal to alphabet_len. We do things this way so that we can use
+ /// bitshifting to go from a state ID to an index into 'matches'.
+ alphabet_len: usize,
+ /// The exponent with a base 2, such that stride=2^stride2. Given a state
+ /// index 'i', its state identifier is 'i << stride2'. Given a state
+ /// identifier 'sid', its state index is 'sid >> stride2'.
+ stride2: usize,
+ /// The equivalence classes for this DFA. All transitions are defined on
+ /// equivalence classes and not on the 256 distinct byte values.
+ byte_classes: ByteClasses,
+ /// The length of the shortest pattern in this automaton.
+ min_pattern_len: usize,
+ /// The length of the longest pattern in this automaton.
+ max_pattern_len: usize,
+ /// The information required to deduce which states are "special" in this
+ /// DFA.
+ special: Special,
+}
+
+impl DFA {
+ /// Create a new Aho-Corasick DFA using the default configuration.
+ ///
+ /// Use a [`Builder`] if you want to change the configuration.
+ pub fn new<I, P>(patterns: I) -> Result<DFA, BuildError>
+ where
+ I: IntoIterator<Item = P>,
+ P: AsRef<[u8]>,
+ {
+ DFA::builder().build(patterns)
+ }
+
+ /// A convenience method for returning a new Aho-Corasick DFA builder.
+ ///
+ /// This usually permits one to just import the `DFA` type.
+ pub fn builder() -> Builder {
+ Builder::new()
+ }
+}
+
+impl DFA {
+ /// A sentinel state ID indicating that a search should stop once it has
+ /// entered this state. When a search stops, it returns a match if one has
+ /// been found, otherwise no match. A DFA always has an actual dead state
+ /// at this ID.
+ ///
+ /// N.B. DFAs, unlike NFAs, do not have any notion of a FAIL state.
+ /// Namely, the whole point of a DFA is that the FAIL state is completely
+ /// compiled away. That is, DFA construction involves pre-computing the
+ /// failure transitions everywhere, such that failure transitions are no
+ /// longer used at search time. This, combined with its uniformly dense
+ /// representation, are the two most important factors in why it's faster
+ /// than the NFAs in this crate.
+ const DEAD: StateID = StateID::new_unchecked(0);
+
+ /// Adds the given pattern IDs as matches to the given state and also
+ /// records the added memory usage.
+ fn set_matches(
+ &mut self,
+ sid: StateID,
+ pids: impl Iterator<Item = PatternID>,
+ ) {
+ let index = (sid.as_usize() >> self.stride2).checked_sub(2).unwrap();
+ let mut at_least_one = false;
+ for pid in pids {
+ self.matches[index].push(pid);
+ self.matches_memory_usage += PatternID::SIZE;
+ at_least_one = true;
+ }
+ assert!(at_least_one, "match state must have non-empty pids");
+ }
+}
+
+// SAFETY: 'start_state' always returns a valid state ID, 'next_state' always
+// returns a valid state ID given a valid state ID. We otherwise claim that
+// all other methods are correct as well.
+unsafe impl Automaton for DFA {
+ #[inline(always)]
+ fn start_state(&self, anchored: Anchored) -> Result<StateID, MatchError> {
+ // Either of the start state IDs can be DEAD, in which case, support
+ // for that type of search is not provided by this DFA. Which start
+ // state IDs are inactive depends on the 'StartKind' configuration at
+ // DFA construction time.
+ match anchored {
+ Anchored::No => {
+ let start = self.special.start_unanchored_id;
+ if start == DFA::DEAD {
+ Err(MatchError::invalid_input_unanchored())
+ } else {
+ Ok(start)
+ }
+ }
+ Anchored::Yes => {
+ let start = self.special.start_anchored_id;
+ if start == DFA::DEAD {
+ Err(MatchError::invalid_input_anchored())
+ } else {
+ Ok(start)
+ }
+ }
+ }
+ }
+
+ #[inline(always)]
+ fn next_state(
+ &self,
+ _anchored: Anchored,
+ sid: StateID,
+ byte: u8,
+ ) -> StateID {
+ let class = self.byte_classes.get(byte);
+ self.trans[(sid.as_u32() + u32::from(class)).as_usize()]
+ }
+
+ #[inline(always)]
+ fn is_special(&self, sid: StateID) -> bool {
+ sid <= self.special.max_special_id
+ }
+
+ #[inline(always)]
+ fn is_dead(&self, sid: StateID) -> bool {
+ sid == DFA::DEAD
+ }
+
+ #[inline(always)]
+ fn is_match(&self, sid: StateID) -> bool {
+ !self.is_dead(sid) && sid <= self.special.max_match_id
+ }
+
+ #[inline(always)]
+ fn is_start(&self, sid: StateID) -> bool {
+ sid == self.special.start_unanchored_id
+ || sid == self.special.start_anchored_id
+ }
+
+ #[inline(always)]
+ fn match_kind(&self) -> MatchKind {
+ self.match_kind
+ }
+
+ #[inline(always)]
+ fn patterns_len(&self) -> usize {
+ self.pattern_lens.len()
+ }
+
+ #[inline(always)]
+ fn pattern_len(&self, pid: PatternID) -> usize {
+ self.pattern_lens[pid].as_usize()
+ }
+
+ #[inline(always)]
+ fn min_pattern_len(&self) -> usize {
+ self.min_pattern_len
+ }
+
+ #[inline(always)]
+ fn max_pattern_len(&self) -> usize {
+ self.max_pattern_len
+ }
+
+ #[inline(always)]
+ fn match_len(&self, sid: StateID) -> usize {
+ debug_assert!(self.is_match(sid));
+ let offset = (sid.as_usize() >> self.stride2) - 2;
+ self.matches[offset].len()
+ }
+
+ #[inline(always)]
+ fn match_pattern(&self, sid: StateID, index: usize) -> PatternID {
+ debug_assert!(self.is_match(sid));
+ let offset = (sid.as_usize() >> self.stride2) - 2;
+ self.matches[offset][index]
+ }
+
+ #[inline(always)]
+ fn memory_usage(&self) -> usize {
+ use core::mem::size_of;
+
+ (self.trans.len() * size_of::<u32>())
+ + (self.matches.len() * size_of::<Vec<PatternID>>())
+ + self.matches_memory_usage
+ + (self.pattern_lens.len() * size_of::<SmallIndex>())
+ + self.prefilter.as_ref().map_or(0, |p| p.memory_usage())
+ }
+
+ #[inline(always)]
+ fn prefilter(&self) -> Option<&Prefilter> {
+ self.prefilter.as_ref()
+ }
+}
+
+impl core::fmt::Debug for DFA {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ use crate::{
+ automaton::{fmt_state_indicator, sparse_transitions},
+ util::debug::DebugByte,
+ };
+
+ writeln!(f, "dfa::DFA(")?;
+ for index in 0..self.state_len {
+ let sid = StateID::new_unchecked(index << self.stride2);
+ // While we do currently include the FAIL state in the transition
+ // table (to simplify construction), it is never actually used. It
+ // poses problems with the code below because it gets treated as
+ // a match state incidentally when it is, of course, not. So we
+ // special case it. The fail state is always the first state after
+ // the dead state.
+ //
+ // If the construction is changed to remove the fail state (it
+ // probably should be), then this special case should be updated.
+ if index == 1 {
+ writeln!(f, "F {:06}:", sid.as_usize())?;
+ continue;
+ }
+ fmt_state_indicator(f, self, sid)?;
+ write!(f, "{:06}: ", sid.as_usize())?;
+
+ let it = (0..self.byte_classes.alphabet_len()).map(|class| {
+ (class.as_u8(), self.trans[sid.as_usize() + class])
+ });
+ for (i, (start, end, next)) in sparse_transitions(it).enumerate() {
+ if i > 0 {
+ write!(f, ", ")?;
+ }
+ if start == end {
+ write!(
+ f,
+ "{:?} => {:?}",
+ DebugByte(start),
+ next.as_usize()
+ )?;
+ } else {
+ write!(
+ f,
+ "{:?}-{:?} => {:?}",
+ DebugByte(start),
+ DebugByte(end),
+ next.as_usize()
+ )?;
+ }
+ }
+ write!(f, "\n")?;
+ if self.is_match(sid) {
+ write!(f, " matches: ")?;
+ for i in 0..self.match_len(sid) {
+ if i > 0 {
+ write!(f, ", ")?;
+ }
+ let pid = self.match_pattern(sid, i);
+ write!(f, "{}", pid.as_usize())?;
+ }
+ write!(f, "\n")?;
+ }
+ }
+ writeln!(f, "match kind: {:?}", self.match_kind)?;
+ writeln!(f, "prefilter: {:?}", self.prefilter.is_some())?;
+ writeln!(f, "state length: {:?}", self.state_len)?;
+ writeln!(f, "pattern length: {:?}", self.patterns_len())?;
+ writeln!(f, "shortest pattern length: {:?}", self.min_pattern_len)?;
+ writeln!(f, "longest pattern length: {:?}", self.max_pattern_len)?;
+ writeln!(f, "alphabet length: {:?}", self.alphabet_len)?;
+ writeln!(f, "stride: {:?}", 1 << self.stride2)?;
+ writeln!(f, "byte classes: {:?}", self.byte_classes)?;
+ writeln!(f, "memory usage: {:?}", self.memory_usage())?;
+ writeln!(f, ")")?;
+ Ok(())
+ }
+}
+
+/// A builder for configuring an Aho-Corasick DFA.
+///
+/// This builder has a subset of the options available to a
+/// [`AhoCorasickBuilder`](crate::AhoCorasickBuilder). Of the shared options,
+/// their behavior is identical.
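+///
+/// # Example
+///
+/// An illustrative sketch (the patterns and haystack are made up) of
+/// configuring leftmost-longest match semantics through this builder:
+///
+/// ```
+/// use aho_corasick::{automaton::Automaton, dfa::DFA, Input, Match, MatchKind};
+///
+/// let dfa = DFA::builder()
+///     .match_kind(MatchKind::LeftmostLongest)
+///     .build(&["ab", "abcd"])
+///     .unwrap();
+/// assert_eq!(
+///     Some(Match::must(1, 0..4)),
+///     dfa.try_find(&Input::new("abcd"))?,
+/// );
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```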
+#[derive(Clone, Debug)]
+pub struct Builder {
+ noncontiguous: noncontiguous::Builder,
+ start_kind: StartKind,
+ byte_classes: bool,
+}
+
+impl Default for Builder {
+ fn default() -> Builder {
+ Builder {
+ noncontiguous: noncontiguous::Builder::new(),
+ start_kind: StartKind::Unanchored,
+ byte_classes: true,
+ }
+ }
+}
+
+impl Builder {
+ /// Create a new builder for configuring an Aho-Corasick DFA.
+ pub fn new() -> Builder {
+ Builder::default()
+ }
+
+ /// Build an Aho-Corasick DFA from the given iterator of patterns.
+ ///
+ /// A builder may be reused to create more DFAs.
+ pub fn build<I, P>(&self, patterns: I) -> Result<DFA, BuildError>
+ where
+ I: IntoIterator<Item = P>,
+ P: AsRef<[u8]>,
+ {
+ let nnfa = self.noncontiguous.build(patterns)?;
+ self.build_from_noncontiguous(&nnfa)
+ }
+
+ /// Build an Aho-Corasick DFA from the given noncontiguous NFA.
+ ///
+ /// Note that when this method is used, only the `start_kind` and
+ /// `byte_classes` settings on this builder are respected. The other
+ /// settings only apply to the initial construction of the Aho-Corasick
+ /// automaton. Since using this method requires that initial construction
+ /// has already completed, all settings impacting only initial construction
+ /// are no longer relevant.
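+    ///
+    /// # Example
+    ///
+    /// An illustrative sketch (the patterns are made up, and this assumes
+    /// that `noncontiguous::NFA::new` accepts patterns just like
+    /// [`DFA::new`] does):
+    ///
+    /// ```
+    /// use aho_corasick::{
+    ///     automaton::Automaton, dfa::DFA, nfa::noncontiguous, Input, Match,
+    /// };
+    ///
+    /// let nnfa = noncontiguous::NFA::new(&["samwise", "sam"]).unwrap();
+    /// let dfa = DFA::builder().build_from_noncontiguous(&nnfa).unwrap();
+    /// assert_eq!(
+    ///     Some(Match::must(1, 0..3)),
+    ///     dfa.try_find(&Input::new("samwise"))?,
+    /// );
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```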
+ pub fn build_from_noncontiguous(
+ &self,
+ nnfa: &noncontiguous::NFA,
+ ) -> Result<DFA, BuildError> {
+ debug!("building DFA");
+ let byte_classes = if self.byte_classes {
+ nnfa.byte_classes().clone()
+ } else {
+ ByteClasses::singletons()
+ };
+ let state_len = match self.start_kind {
+ StartKind::Unanchored | StartKind::Anchored => nnfa.states().len(),
+ StartKind::Both => {
+ // These unwraps are OK because we know that the number of
+ // NFA states is < StateID::LIMIT which is in turn less than
+ // i32::MAX. Thus, there is always room to multiply by 2.
+ // Finally, the number of states is always at least 4 in the
+ // NFA (DEAD, FAIL, START-UNANCHORED, START-ANCHORED), so the
+ // subtraction of 4 is okay.
+ //
+ // Note that we subtract 4 because the "anchored" part of
+ // the DFA duplicates the unanchored part (without failure
+ // transitions), but reuses the DEAD, FAIL and START states.
+ nnfa.states()
+ .len()
+ .checked_mul(2)
+ .unwrap()
+ .checked_sub(4)
+ .unwrap()
+ }
+ };
+ let trans_len =
+ match state_len.checked_shl(byte_classes.stride2().as_u32()) {
+ Some(trans_len) => trans_len,
+ None => {
+ return Err(BuildError::state_id_overflow(
+ StateID::MAX.as_u64(),
+ usize::MAX.as_u64(),
+ ))
+ }
+ };
+ StateID::new(trans_len.checked_sub(byte_classes.stride()).unwrap())
+ .map_err(|e| {
+ BuildError::state_id_overflow(
+ StateID::MAX.as_u64(),
+ e.attempted(),
+ )
+ })?;
+ let num_match_states = match self.start_kind {
+ StartKind::Unanchored | StartKind::Anchored => {
+ nnfa.special().max_match_id.as_usize().checked_sub(1).unwrap()
+ }
+ StartKind::Both => nnfa
+ .special()
+ .max_match_id
+ .as_usize()
+ .checked_sub(1)
+ .unwrap()
+ .checked_mul(2)
+ .unwrap(),
+ };
+ let mut dfa = DFA {
+ trans: vec![DFA::DEAD; trans_len],
+ matches: vec![vec![]; num_match_states],
+ matches_memory_usage: 0,
+ pattern_lens: nnfa.pattern_lens_raw().to_vec(),
+ prefilter: nnfa.prefilter().map(|p| p.clone()),
+ match_kind: nnfa.match_kind(),
+ state_len,
+ alphabet_len: byte_classes.alphabet_len(),
+ stride2: byte_classes.stride2(),
+ byte_classes,
+ min_pattern_len: nnfa.min_pattern_len(),
+ max_pattern_len: nnfa.max_pattern_len(),
+ // The special state IDs are set later.
+ special: Special::zero(),
+ };
+ match self.start_kind {
+ StartKind::Both => {
+ self.finish_build_both_starts(nnfa, &mut dfa);
+ }
+ StartKind::Unanchored => {
+ self.finish_build_one_start(Anchored::No, nnfa, &mut dfa);
+ }
+ StartKind::Anchored => {
+ self.finish_build_one_start(Anchored::Yes, nnfa, &mut dfa)
+ }
+ }
+ debug!(
+ "DFA built, <states: {:?}, size: {:?}, \
+ alphabet len: {:?}, stride: {:?}>",
+ dfa.state_len,
+ dfa.memory_usage(),
+ dfa.byte_classes.alphabet_len(),
+ dfa.byte_classes.stride(),
+ );
+ // The vectors can grow ~twice as big during construction because a
+ // Vec amortizes growth. But here, let's shrink things back down to
+ // what we actually need since we're never going to add more to it.
+ dfa.trans.shrink_to_fit();
+ dfa.pattern_lens.shrink_to_fit();
+ dfa.matches.shrink_to_fit();
+ // TODO: We might also want to shrink each Vec inside of `dfa.matches`,
+ // or even better, convert it to one contiguous allocation. But I think
+ // I went with nested allocs for good reason (can't remember), so this
+ // may be tricky to do. I decided not to shrink them here because it
+ // might require a fair bit of work to do. It's unclear whether it's
+ // worth it.
+ Ok(dfa)
+ }
+
+ /// Finishes building a DFA for either unanchored or anchored searches,
+ /// but NOT both.
+ fn finish_build_one_start(
+ &self,
+ anchored: Anchored,
+ nnfa: &noncontiguous::NFA,
+ dfa: &mut DFA,
+ ) {
+ // This function always succeeds because we check above that all of the
+ // states in the NFA can be mapped to DFA state IDs.
+ let stride2 = dfa.stride2;
+ let old2new = |oldsid: StateID| {
+ StateID::new_unchecked(oldsid.as_usize() << stride2)
+ };
+ for (oldsid, state) in nnfa.states().iter().with_state_ids() {
+ let newsid = old2new(oldsid);
+ if state.is_match() {
+ dfa.set_matches(newsid, nnfa.iter_matches(oldsid));
+ }
+ sparse_iter(
+ nnfa,
+ oldsid,
+ &dfa.byte_classes,
+ |byte, class, mut oldnextsid| {
+ if oldnextsid == noncontiguous::NFA::FAIL {
+ if anchored.is_anchored() {
+ oldnextsid = noncontiguous::NFA::DEAD;
+ } else {
+ oldnextsid = nnfa.next_state(
+ Anchored::No,
+ state.fail(),
+ byte,
+ );
+ }
+ }
+ dfa.trans[newsid.as_usize() + usize::from(class)] =
+ old2new(oldnextsid);
+ },
+ );
+ }
+ // Now that we've remapped all the IDs in our states, all that's left
+ // is remapping the special state IDs.
+ let old = nnfa.special();
+ let new = &mut dfa.special;
+ new.max_special_id = old2new(old.max_special_id);
+ new.max_match_id = old2new(old.max_match_id);
+ if anchored.is_anchored() {
+ new.start_unanchored_id = DFA::DEAD;
+ new.start_anchored_id = old2new(old.start_anchored_id);
+ } else {
+ new.start_unanchored_id = old2new(old.start_unanchored_id);
+ new.start_anchored_id = DFA::DEAD;
+ }
+ }
+
+ /// Finishes building a DFA that supports BOTH unanchored and anchored
+    /// searches. It works by interleaving unanchored states with anchored
+ /// states in the same transition table. This way, we avoid needing to
+ /// re-shuffle states afterward to ensure that our states still look like
+ /// DEAD, MATCH, ..., START-UNANCHORED, START-ANCHORED, NON-MATCH, ...
+ ///
+ /// Honestly this is pretty inscrutable... Simplifications are most
+ /// welcome.
+ fn finish_build_both_starts(
+ &self,
+ nnfa: &noncontiguous::NFA,
+ dfa: &mut DFA,
+ ) {
+ let stride2 = dfa.stride2;
+ let stride = 1 << stride2;
+ let mut remap_unanchored = vec![DFA::DEAD; nnfa.states().len()];
+ let mut remap_anchored = vec![DFA::DEAD; nnfa.states().len()];
+ let mut is_anchored = vec![false; dfa.state_len];
+ let mut newsid = DFA::DEAD;
+ let next_dfa_id =
+ |sid: StateID| StateID::new_unchecked(sid.as_usize() + stride);
+ for (oldsid, state) in nnfa.states().iter().with_state_ids() {
+ if oldsid == noncontiguous::NFA::DEAD
+ || oldsid == noncontiguous::NFA::FAIL
+ {
+ remap_unanchored[oldsid] = newsid;
+ remap_anchored[oldsid] = newsid;
+ newsid = next_dfa_id(newsid);
+ } else if oldsid == nnfa.special().start_unanchored_id
+ || oldsid == nnfa.special().start_anchored_id
+ {
+ if oldsid == nnfa.special().start_unanchored_id {
+ remap_unanchored[oldsid] = newsid;
+ remap_anchored[oldsid] = DFA::DEAD;
+ } else {
+ remap_unanchored[oldsid] = DFA::DEAD;
+ remap_anchored[oldsid] = newsid;
+ is_anchored[newsid.as_usize() >> stride2] = true;
+ }
+ if state.is_match() {
+ dfa.set_matches(newsid, nnfa.iter_matches(oldsid));
+ }
+ sparse_iter(
+ nnfa,
+ oldsid,
+ &dfa.byte_classes,
+ |_, class, oldnextsid| {
+ let class = usize::from(class);
+ if oldnextsid == noncontiguous::NFA::FAIL {
+ dfa.trans[newsid.as_usize() + class] = DFA::DEAD;
+ } else {
+ dfa.trans[newsid.as_usize() + class] = oldnextsid;
+ }
+ },
+ );
+ newsid = next_dfa_id(newsid);
+ } else {
+ let unewsid = newsid;
+ newsid = next_dfa_id(newsid);
+ let anewsid = newsid;
+ newsid = next_dfa_id(newsid);
+
+ remap_unanchored[oldsid] = unewsid;
+ remap_anchored[oldsid] = anewsid;
+ is_anchored[anewsid.as_usize() >> stride2] = true;
+ if state.is_match() {
+ dfa.set_matches(unewsid, nnfa.iter_matches(oldsid));
+ dfa.set_matches(anewsid, nnfa.iter_matches(oldsid));
+ }
+ sparse_iter(
+ nnfa,
+ oldsid,
+ &dfa.byte_classes,
+ |byte, class, oldnextsid| {
+ let class = usize::from(class);
+ if oldnextsid == noncontiguous::NFA::FAIL {
+ dfa.trans[unewsid.as_usize() + class] = nnfa
+ .next_state(Anchored::No, state.fail(), byte);
+ } else {
+ dfa.trans[unewsid.as_usize() + class] = oldnextsid;
+ dfa.trans[anewsid.as_usize() + class] = oldnextsid;
+ }
+ },
+ );
+ }
+ }
+ for i in 0..dfa.state_len {
+ let sid = i << stride2;
+ if is_anchored[i] {
+ for next in dfa.trans[sid..][..stride].iter_mut() {
+ *next = remap_anchored[*next];
+ }
+ } else {
+ for next in dfa.trans[sid..][..stride].iter_mut() {
+ *next = remap_unanchored[*next];
+ }
+ }
+ }
+ // Now that we've remapped all the IDs in our states, all that's left
+ // is remapping the special state IDs.
+ let old = nnfa.special();
+ let new = &mut dfa.special;
+ new.max_special_id = remap_anchored[old.max_special_id];
+ new.max_match_id = remap_anchored[old.max_match_id];
+ new.start_unanchored_id = remap_unanchored[old.start_unanchored_id];
+ new.start_anchored_id = remap_anchored[old.start_anchored_id];
+ }
+
+ /// Set the desired match semantics.
+ ///
+ /// This only applies when using [`Builder::build`] and not
+ /// [`Builder::build_from_noncontiguous`].
+ ///
+ /// See
+ /// [`AhoCorasickBuilder::match_kind`](crate::AhoCorasickBuilder::match_kind)
+ /// for more documentation and examples.
+ pub fn match_kind(&mut self, kind: MatchKind) -> &mut Builder {
+ self.noncontiguous.match_kind(kind);
+ self
+ }
+
+ /// Enable ASCII-aware case insensitive matching.
+ ///
+ /// This only applies when using [`Builder::build`] and not
+ /// [`Builder::build_from_noncontiguous`].
+ ///
+ /// See
+ /// [`AhoCorasickBuilder::ascii_case_insensitive`](crate::AhoCorasickBuilder::ascii_case_insensitive)
+ /// for more documentation and examples.
+ pub fn ascii_case_insensitive(&mut self, yes: bool) -> &mut Builder {
+ self.noncontiguous.ascii_case_insensitive(yes);
+ self
+ }
+
+ /// Enable heuristic prefilter optimizations.
+ ///
+ /// This only applies when using [`Builder::build`] and not
+ /// [`Builder::build_from_noncontiguous`].
+ ///
+ /// See
+ /// [`AhoCorasickBuilder::prefilter`](crate::AhoCorasickBuilder::prefilter)
+ /// for more documentation and examples.
+ pub fn prefilter(&mut self, yes: bool) -> &mut Builder {
+ self.noncontiguous.prefilter(yes);
+ self
+ }
+
+ /// Sets the starting state configuration for the automaton.
+ ///
+ /// See
+ /// [`AhoCorasickBuilder::start_kind`](crate::AhoCorasickBuilder::start_kind)
+ /// for more documentation and examples.
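+    ///
+    /// # Example
+    ///
+    /// An illustrative sketch (the pattern and haystack are made up, and
+    /// this assumes `Input::anchored` is used to select the search mode) of
+    /// a DFA built to support both anchored and unanchored searches:
+    ///
+    /// ```
+    /// use aho_corasick::{
+    ///     automaton::Automaton, dfa::DFA, Anchored, Input, Match, StartKind,
+    /// };
+    ///
+    /// let dfa = DFA::builder()
+    ///     .start_kind(StartKind::Both)
+    ///     .build(&["foo"])
+    ///     .unwrap();
+    ///
+    /// // Unanchored: the match may start anywhere in the haystack.
+    /// let input = Input::new("xx foo").anchored(Anchored::No);
+    /// assert_eq!(Some(Match::must(0, 3..6)), dfa.try_find(&input)?);
+    ///
+    /// // Anchored: the match must begin where the search begins.
+    /// let input = Input::new("xx foo").anchored(Anchored::Yes);
+    /// assert_eq!(None, dfa.try_find(&input)?);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```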
+ pub fn start_kind(&mut self, kind: StartKind) -> &mut Builder {
+ self.start_kind = kind;
+ self
+ }
+
+ /// A debug setting for whether to attempt to shrink the size of the
+ /// automaton's alphabet or not.
+ ///
+    /// This should never be disabled unless you're debugging an automaton.
+ /// Namely, disabling byte classes makes transitions easier to reason
+ /// about, since they use the actual bytes instead of equivalence classes.
+ /// Disabling this confers no performance benefit at search time.
+ ///
+ /// See
+ /// [`AhoCorasickBuilder::byte_classes`](crate::AhoCorasickBuilder::byte_classes)
+ /// for more documentation and examples.
+ pub fn byte_classes(&mut self, yes: bool) -> &mut Builder {
+ self.byte_classes = yes;
+ self
+ }
+}
+
+/// Iterate over all possible equivalence class transitions in this state.
+/// The closure is called for all transitions with a distinct equivalence
+/// class, even those not explicitly represented in this sparse state. For
+/// any implicitly defined transitions, the given closure is called with
+/// the fail state ID.
+///
+/// The closure is guaranteed to be called precisely
+/// `byte_classes.alphabet_len()` times, once for every possible class in
+/// ascending order.
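+///
+/// As an illustrative sketch: if a state only defines transitions for the
+/// bytes `b'a'` and `b'b'` and byte classes are singletons, then the closure
+/// is called 256 times, receiving the FAIL state ID for every byte other
+/// than `b'a'` and `b'b'`.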
+fn sparse_iter<F: FnMut(u8, u8, StateID)>(
+ nnfa: &noncontiguous::NFA,
+ oldsid: StateID,
+ classes: &ByteClasses,
+ mut f: F,
+) {
+ let mut prev_class = None;
+ let mut byte = 0usize;
+ for t in nnfa.iter_trans(oldsid) {
+ while byte < usize::from(t.byte()) {
+ let rep = byte.as_u8();
+ let class = classes.get(rep);
+ byte += 1;
+ if prev_class != Some(class) {
+ f(rep, class, noncontiguous::NFA::FAIL);
+ prev_class = Some(class);
+ }
+ }
+ let rep = t.byte();
+ let class = classes.get(rep);
+ byte += 1;
+ if prev_class != Some(class) {
+ f(rep, class, t.next());
+ prev_class = Some(class);
+ }
+ }
+ for b in byte..=255 {
+ let rep = b.as_u8();
+ let class = classes.get(rep);
+ if prev_class != Some(class) {
+ f(rep, class, noncontiguous::NFA::FAIL);
+ prev_class = Some(class);
+ }
+ }
+}
diff --git a/third_party/rust/aho-corasick/src/lib.rs b/third_party/rust/aho-corasick/src/lib.rs
new file mode 100644
index 0000000000..20e8b81115
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/lib.rs
@@ -0,0 +1,326 @@
+/*!
+A library for finding occurrences of many patterns at once. This library
+provides multiple pattern search principally through an implementation of the
+[Aho-Corasick algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm),
+which builds a fast finite state machine for executing searches in linear time.
+
+Additionally, this library provides a number of configuration options for
+building the automaton that permit controlling the space versus time trade
+off. Other features include simple ASCII case insensitive matching, finding
+overlapping matches, replacements, searching streams and even searching and
+replacing text in streams.
+
+Finally, unlike most other Aho-Corasick implementations, this one
+supports enabling [leftmost-first](MatchKind::LeftmostFirst) or
+[leftmost-longest](MatchKind::LeftmostLongest) match semantics, using a
+(seemingly) novel alternative construction algorithm. For more details on what
+match semantics means, see the [`MatchKind`] type.
+
+# Overview
+
+This section gives a brief overview of the primary types in this crate:
+
+* [`AhoCorasick`] is the primary type and represents an Aho-Corasick automaton.
+This is the type you use to execute searches.
+* [`AhoCorasickBuilder`] can be used to build an Aho-Corasick automaton, and
+supports configuring a number of options.
+* [`Match`] represents a single match reported by an Aho-Corasick automaton.
+Each match has two pieces of information: the pattern that matched and the
+start and end byte offsets corresponding to the position in the haystack at
+which it matched.
+
+# Example: basic searching
+
+This example shows how to search for occurrences of multiple patterns
+simultaneously. Each match includes the pattern that matched along with the
+byte offsets of the match.
+
+```
+use aho_corasick::{AhoCorasick, PatternID};
+
+let patterns = &["apple", "maple", "Snapple"];
+let haystack = "Nobody likes maple in their apple flavored Snapple.";
+
+let ac = AhoCorasick::new(patterns).unwrap();
+let mut matches = vec![];
+for mat in ac.find_iter(haystack) {
+ matches.push((mat.pattern(), mat.start(), mat.end()));
+}
+assert_eq!(matches, vec![
+ (PatternID::must(1), 13, 18),
+ (PatternID::must(0), 28, 33),
+ (PatternID::must(2), 43, 50),
+]);
+```
+
+# Example: case insensitivity
+
+This is like the previous example, but matches `Snapple` case insensitively
+using `AhoCorasickBuilder`:
+
+```
+use aho_corasick::{AhoCorasick, PatternID};
+
+let patterns = &["apple", "maple", "snapple"];
+let haystack = "Nobody likes maple in their apple flavored Snapple.";
+
+let ac = AhoCorasick::builder()
+ .ascii_case_insensitive(true)
+ .build(patterns)
+ .unwrap();
+let mut matches = vec![];
+for mat in ac.find_iter(haystack) {
+ matches.push((mat.pattern(), mat.start(), mat.end()));
+}
+assert_eq!(matches, vec![
+ (PatternID::must(1), 13, 18),
+ (PatternID::must(0), 28, 33),
+ (PatternID::must(2), 43, 50),
+]);
+```
+
+# Example: replacing matches in a stream
+
+This example shows how to execute a search and replace on a stream without
+loading the entire stream into memory first.
+
+```
+# #[cfg(feature = "std")] {
+use aho_corasick::AhoCorasick;
+
+# fn example() -> Result<(), std::io::Error> {
+let patterns = &["fox", "brown", "quick"];
+let replace_with = &["sloth", "grey", "slow"];
+
+// In a real example, these might be `std::fs::File`s instead. All you need to
+// do is supply a pair of `std::io::Read` and `std::io::Write` implementations.
+let rdr = "The quick brown fox.";
+let mut wtr = vec![];
+
+let ac = AhoCorasick::new(patterns).unwrap();
+ac.try_stream_replace_all(rdr.as_bytes(), &mut wtr, replace_with)?;
+assert_eq!(b"The slow grey sloth.".to_vec(), wtr);
+# Ok(()) }; example().unwrap()
+# }
+```
+
+# Example: finding the leftmost first match
+
+In the textbook description of Aho-Corasick, its formulation is typically
+structured such that it reports all possible matches, even when they overlap
+with one another. In many cases, overlapping matches may not be desired, such
+as when finding all successive non-overlapping matches, as you might with a
+standard regular expression.
+
+Unfortunately the "obvious" way to modify the Aho-Corasick algorithm to do
+this doesn't always work in the expected way, since it will report matches as
+soon as they are seen. For example, consider matching the regex `Samwise|Sam`
+against the text `Samwise`. Most regex engines (that are Perl-like, or
+non-POSIX) will report `Samwise` as a match, but the standard Aho-Corasick
+algorithm modified for reporting non-overlapping matches will report `Sam`.
+
+A novel contribution of this library is the ability to change the match
+semantics of Aho-Corasick (without additional search time overhead) such that
+`Samwise` is reported instead. For example, here's the standard approach:
+
+```
+use aho_corasick::AhoCorasick;
+
+let patterns = &["Samwise", "Sam"];
+let haystack = "Samwise";
+
+let ac = AhoCorasick::new(patterns).unwrap();
+let mat = ac.find(haystack).expect("should have a match");
+assert_eq!("Sam", &haystack[mat.start()..mat.end()]);
+```
+
+And now here's the leftmost-first version, which matches how a Perl-like
+regex will work:
+
+```
+use aho_corasick::{AhoCorasick, MatchKind};
+
+let patterns = &["Samwise", "Sam"];
+let haystack = "Samwise";
+
+let ac = AhoCorasick::builder()
+ .match_kind(MatchKind::LeftmostFirst)
+ .build(patterns)
+ .unwrap();
+let mat = ac.find(haystack).expect("should have a match");
+assert_eq!("Samwise", &haystack[mat.start()..mat.end()]);
+```
+
+In addition to leftmost-first semantics, this library also supports
+leftmost-longest semantics, which match the POSIX behavior of a regular
+expression alternation. See [`MatchKind`] for more details.
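+
+As a brief sketch using the same patterns (their order no longer matters,
+since the longest match starting at the leftmost position wins):
+
+```
+use aho_corasick::{AhoCorasick, MatchKind};
+
+let patterns = &["Sam", "Samwise"];
+let haystack = "Samwise";
+
+let ac = AhoCorasick::builder()
+ .match_kind(MatchKind::LeftmostLongest)
+ .build(patterns)
+ .unwrap();
+let mat = ac.find(haystack).expect("should have a match");
+assert_eq!("Samwise", &haystack[mat.start()..mat.end()]);
+```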
+
+# Prefilters
+
+While an Aho-Corasick automaton can perform admirably when compared to more
+naive solutions, it is generally slower than more specialized algorithms that
+are accelerated using vector instructions such as SIMD.
+
+For that reason, this library will internally use a "prefilter" to attempt
+to accelerate searches when possible. Currently, this library has several
+different algorithms it might use depending on the patterns provided. Once the
+number of patterns gets too big, prefilters are no longer used.
+
+While a prefilter is generally good to have on by default since it works
+well in the common case, it can lead to less predictable or even sub-optimal
+performance in some cases. For that reason, prefilters can be explicitly
+disabled via [`AhoCorasickBuilder::prefilter`].
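+
+As a brief sketch, disabling the prefilter is just a builder setting. The
+matches reported are identical either way; only the internal acceleration
+strategy changes:
+
+```
+use aho_corasick::AhoCorasick;
+
+let patterns = &["apple", "maple"];
+let haystack = "a maple flavored apple";
+
+let ac = AhoCorasick::builder()
+ .prefilter(false)
+ .build(patterns)
+ .unwrap();
+assert_eq!(2, ac.find_iter(haystack).count());
+```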
+
+# Lower level APIs
+
+This crate also provides several sub-modules that collectively expose many of
+the implementation details of the main [`AhoCorasick`] type. Most users of this
+library can completely ignore the submodules and their contents, but if you
+needed finer grained control, some parts of them may be useful to you. Here is
+a brief overview of each and why you might want to use them:
+
+* The [`packed`] sub-module contains a lower level API for using fast
+vectorized routines for finding a small number of patterns in a haystack.
+You might want to use this API when you want to completely side-step using
+Aho-Corasick automata. Otherwise, the fast vectorized routines are used
+automatically as prefilters for `AhoCorasick` searches whenever possible.
+* The [`automaton`] sub-module provides a lower level finite state
+machine interface that the various Aho-Corasick implementations in
+this crate implement. This sub-module's main contribution is the
+[`Automaton`](automaton::Automaton) trait, which permits manually walking the
+state transitions of an Aho-Corasick automaton.
+* The [`dfa`] and [`nfa`] sub-modules provide DFA and NFA implementations of
+the aforementioned `Automaton` trait. The main reason one might want to use
+these sub-modules is to get access to a type that implements the `Automaton`
+trait. (The top-level `AhoCorasick` type does not implement the `Automaton`
+trait.)
+
+As mentioned above, if you aren't sure whether you need these sub-modules,
+you should be able to safely ignore them and just focus on the [`AhoCorasick`]
+type.
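+
+For instance, here is a brief sketch of searching with the [`dfa`] sub-module
+directly via the [`Automaton`](automaton::Automaton) trait. (Most users will
+never need to do this.)
+
+```
+use aho_corasick::{automaton::Automaton, dfa::DFA, Input, Match};
+
+let dfa = DFA::new(&["foo", "bar"]).unwrap();
+assert_eq!(
+ Some(Match::must(1, 4..7)),
+ dfa.try_find(&Input::new("xxx bar"))?,
+);
+# Ok::<(), Box<dyn std::error::Error>>(())
+```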
+
+# Crate features
+
+This crate exposes a few features for controlling dependency usage and whether
+this crate can be used without the standard library.
+
+* **std** -
+ Enables support for the standard library. This feature is enabled by
+ default. When disabled, only `core` and `alloc` are used. At an API
+ level, enabling `std` enables `std::error::Error` trait impls for the
+ various error types, and higher level stream search routines such as
+ [`AhoCorasick::try_stream_find_iter`]. But the `std` feature is also required
+ to enable vectorized prefilters. Prefilters can greatly accelerate searches,
+ but generally only apply when the number of patterns is small (less than
+ ~100).
+* **perf-literal** -
+ Enables support for literal prefilters that use vectorized routines from
+ external crates. This feature is enabled by default. If you're only using
+ Aho-Corasick for large numbers of patterns or otherwise can abide lower
+ throughput when searching with a small number of patterns, then it is
+ reasonable to disable this feature.
+* **logging** -
+ Enables a dependency on the `log` crate and emits messages to aid in
+ diagnostics. This feature is disabled by default.
+*/
+
+#![no_std]
+#![deny(missing_docs)]
+#![deny(rustdoc::broken_intra_doc_links)]
+#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+
+extern crate alloc;
+#[cfg(any(test, feature = "std"))]
+extern crate std;
+
+#[cfg(doctest)]
+doc_comment::doctest!("../README.md");
+
+#[cfg(feature = "std")]
+pub use crate::ahocorasick::StreamFindIter;
+pub use crate::{
+ ahocorasick::{
+ AhoCorasick, AhoCorasickBuilder, AhoCorasickKind, FindIter,
+ FindOverlappingIter,
+ },
+ util::{
+ error::{BuildError, MatchError, MatchErrorKind},
+ primitives::{PatternID, PatternIDError},
+ search::{Anchored, Input, Match, MatchKind, Span, StartKind},
+ },
+};
+
+#[macro_use]
+mod macros;
+
+mod ahocorasick;
+pub mod automaton;
+pub mod dfa;
+pub mod nfa;
+pub mod packed;
+#[cfg(test)]
+mod tests;
+// I wrote out the module for implementing fst::Automaton only to later realize
+// that this would make fst a public dependency and fst is not at 1.0 yet. I
+// decided to just keep the code in tree, but build it only during tests.
+//
+// TODO: I think I've changed my mind again. I'm considering pushing it out
+// into either a separate crate or into 'fst' directly as an optional feature.
+// #[cfg(test)]
+// #[allow(dead_code)]
+// mod transducer;
+pub(crate) mod util;
+
+#[cfg(test)]
+mod testoibits {
+ use std::panic::{RefUnwindSafe, UnwindSafe};
+
+ use super::*;
+
+ fn assert_all<T: Send + Sync + UnwindSafe + RefUnwindSafe>() {}
+
+ #[test]
+ fn oibits_main() {
+ assert_all::<AhoCorasick>();
+ assert_all::<AhoCorasickBuilder>();
+ assert_all::<AhoCorasickKind>();
+ assert_all::<FindIter>();
+ assert_all::<FindOverlappingIter>();
+
+ assert_all::<BuildError>();
+ assert_all::<MatchError>();
+ assert_all::<MatchErrorKind>();
+
+ assert_all::<Anchored>();
+ assert_all::<Input>();
+ assert_all::<Match>();
+ assert_all::<MatchKind>();
+ assert_all::<Span>();
+ assert_all::<StartKind>();
+ }
+
+ #[test]
+ fn oibits_automaton() {
+ use crate::{automaton, dfa::DFA};
+
+ assert_all::<automaton::FindIter<DFA>>();
+ assert_all::<automaton::FindOverlappingIter<DFA>>();
+ #[cfg(feature = "std")]
+ assert_all::<automaton::StreamFindIter<DFA, std::io::Stdin>>();
+ assert_all::<automaton::OverlappingState>();
+
+ assert_all::<automaton::Prefilter>();
+ assert_all::<automaton::Candidate>();
+ }
+
+ #[test]
+ fn oibits_packed() {
+ use crate::packed;
+
+ assert_all::<packed::Config>();
+ assert_all::<packed::Builder>();
+ assert_all::<packed::Searcher>();
+ assert_all::<packed::FindIter>();
+ assert_all::<packed::MatchKind>();
+ }
+}
diff --git a/third_party/rust/aho-corasick/src/macros.rs b/third_party/rust/aho-corasick/src/macros.rs
new file mode 100644
index 0000000000..fc73e6eddd
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/macros.rs
@@ -0,0 +1,18 @@
+#![allow(unused_macros)]
+
+macro_rules! log {
+ ($($tt:tt)*) => {
+ #[cfg(feature = "logging")]
+ {
+ $($tt)*
+ }
+ }
+}
+
+macro_rules! debug {
+ ($($tt:tt)*) => { log!(log::debug!($($tt)*)) }
+}
+
+macro_rules! trace {
+ ($($tt:tt)*) => { log!(log::trace!($($tt)*)) }
+}
diff --git a/third_party/rust/aho-corasick/src/nfa/contiguous.rs b/third_party/rust/aho-corasick/src/nfa/contiguous.rs
new file mode 100644
index 0000000000..29c162107d
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/nfa/contiguous.rs
@@ -0,0 +1,1141 @@
+/*!
+Provides a contiguous NFA implementation of Aho-Corasick.
+
+This is a low-level API that generally only needs to be used in niche
+circumstances. When possible, prefer using [`AhoCorasick`](crate::AhoCorasick)
+instead of a contiguous NFA directly. Using an `NFA` directly is typically only
+necessary when one needs access to the [`Automaton`] trait implementation.
+*/
+
+use alloc::{vec, vec::Vec};
+
+use crate::{
+ automaton::Automaton,
+ nfa::noncontiguous,
+ util::{
+ alphabet::ByteClasses,
+ error::{BuildError, MatchError},
+ int::{Usize, U16, U32},
+ prefilter::Prefilter,
+ primitives::{IteratorIndexExt, PatternID, SmallIndex, StateID},
+ search::{Anchored, MatchKind},
+ special::Special,
+ },
+};
+
+/// A contiguous NFA implementation of Aho-Corasick.
+///
+/// When possible, prefer using [`AhoCorasick`](crate::AhoCorasick) instead of
+/// this type directly. Using an `NFA` directly is typically only necessary
+/// when one needs access to the [`Automaton`] trait implementation.
+///
+/// This NFA can only be built by first constructing a [`noncontiguous::NFA`].
+/// Both [`NFA::new`] and [`Builder::build`] do this for you automatically, but
+/// [`Builder::build_from_noncontiguous`] permits doing it explicitly.
+///
+/// The main difference between a noncontiguous NFA and a contiguous NFA is
+/// that the latter represents all of its states and transitions in a single
+/// allocation, whereas the former uses a separate allocation for each state.
+/// Doing this at construction time while keeping a low memory footprint isn't
+/// feasible, which is primarily why there are two different NFA types: one
+/// that does the least amount of work possible to build itself, and another
+/// that does a little extra work to compact itself and make state transitions
+/// faster by making some states use a dense representation.
+///
+/// Because a contiguous NFA uses a single allocation, there is a lot more
+/// opportunity for compression tricks to reduce the heap memory used. Indeed,
+/// it is not uncommon for a contiguous NFA to use an order of magnitude less
+/// heap memory than a noncontiguous NFA. Since building a contiguous NFA
+/// usually only takes a fraction of the time it takes to build a noncontiguous
+/// NFA, the overall build time is not much slower. Thus, in most cases, a
+/// contiguous NFA is the best choice.
+///
+/// Since a contiguous NFA uses various tricks for compression and to achieve
+/// faster state transitions, currently, its limit on the number of states
+/// is somewhat smaller than what a noncontiguous NFA can achieve. Generally
+/// speaking, you shouldn't expect to run into this limit if the number of
+/// patterns is under 1 million. It is plausible that this limit will be
+/// increased in the future. If the limit is reached, building a contiguous NFA
+/// will return an error. Often, since building a contiguous NFA is relatively
+/// cheap, it can make sense to always try it even if you aren't sure if it
+/// will fail or not. If it does, you can always fall back to a noncontiguous
+/// NFA. (Indeed, the main [`AhoCorasick`](crate::AhoCorasick) type employs a
+/// strategy similar to this at construction time.)
+///
+/// # Example
+///
+/// This example shows how to build an `NFA` directly and use it to execute
+/// [`Automaton::try_find`]:
+///
+/// ```
+/// use aho_corasick::{
+/// automaton::Automaton,
+/// nfa::contiguous::NFA,
+/// Input, Match,
+/// };
+///
+/// let patterns = &["b", "abc", "abcd"];
+/// let haystack = "abcd";
+///
+/// let nfa = NFA::new(patterns).unwrap();
+/// assert_eq!(
+/// Some(Match::must(0, 1..2)),
+/// nfa.try_find(&Input::new(haystack))?,
+/// );
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+///
+/// It is also possible to implement your own version of `try_find`. See the
+/// [`Automaton`] documentation for an example.
+#[derive(Clone)]
+pub struct NFA {
+ /// The raw NFA representation. Each state is packed with a header
+ /// (containing the format of the state, the failure transition and, for
+ /// a sparse state, the number of transitions), its transitions and any
+ /// matching pattern IDs for match states.
+ repr: Vec<u32>,
+ /// The length of each pattern. This is used to compute the start offset
+ /// of a match.
+ pattern_lens: Vec<SmallIndex>,
+ /// The total number of states in this NFA.
+ state_len: usize,
+ /// A prefilter for accelerating searches, if one exists.
+ prefilter: Option<Prefilter>,
+ /// The match semantics built into this NFA.
+ match_kind: MatchKind,
+ /// The alphabet size, or total number of equivalence classes, for this
+ /// NFA. Dense states always have this many transitions.
+ alphabet_len: usize,
+ /// The equivalence classes for this NFA. All transitions, dense and
+ /// sparse, are defined on equivalence classes and not on the 256 distinct
+ /// byte values.
+ byte_classes: ByteClasses,
+ /// The length of the shortest pattern in this automaton.
+ min_pattern_len: usize,
+ /// The length of the longest pattern in this automaton.
+ max_pattern_len: usize,
+ /// The information required to deduce which states are "special" in this
+ /// NFA.
+ special: Special,
+}
+
+impl NFA {
+ /// Create a new Aho-Corasick contiguous NFA using the default
+ /// configuration.
+ ///
+ /// Use a [`Builder`] if you want to change the configuration.
+ pub fn new<I, P>(patterns: I) -> Result<NFA, BuildError>
+ where
+ I: IntoIterator<Item = P>,
+ P: AsRef<[u8]>,
+ {
+ NFA::builder().build(patterns)
+ }
+
+ /// A convenience method for returning a new Aho-Corasick contiguous NFA
+ /// builder.
+ ///
+ /// This usually permits one to just import the `NFA` type.
+ pub fn builder() -> Builder {
+ Builder::new()
+ }
+}
+
+impl NFA {
+ /// A sentinel state ID indicating that a search should stop once it has
+ /// entered this state. When a search stops, it returns a match if one
+ /// has been found, otherwise no match. A contiguous NFA always has an
+ /// actual dead state at this ID.
+ const DEAD: StateID = StateID::new_unchecked(0);
+ /// Another sentinel state ID indicating that a search should move through
+ /// current state's failure transition.
+ ///
+ /// Note that unlike DEAD, this does not actually point to a valid state
+ /// in a contiguous NFA. (noncontiguous::NFA::FAIL does point to a valid
+ /// state.) Instead, this points to the position that is guaranteed to
+ /// never be a valid state ID (by making sure it points to a place in the
+ /// middle of the encoding of the DEAD state). Since we never need to
+ /// actually look at the FAIL state itself, this works out.
+ ///
+ /// But why do it this way? So that FAIL is a constant. I don't have any
+ /// concrete evidence that this materially helps matters, but it's easy to
+ /// do. The alternative would be making the FAIL ID point to the second
+ /// state, which could be made a constant but is a little trickier to do.
+ /// The easiest path is to just make the FAIL state a runtime value, but
+ /// since comparisons with FAIL occur in perf critical parts of the search,
+ /// we want it to be as tight as possible and not waste any registers.
+ ///
+ /// Very hand wavy... But the code complexity that results from this is
+ /// very mild.
+ const FAIL: StateID = StateID::new_unchecked(1);
+}
+
+// SAFETY: 'start_state' always returns a valid state ID, 'next_state' always
+// returns a valid state ID given a valid state ID. We otherwise claim that
+// all other methods are correct as well.
+unsafe impl Automaton for NFA {
+ #[inline(always)]
+ fn start_state(&self, anchored: Anchored) -> Result<StateID, MatchError> {
+ match anchored {
+ Anchored::No => Ok(self.special.start_unanchored_id),
+ Anchored::Yes => Ok(self.special.start_anchored_id),
+ }
+ }
+
+ #[inline(always)]
+ fn next_state(
+ &self,
+ anchored: Anchored,
+ mut sid: StateID,
+ byte: u8,
+ ) -> StateID {
+ let repr = &self.repr;
+ let class = self.byte_classes.get(byte);
+ let u32tosid = StateID::from_u32_unchecked;
+ loop {
+ let o = sid.as_usize();
+ let kind = repr[o] & 0xFF;
+ // I tried to encapsulate the "next transition" logic into its own
+ // function, but it seemed to always result in sub-optimal codegen
+ // that led to real and significant slowdowns. So we just inline
+ // the logic here.
+ //
+ // I've also tried a lot of different ways to speed up this
+ // routine, and most of them have failed.
+ if kind == State::KIND_DENSE {
+ let next = u32tosid(repr[o + 2 + usize::from(class)]);
+ if next != NFA::FAIL {
+ return next;
+ }
+ } else if kind == State::KIND_ONE {
+ if class == repr[o].low_u16().high_u8() {
+ return u32tosid(repr[o + 2]);
+ }
+ } else {
+ // NOTE: I tried a SWAR technique in the loop below, but found
+ // it slower. See the 'swar' test in the tests for this module.
+ let trans_len = kind.as_usize();
+ let classes_len = u32_len(trans_len);
+ let trans_offset = o + 2 + classes_len;
+ for (i, &chunk) in
+ repr[o + 2..][..classes_len].iter().enumerate()
+ {
+ let classes = chunk.to_ne_bytes();
+ if classes[0] == class {
+ return u32tosid(repr[trans_offset + i * 4]);
+ }
+ if classes[1] == class {
+ return u32tosid(repr[trans_offset + i * 4 + 1]);
+ }
+ if classes[2] == class {
+ return u32tosid(repr[trans_offset + i * 4 + 2]);
+ }
+ if classes[3] == class {
+ return u32tosid(repr[trans_offset + i * 4 + 3]);
+ }
+ }
+ }
+ // For an anchored search, we never follow failure transitions
+ // because failure transitions lead us down a path to matching
+ // a *proper* suffix of the path we were on. Thus, it can only
+ // produce matches that appear after the beginning of the search.
+ if anchored.is_anchored() {
+ return NFA::DEAD;
+ }
+ sid = u32tosid(repr[o + 1]);
+ }
+ }
+
+ #[inline(always)]
+ fn is_special(&self, sid: StateID) -> bool {
+ sid <= self.special.max_special_id
+ }
+
+ #[inline(always)]
+ fn is_dead(&self, sid: StateID) -> bool {
+ sid == NFA::DEAD
+ }
+
+ #[inline(always)]
+ fn is_match(&self, sid: StateID) -> bool {
+ !self.is_dead(sid) && sid <= self.special.max_match_id
+ }
+
+ #[inline(always)]
+ fn is_start(&self, sid: StateID) -> bool {
+ sid == self.special.start_unanchored_id
+ || sid == self.special.start_anchored_id
+ }
+
+ #[inline(always)]
+ fn match_kind(&self) -> MatchKind {
+ self.match_kind
+ }
+
+ #[inline(always)]
+ fn patterns_len(&self) -> usize {
+ self.pattern_lens.len()
+ }
+
+ #[inline(always)]
+ fn pattern_len(&self, pid: PatternID) -> usize {
+ self.pattern_lens[pid].as_usize()
+ }
+
+ #[inline(always)]
+ fn min_pattern_len(&self) -> usize {
+ self.min_pattern_len
+ }
+
+ #[inline(always)]
+ fn max_pattern_len(&self) -> usize {
+ self.max_pattern_len
+ }
+
+ #[inline(always)]
+ fn match_len(&self, sid: StateID) -> usize {
+ State::match_len(self.alphabet_len, &self.repr[sid.as_usize()..])
+ }
+
+ #[inline(always)]
+ fn match_pattern(&self, sid: StateID, index: usize) -> PatternID {
+ State::match_pattern(
+ self.alphabet_len,
+ &self.repr[sid.as_usize()..],
+ index,
+ )
+ }
+
+ #[inline(always)]
+ fn memory_usage(&self) -> usize {
+ use core::mem::size_of;
+
+ (self.repr.len() * size_of::<u32>())
+ + (self.pattern_lens.len() * size_of::<SmallIndex>())
+ + self.prefilter.as_ref().map_or(0, |p| p.memory_usage())
+ }
+
+ #[inline(always)]
+ fn prefilter(&self) -> Option<&Prefilter> {
+ self.prefilter.as_ref()
+ }
+}
+
+impl core::fmt::Debug for NFA {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ use crate::automaton::fmt_state_indicator;
+
+ writeln!(f, "contiguous::NFA(")?;
+ let mut sid = NFA::DEAD; // always the first state and always present
+ loop {
+ let raw = &self.repr[sid.as_usize()..];
+ if raw.is_empty() {
+ break;
+ }
+ let is_match = self.is_match(sid);
+ let state = State::read(self.alphabet_len, is_match, raw);
+ fmt_state_indicator(f, self, sid)?;
+ write!(
+ f,
+ "{:06}({:06}): ",
+ sid.as_usize(),
+ state.fail.as_usize()
+ )?;
+ state.fmt(f)?;
+ write!(f, "\n")?;
+ if self.is_match(sid) {
+ write!(f, " matches: ")?;
+ for i in 0..state.match_len {
+ let pid = State::match_pattern(self.alphabet_len, raw, i);
+ if i > 0 {
+ write!(f, ", ")?;
+ }
+ write!(f, "{}", pid.as_usize())?;
+ }
+ write!(f, "\n")?;
+ }
+ // The FAIL state doesn't actually have any space allocated for
+ // it, so we have to treat it as a special case and write it out
+ // just below the DEAD state.
+ if sid == NFA::DEAD {
+ writeln!(f, "F {:06}:", NFA::FAIL.as_usize())?;
+ }
+ let len = State::len(self.alphabet_len, is_match, raw);
+ sid = StateID::new(sid.as_usize().checked_add(len).unwrap())
+ .unwrap();
+ }
+ writeln!(f, "match kind: {:?}", self.match_kind)?;
+ writeln!(f, "prefilter: {:?}", self.prefilter.is_some())?;
+ writeln!(f, "state length: {:?}", self.state_len)?;
+ writeln!(f, "pattern length: {:?}", self.patterns_len())?;
+ writeln!(f, "shortest pattern length: {:?}", self.min_pattern_len)?;
+ writeln!(f, "longest pattern length: {:?}", self.max_pattern_len)?;
+ writeln!(f, "alphabet length: {:?}", self.alphabet_len)?;
+ writeln!(f, "byte classes: {:?}", self.byte_classes)?;
+ writeln!(f, "memory usage: {:?}", self.memory_usage())?;
+ writeln!(f, ")")?;
+
+ Ok(())
+ }
+}
+
+/// The "in memory" representation a single dense or sparse state.
+///
+/// A `State`'s in memory representation is not ever actually materialized
+/// during a search with a contiguous NFA. Doing so would be too slow. (Indeed,
+/// the only time a `State` is actually constructed is in `Debug` impls.)
+/// Instead, a `State` exposes a number of static methods for reading certain
+/// things from the raw binary encoding of the state.
+#[derive(Clone)]
+struct State<'a> {
+ /// The state to transition to when 'class_to_next' yields a transition
+ /// to the FAIL state.
+ fail: StateID,
+ /// The number of pattern IDs in this state. For a non-match state, this is
+ /// always zero. Otherwise it is always bigger than zero.
+ match_len: usize,
+ /// The sparse or dense representation of the transitions for this state.
+ trans: StateTrans<'a>,
+}
+
+/// The underlying representation of sparse or dense transitions for a state.
+///
+/// Note that like `State`, we don't typically construct values of this type
+/// during a search, since we don't always need all of its values and
+/// materializing them would be a lot of wasted work.
+#[derive(Clone)]
+enum StateTrans<'a> {
+ /// A sparse representation of transitions for a state, where only non-FAIL
+ /// transitions are explicitly represented.
+ Sparse {
+ classes: &'a [u32],
+ /// The transitions for this state, where each transition is packed
+ /// into a u32. The low 8 bits correspond to the byte class for the
+ /// transition, and the high 24 bits correspond to the next state ID.
+ ///
+ /// This packing is why the max state ID allowed for a contiguous
+ /// NFA is 2^24-1.
+ nexts: &'a [u32],
+ },
+ /// A "one transition" state that is never a match state.
+ ///
+ /// These are by far the most common state, so we use a specialized and
+ /// very compact representation for them.
+ One {
+ /// The element of this NFA's alphabet that this transition is
+ /// defined for.
+ class: u8,
+ /// The state this should transition to if the current symbol is
+ /// equal to 'class'.
+ next: u32,
+ },
+ /// A dense representation of transitions for a state, where all
+ /// transitions are explicitly represented, including transitions to the
+ /// FAIL state.
+ Dense {
+ /// A dense set of transitions to other states. The transitions may
+ /// point to a FAIL state, in which case, the search should try the
+ /// same transition lookup at 'fail'.
+ ///
+ /// Note that this is indexed by byte equivalence classes and not
+ /// byte values. That means 'class_to_next[byte]' is wrong and
+ /// 'class_to_next[classes.get(byte)]' is correct. The number of
+ /// transitions is always equivalent to 'classes.alphabet_len()'.
+ class_to_next: &'a [u32],
+ },
+}
+
+impl<'a> State<'a> {
+ /// The offset of where the "kind" of a state is stored. If it isn't one
+ /// of the sentinel values below, then it's a sparse state and the kind
+ /// corresponds to the number of transitions in the state.
+ const KIND: usize = 0;
+
+ /// A sentinel value indicating that the state uses a dense representation.
+ const KIND_DENSE: u32 = 0xFF;
+ /// A sentinel value indicating that the state uses a special "one
+ /// transition" encoding. In practice, non-match states with one transition
+ /// make up the overwhelming majority of all states in any given
+ /// Aho-Corasick automaton, so we can specialize them using a very compact
+ /// representation.
+ const KIND_ONE: u32 = 0xFE;
+
+ /// The maximum number of transitions to encode as a sparse state. Usually
+ /// states with a lot of transitions are either very rare, or occur near
+ /// the start state. In the latter case, they are probably dense already
+ /// anyway. In the former case, making them dense is fine because they're
+ /// rare.
+ ///
+ /// This needs to be small enough to permit each of the sentinel values for
+ /// 'KIND' above. Namely, a sparse state embeds the number of transitions
+ /// into the 'KIND'. Basically, "sparse" is a state kind too, but it's the
+ /// "else" branch.
+ ///
+ /// N.B. There isn't anything particularly magical about 127 here. I
+ /// just picked it because I figured any sparse state with this many
+ /// transitions is going to be exceptionally rare, and if it did have this
+ /// many transitions, then it would be quite slow to do a linear scan on
+ /// the transitions during a search anyway.
+ const MAX_SPARSE_TRANSITIONS: usize = 127;
+
+ /// Remap state IDs in-place.
+ ///
+ /// `state` should be the raw binary encoding of a state. (The start
+ /// of the slice must correspond to the start of the state, but the slice
+ /// may extend past the end of the encoding of the state.)
+ fn remap(
+ alphabet_len: usize,
+ old_to_new: &[StateID],
+ state: &mut [u32],
+ ) -> Result<(), BuildError> {
+ let kind = State::kind(state);
+ if kind == State::KIND_DENSE {
+ state[1] = old_to_new[state[1].as_usize()].as_u32();
+ for next in state[2..][..alphabet_len].iter_mut() {
+ *next = old_to_new[next.as_usize()].as_u32();
+ }
+ } else if kind == State::KIND_ONE {
+ state[1] = old_to_new[state[1].as_usize()].as_u32();
+ state[2] = old_to_new[state[2].as_usize()].as_u32();
+ } else {
+ let trans_len = State::sparse_trans_len(state);
+ let classes_len = u32_len(trans_len);
+ state[1] = old_to_new[state[1].as_usize()].as_u32();
+ for next in state[2 + classes_len..][..trans_len].iter_mut() {
+ *next = old_to_new[next.as_usize()].as_u32();
+ }
+ }
+ Ok(())
+ }
+
+ /// Returns the length, in number of u32s, of this state.
+ ///
+ /// This is useful for reading states consecutively, e.g., in the Debug
+ /// impl without needing to store a separate map from state index to state
+ /// identifier.
+ ///
+ /// `state` should be the raw binary encoding of a state. (The start
+ /// of the slice must correspond to the start of the state, but the slice
+ /// may extend past the end of the encoding of the state.)
+ fn len(alphabet_len: usize, is_match: bool, state: &[u32]) -> usize {
+ let kind_len = 1;
+ let fail_len = 1;
+ let kind = State::kind(state);
+ let (classes_len, trans_len) = if kind == State::KIND_DENSE {
+ (0, alphabet_len)
+ } else if kind == State::KIND_ONE {
+ (0, 1)
+ } else {
+ let trans_len = State::sparse_trans_len(state);
+ let classes_len = u32_len(trans_len);
+ (classes_len, trans_len)
+ };
+ let match_len = if !is_match {
+ 0
+ } else if State::match_len(alphabet_len, state) == 1 {
+ // This is a special case because when there is one pattern ID for
+ // a match state, it is represented by a single u32 with its high
+ // bit set (which is impossible for a valid pattern ID).
+ 1
+ } else {
+ // We add 1 to include the u32 that indicates the number of
+ // pattern IDs that follow.
+ 1 + State::match_len(alphabet_len, state)
+ };
+ kind_len + fail_len + classes_len + trans_len + match_len
+ }
+
+ /// Returns the kind of this state.
+ ///
+ /// This only includes the low byte.
+ #[inline(always)]
+ fn kind(state: &[u32]) -> u32 {
+ state[State::KIND] & 0xFF
+ }
+
+ /// Get the number of sparse transitions in this state. This can never
+ /// be more than State::MAX_SPARSE_TRANSITIONS, as all states with more
+ /// transitions are encoded as dense states.
+ ///
+ /// `state` should be the raw binary encoding of a sparse state. (The
+ /// start of the slice must correspond to the start of the state, but the
+ /// slice may extend past the end of the encoding of the state.) If this
+ /// isn't a sparse state, then the return value is unspecified.
+ ///
+ /// Do note that this is only legal to call on a sparse state. So for
+ /// example, "one transition" state is not a sparse state, so it would not
+ /// be legal to call this method on such a state.
+ #[inline(always)]
+ fn sparse_trans_len(state: &[u32]) -> usize {
+ (state[State::KIND] & 0xFF).as_usize()
+ }
+
+ /// Returns the total number of matching pattern IDs in this state. Calling
+ /// this on a state that isn't a match results in unspecified behavior.
+ /// Thus, the returned number is never 0 for all correct calls.
+ ///
+ /// `state` should be the raw binary encoding of a state. (The start
+ /// of the slice must correspond to the start of the state, but the slice
+ /// may extend past the end of the encoding of the state.)
+ #[inline(always)]
+ fn match_len(alphabet_len: usize, state: &[u32]) -> usize {
+ // We don't need to handle KIND_ONE here because it can never be a
+ // match state.
+ let packed = if State::kind(state) == State::KIND_DENSE {
+ let start = 2 + alphabet_len;
+ state[start].as_usize()
+ } else {
+ let trans_len = State::sparse_trans_len(state);
+ let classes_len = u32_len(trans_len);
+ let start = 2 + classes_len + trans_len;
+ state[start].as_usize()
+ };
+ if packed & (1 << 31) == 0 {
+ packed
+ } else {
+ 1
+ }
+ }
+
+ /// Returns the pattern ID corresponding to the given index for the state
+ /// given. The `index` provided must be less than the number of pattern IDs
+ /// in this state.
+ ///
+ /// `state` should be the raw binary encoding of a state. (The start of
+ /// the slice must correspond to the start of the state, but the slice may
+ /// extend past the end of the encoding of the state.)
+ ///
+ /// If the given state is not a match state or if the index is out of
+ /// bounds, then this has unspecified behavior.
+ #[inline(always)]
+ fn match_pattern(
+ alphabet_len: usize,
+ state: &[u32],
+ index: usize,
+ ) -> PatternID {
+ // We don't need to handle KIND_ONE here because it can never be a
+ // match state.
+ let start = if State::kind(state) == State::KIND_DENSE {
+ 2 + alphabet_len
+ } else {
+ let trans_len = State::sparse_trans_len(state);
+ let classes_len = u32_len(trans_len);
+ 2 + classes_len + trans_len
+ };
+ let packed = state[start];
+ let pid = if packed & (1 << 31) == 0 {
+ state[start + 1 + index]
+ } else {
+ assert_eq!(0, index);
+ packed & !(1 << 31)
+ };
+ PatternID::from_u32_unchecked(pid)
+ }
+
+ /// Read a state's binary encoding to its in-memory representation.
+ ///
+ /// `alphabet_len` should be the total number of transitions defined for
+ /// dense states.
+ ///
+ /// `is_match` should be true if this state is a match state and false
+ /// otherwise.
+ ///
+ /// `state` should be the raw binary encoding of a state. (The start
+ /// of the slice must correspond to the start of the state, but the slice
+ /// may extend past the end of the encoding of the state.)
+ fn read(
+ alphabet_len: usize,
+ is_match: bool,
+ state: &'a [u32],
+ ) -> State<'a> {
+ let kind = State::kind(state);
+ let match_len =
+ if !is_match { 0 } else { State::match_len(alphabet_len, state) };
+ let (trans, fail) = if kind == State::KIND_DENSE {
+ let fail = StateID::from_u32_unchecked(state[1]);
+ let class_to_next = &state[2..][..alphabet_len];
+ (StateTrans::Dense { class_to_next }, fail)
+ } else if kind == State::KIND_ONE {
+ let fail = StateID::from_u32_unchecked(state[1]);
+ let class = state[State::KIND].low_u16().high_u8();
+ let next = state[2];
+ (StateTrans::One { class, next }, fail)
+ } else {
+ let fail = StateID::from_u32_unchecked(state[1]);
+ let trans_len = State::sparse_trans_len(state);
+ let classes_len = u32_len(trans_len);
+ let classes = &state[2..][..classes_len];
+ let nexts = &state[2 + classes_len..][..trans_len];
+ (StateTrans::Sparse { classes, nexts }, fail)
+ };
+ State { fail, match_len, trans }
+ }
+
+ /// Encode the "old" state from a noncontiguous NFA to its binary
+ /// representation to the given `dst` slice. `classes` should be the byte
+ /// classes computed for the noncontiguous NFA that the given state came
+ /// from.
+ ///
+ /// This returns an error if `dst` became so big that `StateID`s can no
+ /// longer be created for new states. Otherwise, it returns the state ID of
+ /// the new state created.
+ ///
+ /// When `force_dense` is true, then the encoded state will always use a
+ /// dense format. Otherwise, the choice between dense and sparse will be
+ /// automatically chosen based on the old state.
+ fn write(
+ nnfa: &noncontiguous::NFA,
+ oldsid: StateID,
+ old: &noncontiguous::State,
+ classes: &ByteClasses,
+ dst: &mut Vec<u32>,
+ force_dense: bool,
+ ) -> Result<StateID, BuildError> {
+ let sid = StateID::new(dst.len()).map_err(|e| {
+ BuildError::state_id_overflow(StateID::MAX.as_u64(), e.attempted())
+ })?;
+ let old_len = nnfa.iter_trans(oldsid).count();
+ // For states with a lot of transitions, we might as well just make
+ // them dense. These kinds of hot states tend to be very rare, so we're
+ // okay with it. This also gives us more sentinels in the state's
+ // 'kind', which lets us create different state kinds to save on
+ // space.
+ let kind = if force_dense || old_len > State::MAX_SPARSE_TRANSITIONS {
+ State::KIND_DENSE
+ } else if old_len == 1 && !old.is_match() {
+ State::KIND_ONE
+ } else {
+ // For a sparse state, the kind is just the number of transitions.
+ u32::try_from(old_len).unwrap()
+ };
+ if kind == State::KIND_DENSE {
+ dst.push(kind);
+ dst.push(old.fail().as_u32());
+ State::write_dense_trans(nnfa, oldsid, classes, dst)?;
+ } else if kind == State::KIND_ONE {
+ let t = nnfa.iter_trans(oldsid).next().unwrap();
+ let class = u32::from(classes.get(t.byte()));
+ dst.push(kind | (class << 8));
+ dst.push(old.fail().as_u32());
+ dst.push(t.next().as_u32());
+ } else {
+ dst.push(kind);
+ dst.push(old.fail().as_u32());
+ State::write_sparse_trans(nnfa, oldsid, classes, dst)?;
+ }
+ // Now finally write the number of matches and the matches themselves.
+ if old.is_match() {
+ let matches_len = nnfa.iter_matches(oldsid).count();
+ if matches_len == 1 {
+ let pid = nnfa.iter_matches(oldsid).next().unwrap().as_u32();
+ assert_eq!(0, pid & (1 << 31));
+ dst.push((1 << 31) | pid);
+ } else {
+ assert_eq!(0, matches_len & (1 << 31));
+ dst.push(matches_len.as_u32());
+ dst.extend(nnfa.iter_matches(oldsid).map(|pid| pid.as_u32()));
+ }
+ }
+ Ok(sid)
+ }
+
+ /// Encode the "old" state transitions from a noncontiguous NFA to its
+ /// binary sparse representation to the given `dst` slice. `classes` should
+ /// be the byte classes computed for the noncontiguous NFA that the given
+ /// state came from.
+ ///
+ /// This returns an error if `dst` became so big that `StateID`s can no
+ /// longer be created for new states.
+ fn write_sparse_trans(
+ nnfa: &noncontiguous::NFA,
+ oldsid: StateID,
+ classes: &ByteClasses,
+ dst: &mut Vec<u32>,
+ ) -> Result<(), BuildError> {
+ let (mut chunk, mut len) = ([0; 4], 0);
+ for t in nnfa.iter_trans(oldsid) {
+ chunk[len] = classes.get(t.byte());
+ len += 1;
+ if len == 4 {
+ dst.push(u32::from_ne_bytes(chunk));
+ chunk = [0; 4];
+ len = 0;
+ }
+ }
+ if len > 0 {
+ // In the case where the number of transitions isn't divisible
+ // by 4, the last u32 chunk will have some left over room. In
+ // this case, we "just" repeat the last equivalence class. By
+ // doing this, we know the leftover faux transitions will never
+ // be followed: if one were, the earlier occurrence of that same
+ // equivalence class in the chunk would have been followed first.
+ // This saves us some branching in the search time state
+ // transition code.
+ let repeat = chunk[len - 1];
+ while len < 4 {
+ chunk[len] = repeat;
+ len += 1;
+ }
+ dst.push(u32::from_ne_bytes(chunk));
+ }
+ for t in nnfa.iter_trans(oldsid) {
+ dst.push(t.next().as_u32());
+ }
+ Ok(())
+ }
+
+ /// Encode the "old" state transitions from a noncontiguous NFA to its
+ /// binary dense representation to the given `dst` slice. `classes` should
+ /// be the byte classes computed for the noncontiguous NFA that the given
+ /// state came from.
+ ///
+ /// This returns an error if `dst` became so big that `StateID`s can no
+ /// longer be created for new states.
+ fn write_dense_trans(
+ nnfa: &noncontiguous::NFA,
+ oldsid: StateID,
+ classes: &ByteClasses,
+ dst: &mut Vec<u32>,
+ ) -> Result<(), BuildError> {
+ // Our byte classes let us shrink the size of our dense states to the
+ // number of equivalence classes instead of just fixing it to 256.
+ // Any non-explicitly defined transition is just a transition to the
+ // FAIL state, so we fill that in first and then overwrite them with
+ // explicitly defined transitions. (Most states probably only have one
+ // or two explicitly defined transitions.)
+ //
+ // N.B. Remember that while building the contiguous NFA, we use state
+ // IDs from the noncontiguous NFA. It isn't until we've added all
+ // states that we go back and map noncontiguous IDs to contiguous IDs.
+ let start = dst.len();
+ dst.extend(
+ core::iter::repeat(noncontiguous::NFA::FAIL.as_u32())
+ .take(classes.alphabet_len()),
+ );
+ assert!(start < dst.len(), "equivalence classes are never empty");
+ for t in nnfa.iter_trans(oldsid) {
+ dst[start + usize::from(classes.get(t.byte()))] =
+ t.next().as_u32();
+ }
+ Ok(())
+ }
+
+ /// Return an iterator over every explicitly defined transition in this
+ /// state.
+ fn transitions<'b>(&'b self) -> impl Iterator<Item = (u8, StateID)> + 'b {
+ let mut i = 0;
+ core::iter::from_fn(move || match self.trans {
+ StateTrans::Sparse { classes, nexts } => {
+ if i >= nexts.len() {
+ return None;
+ }
+ let chunk = classes[i / 4];
+ let class = chunk.to_ne_bytes()[i % 4];
+ let next = StateID::from_u32_unchecked(nexts[i]);
+ i += 1;
+ Some((class, next))
+ }
+ StateTrans::One { class, next } => {
+ if i == 0 {
+ i += 1;
+ Some((class, StateID::from_u32_unchecked(next)))
+ } else {
+ None
+ }
+ }
+ StateTrans::Dense { class_to_next } => {
+ if i >= class_to_next.len() {
+ return None;
+ }
+ let class = i.as_u8();
+ let next = StateID::from_u32_unchecked(class_to_next[i]);
+ i += 1;
+ Some((class, next))
+ }
+ })
+ }
+}
+
+impl<'a> core::fmt::Debug for State<'a> {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ use crate::{automaton::sparse_transitions, util::debug::DebugByte};
+
+ let it = sparse_transitions(self.transitions())
+ // Writing out all FAIL transitions is quite noisy. Instead, we
+ // just require readers of the output to assume anything absent
+ // maps to the FAIL transition.
+ .filter(|&(_, _, sid)| sid != NFA::FAIL)
+ .enumerate();
+ for (i, (start, end, sid)) in it {
+ if i > 0 {
+ write!(f, ", ")?;
+ }
+ if start == end {
+ write!(f, "{:?} => {:?}", DebugByte(start), sid.as_usize())?;
+ } else {
+ write!(
+ f,
+ "{:?}-{:?} => {:?}",
+ DebugByte(start),
+ DebugByte(end),
+ sid.as_usize()
+ )?;
+ }
+ }
+ Ok(())
+ }
+}
+
+/// A builder for configuring an Aho-Corasick contiguous NFA.
+///
+/// This builder has a subset of the options available to a
+/// [`AhoCorasickBuilder`](crate::AhoCorasickBuilder). Of the shared options,
+/// their behavior is identical.
+#[derive(Clone, Debug)]
+pub struct Builder {
+ noncontiguous: noncontiguous::Builder,
+ dense_depth: usize,
+ byte_classes: bool,
+}
+
+impl Default for Builder {
+ fn default() -> Builder {
+ Builder {
+ noncontiguous: noncontiguous::Builder::new(),
+ dense_depth: 2,
+ byte_classes: true,
+ }
+ }
+}
+
+impl Builder {
+ /// Create a new builder for configuring an Aho-Corasick contiguous NFA.
+ pub fn new() -> Builder {
+ Builder::default()
+ }
+
+ /// Build an Aho-Corasick contiguous NFA from the given iterator of
+ /// patterns.
+ ///
+ /// A builder may be reused to create more NFAs.
+ pub fn build<I, P>(&self, patterns: I) -> Result<NFA, BuildError>
+ where
+ I: IntoIterator<Item = P>,
+ P: AsRef<[u8]>,
+ {
+ let nnfa = self.noncontiguous.build(patterns)?;
+ self.build_from_noncontiguous(&nnfa)
+ }
+
+ /// Build an Aho-Corasick contiguous NFA from the given noncontiguous NFA.
+ ///
+ /// Note that when this method is used, only the `dense_depth` and
+ /// `byte_classes` settings on this builder are respected. The other
+ /// settings only apply to the initial construction of the Aho-Corasick
+ /// automaton. Since using this method requires that initial construction
+ /// has already completed, all settings impacting only initial construction
+ /// are no longer relevant.
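+ ///
+ /// # Example
+ ///
+ /// A minimal sketch of the explicit two-step construction:
+ ///
+ /// ```
+ /// use aho_corasick::{
+ /// automaton::Automaton,
+ /// nfa::{contiguous, noncontiguous},
+ /// Input, Match,
+ /// };
+ ///
+ /// let nnfa = noncontiguous::NFA::new(&["foo", "bar"]).unwrap();
+ /// let nfa = contiguous::NFA::builder()
+ /// .build_from_noncontiguous(&nnfa)
+ /// .unwrap();
+ /// assert_eq!(
+ /// Some(Match::must(1, 0..3)),
+ /// nfa.try_find(&Input::new("bar foo"))?,
+ /// );
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```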
+ pub fn build_from_noncontiguous(
+ &self,
+ nnfa: &noncontiguous::NFA,
+ ) -> Result<NFA, BuildError> {
+ debug!("building contiguous NFA");
+ let byte_classes = if self.byte_classes {
+ nnfa.byte_classes().clone()
+ } else {
+ ByteClasses::singletons()
+ };
+ let mut index_to_state_id = vec![NFA::DEAD; nnfa.states().len()];
+ let mut nfa = NFA {
+ repr: vec![],
+ pattern_lens: nnfa.pattern_lens_raw().to_vec(),
+ state_len: nnfa.states().len(),
+ prefilter: nnfa.prefilter().map(|p| p.clone()),
+ match_kind: nnfa.match_kind(),
+ alphabet_len: byte_classes.alphabet_len(),
+ byte_classes,
+ min_pattern_len: nnfa.min_pattern_len(),
+ max_pattern_len: nnfa.max_pattern_len(),
+ // The special state IDs are set later.
+ special: Special::zero(),
+ };
+ for (oldsid, state) in nnfa.states().iter().with_state_ids() {
+ // We don't actually encode a fail state since it isn't necessary.
+ // But we still want to make sure any FAIL ids are mapped
+ // correctly.
+ if oldsid == noncontiguous::NFA::FAIL {
+ index_to_state_id[oldsid] = NFA::FAIL;
+ continue;
+ }
+ let force_dense = state.depth().as_usize() < self.dense_depth;
+ let newsid = State::write(
+ nnfa,
+ oldsid,
+ state,
+ &nfa.byte_classes,
+ &mut nfa.repr,
+ force_dense,
+ )?;
+ index_to_state_id[oldsid] = newsid;
+ }
+ for &newsid in index_to_state_id.iter() {
+ if newsid == NFA::FAIL {
+ continue;
+ }
+ let state = &mut nfa.repr[newsid.as_usize()..];
+ State::remap(nfa.alphabet_len, &index_to_state_id, state)?;
+ }
+ // Now that we've remapped all the IDs in our states, all that's left
+ // is remapping the special state IDs.
+ let remap = &index_to_state_id;
+ let old = nnfa.special();
+ let new = &mut nfa.special;
+ new.max_special_id = remap[old.max_special_id];
+ new.max_match_id = remap[old.max_match_id];
+ new.start_unanchored_id = remap[old.start_unanchored_id];
+ new.start_anchored_id = remap[old.start_anchored_id];
+ debug!(
+ "contiguous NFA built, <states: {:?}, size: {:?}, \
+ alphabet len: {:?}>",
+ nfa.state_len,
+ nfa.memory_usage(),
+ nfa.byte_classes.alphabet_len(),
+ );
+ // The vectors can grow ~twice as big during construction because a
+ // Vec amortizes growth. But here, let's shrink things back down to
+ // what we actually need since we're never going to add more to it.
+ nfa.repr.shrink_to_fit();
+ nfa.pattern_lens.shrink_to_fit();
+ Ok(nfa)
+ }
+
+ /// Set the desired match semantics.
+ ///
+ /// This only applies when using [`Builder::build`] and not
+ /// [`Builder::build_from_noncontiguous`].
+ ///
+ /// See
+ /// [`AhoCorasickBuilder::match_kind`](crate::AhoCorasickBuilder::match_kind)
+ /// for more documentation and examples.
+ pub fn match_kind(&mut self, kind: MatchKind) -> &mut Builder {
+ self.noncontiguous.match_kind(kind);
+ self
+ }
+
+ /// Enable ASCII-aware case insensitive matching.
+ ///
+ /// This only applies when using [`Builder::build`] and not
+ /// [`Builder::build_from_noncontiguous`].
+ ///
+ /// See
+ /// [`AhoCorasickBuilder::ascii_case_insensitive`](crate::AhoCorasickBuilder::ascii_case_insensitive)
+ /// for more documentation and examples.
+ pub fn ascii_case_insensitive(&mut self, yes: bool) -> &mut Builder {
+ self.noncontiguous.ascii_case_insensitive(yes);
+ self
+ }
+
+ /// Enable heuristic prefilter optimizations.
+ ///
+ /// This only applies when using [`Builder::build`] and not
+ /// [`Builder::build_from_noncontiguous`].
+ ///
+ /// See
+ /// [`AhoCorasickBuilder::prefilter`](crate::AhoCorasickBuilder::prefilter)
+ /// for more documentation and examples.
+ pub fn prefilter(&mut self, yes: bool) -> &mut Builder {
+ self.noncontiguous.prefilter(yes);
+ self
+ }
+
+ /// Set the limit on how many states use a dense representation for their
+ /// transitions. Other states will generally use a sparse representation.
+ ///
+ /// See
+ /// [`AhoCorasickBuilder::dense_depth`](crate::AhoCorasickBuilder::dense_depth)
+ /// for more documentation and examples.
+ pub fn dense_depth(&mut self, depth: usize) -> &mut Builder {
+ self.dense_depth = depth;
+ self
+ }
+
+ /// A debug setting for whether to attempt to shrink the size of the
+ /// automaton's alphabet or not.
+ ///
+ /// This should never be disabled unless you're debugging an automaton.
+ /// Namely, disabling byte classes makes transitions easier to reason
+ /// about, since they use the actual bytes instead of equivalence classes.
+ /// Disabling this confers no performance benefit at search time.
+ ///
+ /// See
+ /// [`AhoCorasickBuilder::byte_classes`](crate::AhoCorasickBuilder::byte_classes)
+ /// for more documentation and examples.
+ pub fn byte_classes(&mut self, yes: bool) -> &mut Builder {
+ self.byte_classes = yes;
+ self
+ }
+}
+
+/// Computes the number of u32 values needed to store one byte for each of
+/// the given number of transitions. That is, `ceil(ntrans / 4)`.
+fn u32_len(ntrans: usize) -> usize {
+ if ntrans % 4 == 0 {
+ ntrans >> 2
+ } else {
+ (ntrans >> 2) + 1
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ // This test demonstrates a SWAR technique I tried in the sparse transition
+ // code inside of 'next_state'. Namely, sparse transitions work by
+ // iterating over u32 chunks, with each chunk containing up to 4 classes
+ // corresponding to 4 transitions. This SWAR technique lets us find a
+ // matching transition without converting the u32 to a [u8; 4].
+ //
+ // It turned out to be a little slower unfortunately, which isn't too
+ // surprising, since this is likely a throughput oriented optimization.
+ // Loop unrolling doesn't really help us because the vast majority of
+ // states have very few transitions.
+ //
+ // Anyway, this code was a little tricky to write, so I converted it to a
+ // test in case someone figures out how to use it more effectively than
+ // I could.
+ //
+ // (This also only works on little endian. So big endian would need to be
+ // accounted for if we ever decided to use this I think.)
+ #[cfg(target_endian = "little")]
+ #[test]
+ fn swar() {
+ use super::*;
+
+ fn has_zero_byte(x: u32) -> u32 {
+ const LO_U32: u32 = 0x01010101;
+ const HI_U32: u32 = 0x80808080;
+
+ x.wrapping_sub(LO_U32) & !x & HI_U32
+ }
+
+ fn broadcast(b: u8) -> u32 {
+ (u32::from(b)) * (u32::MAX / 255)
+ }
+
+ fn index_of(x: u32) -> usize {
+ let o =
+ (((x - 1) & 0x01010101).wrapping_mul(0x01010101) >> 24) - 1;
+ o.as_usize()
+ }
+
+ let bytes: [u8; 4] = [b'1', b'A', b'a', b'z'];
+ let chunk = u32::from_ne_bytes(bytes);
+
+ let needle = broadcast(b'1');
+ assert_eq!(0, index_of(has_zero_byte(needle ^ chunk)));
+ let needle = broadcast(b'A');
+ assert_eq!(1, index_of(has_zero_byte(needle ^ chunk)));
+ let needle = broadcast(b'a');
+ assert_eq!(2, index_of(has_zero_byte(needle ^ chunk)));
+ let needle = broadcast(b'z');
+ assert_eq!(3, index_of(has_zero_byte(needle ^ chunk)));
+ }
+}
diff --git a/third_party/rust/aho-corasick/src/nfa/mod.rs b/third_party/rust/aho-corasick/src/nfa/mod.rs
new file mode 100644
index 0000000000..93f4dc25c2
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/nfa/mod.rs
@@ -0,0 +1,40 @@
+/*!
+Provides direct access to NFA implementations of Aho-Corasick.
+
+The principle characteristic of an NFA in this crate is that it may
+transition through multiple states per byte of haystack. In Aho-Corasick
+parlance, NFAs follow failure transitions during a search. In contrast,
+a [`DFA`](crate::dfa::DFA) pre-computes all failure transitions during
+compilation at the expense of a much bigger memory footprint.
+
+Currently, there are two NFA implementations provided: noncontiguous and
+contiguous. The names reflect their internal representation, and consequently,
+the trade offs associated with them:
+
+* A [`noncontiguous::NFA`] uses a separate allocation for every NFA state to
+represent its transitions in a sparse format. This is ideal for building an
+NFA, since it cheaply permits different states to have a different number of
+transitions. A noncontiguous NFA is where the main Aho-Corasick construction
+algorithm is implemented. All other Aho-Corasick implementations are built by
+first constructing a noncontiguous NFA.
+* A [`contiguous::NFA`] uses a single allocation to represent all states,
+while still encoding most states as sparse states but permitting states near
+the starting state to have a dense representation. The dense representation
+uses more memory, but permits computing transitions during a search more
+quickly. By only making the most active states dense (the states near the
+starting state), a contiguous NFA better balances memory usage with search
+speed. The single contiguous allocation also uses less overhead per state and
+enables compression tricks where most states only use 8 bytes of heap memory.
+
+When given the choice between these two, you almost always want to pick a
+contiguous NFA. It takes only a little longer to build, but both its memory
+usage and search speed are typically much better than a noncontiguous NFA. A
+noncontiguous NFA is useful when prioritizing build times, or when there are
+so many patterns that a contiguous NFA could not be built. (Currently, because
+of the compression tricks used to improve memory usage and search speed, a
+contiguous NFA has a smaller internal limit on the total number of NFA states
+it can represent. But you
+would likely need to have hundreds of thousands or even millions of patterns
+before you hit this limit.)
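+
+As a small sketch, both NFAs can be built directly and searched through the
+same [`Automaton`](crate::automaton::Automaton) trait:
+
+```
+use aho_corasick::{
+ automaton::Automaton,
+ nfa::{contiguous, noncontiguous},
+ Input, Match,
+};
+
+let patterns = &["samwise", "sam"];
+let haystack = "samwise";
+
+let nnfa = noncontiguous::NFA::new(patterns).unwrap();
+let cnfa = contiguous::NFA::new(patterns).unwrap();
+let expected = Some(Match::must(1, 0..3));
+assert_eq!(expected, nnfa.try_find(&Input::new(haystack))?);
+assert_eq!(expected, cnfa.try_find(&Input::new(haystack))?);
+# Ok::<(), Box<dyn std::error::Error>>(())
+```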
+*/
+pub mod contiguous;
+pub mod noncontiguous;
diff --git a/third_party/rust/aho-corasick/src/nfa/noncontiguous.rs b/third_party/rust/aho-corasick/src/nfa/noncontiguous.rs
new file mode 100644
index 0000000000..af32617c90
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/nfa/noncontiguous.rs
@@ -0,0 +1,1762 @@
+/*!
+Provides a noncontiguous NFA implementation of Aho-Corasick.
+
+This is a low-level API that generally only needs to be used in niche
+circumstances. When possible, prefer using [`AhoCorasick`](crate::AhoCorasick)
+instead of a noncontiguous NFA directly. Using an `NFA` directly is typically
+only necessary when one needs access to the [`Automaton`] trait implementation.
+*/
+
+use alloc::{
+ collections::{BTreeSet, VecDeque},
+ vec,
+ vec::Vec,
+};
+
+use crate::{
+ automaton::Automaton,
+ util::{
+ alphabet::{ByteClassSet, ByteClasses},
+ error::{BuildError, MatchError},
+ prefilter::{self, opposite_ascii_case, Prefilter},
+ primitives::{IteratorIndexExt, PatternID, SmallIndex, StateID},
+ remapper::Remapper,
+ search::{Anchored, MatchKind},
+ special::Special,
+ },
+};
+
+/// A noncontiguous NFA implementation of Aho-Corasick.
+///
+/// When possible, prefer using [`AhoCorasick`](crate::AhoCorasick) instead of
+/// this type directly. Using an `NFA` directly is typically only necessary
+/// when one needs access to the [`Automaton`] trait implementation.
+///
+/// This NFA represents the "core" implementation of Aho-Corasick in this
+/// crate. Namely, constructing this NFA involves building a trie and then
+/// filling in the failure transitions between states, similar to what is
+/// described in any standard textbook description of Aho-Corasick.
+///
+/// In order to minimize heap usage and to avoid additional construction costs,
+/// this implementation represents the transitions of all states as distinct
+/// sparse memory allocations. This is where it gets its name from. That is,
+/// this NFA has no contiguous memory allocation for its transition table. Each
+/// state gets its own allocation.
+///
+/// While the sparse representation keeps memory usage to somewhat reasonable
+/// levels, it is still quite large and also results in somewhat mediocre
+/// search performance. For this reason, it is almost always a good idea to
+/// use a [`contiguous::NFA`](crate::nfa::contiguous::NFA) instead. It is
+/// marginally slower to build, but has higher throughput and can sometimes use
+/// an order of magnitude less memory. The main reason to use a noncontiguous
+/// NFA is when you need the fastest possible construction time, or when a
+/// contiguous NFA does not have the desired capacity. (The total number of NFA
+/// states it can have is fewer than a noncontiguous NFA.)
+///
+/// # Example
+///
+/// This example shows how to build an `NFA` directly and use it to execute
+/// [`Automaton::try_find`]:
+///
+/// ```
+/// use aho_corasick::{
+/// automaton::Automaton,
+/// nfa::noncontiguous::NFA,
+/// Input, Match,
+/// };
+///
+/// let patterns = &["b", "abc", "abcd"];
+/// let haystack = "abcd";
+///
+/// let nfa = NFA::new(patterns).unwrap();
+/// assert_eq!(
+/// Some(Match::must(0, 1..2)),
+/// nfa.try_find(&Input::new(haystack))?,
+/// );
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+///
+/// It is also possible to implement your own version of `try_find`. See the
+/// [`Automaton`] documentation for an example.
+#[derive(Clone)]
+pub struct NFA {
+ /// The match semantics built into this NFA.
+ match_kind: MatchKind,
+ /// A set of states. Each state defines its own transitions, a fail
+ /// transition and a set of indices corresponding to matches.
+ ///
+ /// The first state is always the fail state, which is used only as a
+ /// sentinel. Namely, in the final NFA, no transition into the fail state
+ /// exists. (Well, they do, but they aren't followed. Instead, the state's
+ /// failure transition is followed.)
+ ///
+ /// The second state (index 1) is always the dead state. Dead states are
+ /// in every automaton, but only used when leftmost-{first,longest} match
+ /// semantics are enabled. Specifically, they instruct search to stop
+ /// at specific points in order to report the correct match location. In
+ /// the standard Aho-Corasick construction, there are no transitions to
+ /// the dead state.
+ ///
+ /// The third state (index 2) is generally intended to be the starting or
+ /// "root" state.
+ states: Vec<State>,
+ /// Transitions stored in a sparse representation via a linked list.
+ ///
+ /// Each transition contains three pieces of information: the byte it
+ /// is defined for, the state it transitions to and a link to the next
+ /// transition in the same state (or `StateID::ZERO` if it is the last
+ /// transition).
+ ///
+ /// The first transition for each state is determined by `State::sparse`.
+ ///
+ /// Note that this contains a complete set of all transitions in this NFA,
+ /// including states that have a dense representation for transitions.
+ /// (Adding dense transitions for a state doesn't remove its sparse
+ /// transitions, since deleting transitions from this particular sparse
+ /// representation would be fairly expensive.)
+ sparse: Vec<Transition>,
+ /// Transitions stored in a dense representation.
+ ///
+ /// A state has a row in this table if and only if `State::dense` is
+ /// not equal to `StateID::ZERO`. When not zero, there are precisely
+ /// `NFA::byte_classes::alphabet_len()` entries beginning at `State::dense`
+ /// in this table.
+ ///
+ /// Generally a very small minority of states have a dense representation
+ /// since it uses so much memory.
+ dense: Vec<StateID>,
+ /// Matches stored in linked list for each state.
+ ///
+ /// Like sparse transitions, each match has a link to the next match in the
+ /// state.
+ ///
+ /// The first match for each state is determined by `State::matches`.
+ matches: Vec<Match>,
+ /// The length, in bytes, of each pattern in this NFA. This slice is
+ /// indexed by `PatternID`.
+ ///
+ /// The number of entries in this vector corresponds to the total number of
+ /// patterns in this automaton.
+ pattern_lens: Vec<SmallIndex>,
+ /// A prefilter for quickly skipping to candidate matches, if pertinent.
+ prefilter: Option<Prefilter>,
+    /// A set of equivalence classes in terms of bytes. We compute this while
+    /// building the NFA and use it both for states with a dense
+    /// representation of their transitions and for building the DFA. We
+    /// store it on the NFA since it's easy to compute while visiting the
+    /// patterns.
+ byte_classes: ByteClasses,
+ /// The length, in bytes, of the shortest pattern in this automaton. This
+ /// information is useful for detecting whether an automaton matches the
+ /// empty string or not.
+ min_pattern_len: usize,
+ /// The length, in bytes, of the longest pattern in this automaton. This
+ /// information is useful for keeping correct buffer sizes when searching
+ /// on streams.
+ max_pattern_len: usize,
+ /// The information required to deduce which states are "special" in this
+ /// NFA.
+ ///
+ /// Since the DEAD and FAIL states are always the first two states and
+ /// there are only ever two start states (which follow all of the match
+ /// states), it follows that we can determine whether a state is a fail,
+ /// dead, match or start with just a few comparisons on the ID itself:
+ ///
+ /// is_dead(sid): sid == NFA::DEAD
+ /// is_fail(sid): sid == NFA::FAIL
+ /// is_match(sid): NFA::FAIL < sid && sid <= max_match_id
+ /// is_start(sid): sid == start_unanchored_id || sid == start_anchored_id
+ ///
+ /// Note that this only applies to the NFA after it has been constructed.
+ /// During construction, the start states are the first ones added and the
+    /// match states are interleaved with non-match states. Once all of the
+ /// states have been added, the states are shuffled such that the above
+ /// predicates hold.
+ special: Special,
+}
+
+impl NFA {
+ /// Create a new Aho-Corasick noncontiguous NFA using the default
+ /// configuration.
+ ///
+ /// Use a [`Builder`] if you want to change the configuration.
+ pub fn new<I, P>(patterns: I) -> Result<NFA, BuildError>
+ where
+ I: IntoIterator<Item = P>,
+ P: AsRef<[u8]>,
+ {
+ NFA::builder().build(patterns)
+ }
+
+ /// A convenience method for returning a new Aho-Corasick noncontiguous NFA
+ /// builder.
+ ///
+ /// This usually permits one to just import the `NFA` type.
+ pub fn builder() -> Builder {
+ Builder::new()
+ }
+}
+
+impl NFA {
+ /// The DEAD state is a sentinel state like the FAIL state. The DEAD state
+ /// instructs any search to stop and return any currently recorded match,
+ /// or no match otherwise. Generally speaking, it is impossible for an
+ /// unanchored standard search to enter a DEAD state. But an anchored
+    /// search can, and so too can a leftmost search.
+ ///
+ /// We put DEAD before FAIL so that DEAD is always 0. We repeat this
+    /// decision across the other Aho-Corasick automata, so that DEAD
+ /// states there are always 0 too. It's not that we need all of the
+ /// implementations to agree, but rather, the contiguous NFA and the DFA
+ /// use a sort of "premultiplied" state identifier where the only state
+ /// whose ID is always known and constant is the first state. Subsequent
+ /// state IDs depend on how much space has already been used in the
+ /// transition table.
+ pub(crate) const DEAD: StateID = StateID::new_unchecked(0);
+ /// The FAIL state mostly just corresponds to the ID of any transition on a
+ /// state that isn't explicitly defined. When one transitions into the FAIL
+ /// state, one must follow the previous state's failure transition before
+ /// doing the next state lookup. In this way, FAIL is more of a sentinel
+ /// than a state that one actually transitions into. In particular, it is
+ /// never exposed in the `Automaton` interface.
+ pub(crate) const FAIL: StateID = StateID::new_unchecked(1);
+
+ /// Returns the equivalence classes of bytes found while constructing
+ /// this NFA.
+ ///
+    /// Note that this NFA only makes use of these equivalence classes for
+    /// states with a dense representation of their transitions. They are
+    /// also useful for building the DFA when desired.
+ pub(crate) fn byte_classes(&self) -> &ByteClasses {
+ &self.byte_classes
+ }
+
+ /// Returns a slice containing the length of each pattern in this searcher.
+ /// It is indexed by `PatternID` and has length `NFA::patterns_len`.
+ ///
+ /// This is exposed for convenience when building a contiguous NFA. But it
+ /// can be reconstructed from the `Automaton` API if necessary.
+ pub(crate) fn pattern_lens_raw(&self) -> &[SmallIndex] {
+ &self.pattern_lens
+ }
+
+ /// Returns a slice of all states in this non-contiguous NFA.
+ pub(crate) fn states(&self) -> &[State] {
+ &self.states
+ }
+
+ /// Returns the underlying "special" state information for this NFA.
+ pub(crate) fn special(&self) -> &Special {
+ &self.special
+ }
+
+ /// Swaps the states at `id1` and `id2`.
+ ///
+ /// This does not update the transitions of any state to account for the
+ /// state swap.
+ pub(crate) fn swap_states(&mut self, id1: StateID, id2: StateID) {
+ self.states.swap(id1.as_usize(), id2.as_usize());
+ }
+
+ /// Re-maps all state IDs in this NFA according to the `map` function
+ /// given.
+ pub(crate) fn remap(&mut self, map: impl Fn(StateID) -> StateID) {
+ let alphabet_len = self.byte_classes.alphabet_len();
+ for state in self.states.iter_mut() {
+ state.fail = map(state.fail);
+ let mut link = state.sparse;
+ while link != StateID::ZERO {
+ let t = &mut self.sparse[link];
+ t.next = map(t.next);
+ link = t.link;
+ }
+ if state.dense != StateID::ZERO {
+ let start = state.dense.as_usize();
+ for next in self.dense[start..][..alphabet_len].iter_mut() {
+ *next = map(*next);
+ }
+ }
+ }
+ }
+
+ /// Iterate over all of the transitions for the given state ID.
+ pub(crate) fn iter_trans(
+ &self,
+ sid: StateID,
+ ) -> impl Iterator<Item = Transition> + '_ {
+ let mut link = self.states[sid].sparse;
+ core::iter::from_fn(move || {
+ if link == StateID::ZERO {
+ return None;
+ }
+ let t = self.sparse[link];
+ link = t.link;
+ Some(t)
+ })
+ }
+
+ /// Iterate over all of the matches for the given state ID.
+ pub(crate) fn iter_matches(
+ &self,
+ sid: StateID,
+ ) -> impl Iterator<Item = PatternID> + '_ {
+ let mut link = self.states[sid].matches;
+ core::iter::from_fn(move || {
+ if link == StateID::ZERO {
+ return None;
+ }
+ let m = self.matches[link];
+ link = m.link;
+ Some(m.pid)
+ })
+ }
+
+ /// Return the link following the one given. If the one given is the last
+ /// link for the given state, then return `None`.
+ ///
+ /// If no previous link is given, then this returns the first link in the
+ /// state, if one exists.
+ ///
+ /// This is useful for manually iterating over the transitions in a single
+ /// state without borrowing the NFA. This permits mutating other parts of
+ /// the NFA during iteration. Namely, one can access the transition pointed
+ /// to by the link via `self.sparse[link]`.
+ fn next_link(
+ &self,
+ sid: StateID,
+ prev: Option<StateID>,
+ ) -> Option<StateID> {
+ let link =
+ prev.map_or(self.states[sid].sparse, |p| self.sparse[p].link);
+ if link == StateID::ZERO {
+ None
+ } else {
+ Some(link)
+ }
+ }
+
+ /// Follow the transition for the given byte in the given state. If no such
+ /// transition exists, then the FAIL state ID is returned.
+ #[inline(always)]
+ fn follow_transition(&self, sid: StateID, byte: u8) -> StateID {
+ let s = &self.states[sid];
+ // This is a special case that targets starting states and states
+ // near a start state. Namely, after the initial trie is constructed,
+ // we look for states close to the start state to convert to a dense
+ // representation for their transitions. This winds up using a lot more
+ // memory per state in exchange for faster transition lookups. But
+ // since we only do this for a small number of states (by default), the
+ // memory usage is usually minimal.
+ //
+ // This has *massive* benefit when executing searches because the
+ // unanchored starting state is by far the hottest state and is
+ // frequently visited. Moreover, the 'for' loop below that works
+ // decently on an actually sparse state is disastrous on a state that
+ // is nearly or completely dense.
+ if s.dense == StateID::ZERO {
+ self.follow_transition_sparse(sid, byte)
+ } else {
+ let class = usize::from(self.byte_classes.get(byte));
+ self.dense[s.dense.as_usize() + class]
+ }
+ }
+
+ /// Like `follow_transition`, but always uses the sparse representation.
+ #[inline(always)]
+ fn follow_transition_sparse(&self, sid: StateID, byte: u8) -> StateID {
+ for t in self.iter_trans(sid) {
+ if byte <= t.byte {
+ if byte == t.byte {
+ return t.next;
+ }
+ break;
+ }
+ }
+ NFA::FAIL
+ }
+
+ /// Set the transition for the given byte to the state ID given.
+ ///
+ /// Note that one should not set transitions to the FAIL state. It is not
+ /// technically incorrect, but it wastes space. If a transition is not
+ /// defined, then it is automatically assumed to lead to the FAIL state.
+ fn add_transition(
+ &mut self,
+ prev: StateID,
+ byte: u8,
+ next: StateID,
+ ) -> Result<(), BuildError> {
+ if self.states[prev].dense != StateID::ZERO {
+ let dense = self.states[prev].dense;
+ let class = usize::from(self.byte_classes.get(byte));
+ self.dense[dense.as_usize() + class] = next;
+ }
+
+ let head = self.states[prev].sparse;
+ if head == StateID::ZERO || byte < self.sparse[head].byte {
+ let new_link = self.alloc_transition()?;
+ self.sparse[new_link] = Transition { byte, next, link: head };
+ self.states[prev].sparse = new_link;
+ return Ok(());
+ } else if byte == self.sparse[head].byte {
+ self.sparse[head].next = next;
+ return Ok(());
+ }
+
+ // We handled the only cases where the beginning of the transition
+ // chain needs to change. At this point, we now know that there is
+ // at least one entry in the transition chain and the byte for that
+ // transition is less than the byte for the transition we're adding.
+ let (mut link_prev, mut link_next) = (head, self.sparse[head].link);
+ while link_next != StateID::ZERO && byte > self.sparse[link_next].byte
+ {
+ link_prev = link_next;
+ link_next = self.sparse[link_next].link;
+ }
+ if link_next == StateID::ZERO || byte < self.sparse[link_next].byte {
+ let link = self.alloc_transition()?;
+ self.sparse[link] = Transition { byte, next, link: link_next };
+ self.sparse[link_prev].link = link;
+ } else {
+ assert_eq!(byte, self.sparse[link_next].byte);
+ self.sparse[link_next].next = next;
+ }
+ Ok(())
+ }
+
+    /// This sets every possible transition (all 256 of them) for the given
+    /// state to the same `next` value.
+ ///
+ /// This is useful for efficiently initializing start/dead states.
+ ///
+ /// # Panics
+ ///
+ /// This requires that the state has no transitions added to it already.
+ /// If it has any transitions, then this panics. It will also panic if
+ /// the state has been densified prior to calling this.
+ fn init_full_state(
+ &mut self,
+ prev: StateID,
+ next: StateID,
+ ) -> Result<(), BuildError> {
+ assert_eq!(
+ StateID::ZERO,
+ self.states[prev].dense,
+ "state must not be dense yet"
+ );
+ assert_eq!(
+ StateID::ZERO,
+ self.states[prev].sparse,
+ "state must have zero transitions"
+ );
+ let mut prev_link = StateID::ZERO;
+ for byte in 0..=255 {
+ let new_link = self.alloc_transition()?;
+ self.sparse[new_link] =
+ Transition { byte, next, link: StateID::ZERO };
+ if prev_link == StateID::ZERO {
+ self.states[prev].sparse = new_link;
+ } else {
+ self.sparse[prev_link].link = new_link;
+ }
+ prev_link = new_link;
+ }
+ Ok(())
+ }
+
+ /// Add a match for the given pattern ID to the state for the given ID.
+ fn add_match(
+ &mut self,
+ sid: StateID,
+ pid: PatternID,
+ ) -> Result<(), BuildError> {
+ let head = self.states[sid].matches;
+ let mut link = head;
+ while self.matches[link].link != StateID::ZERO {
+ link = self.matches[link].link;
+ }
+ let new_match_link = self.alloc_match()?;
+ self.matches[new_match_link].pid = pid;
+ if link == StateID::ZERO {
+ self.states[sid].matches = new_match_link;
+ } else {
+ self.matches[link].link = new_match_link;
+ }
+ Ok(())
+ }
+
+ /// Copy matches from the `src` state to the `dst` state. This is useful
+ /// when a match state can be reached via a failure transition. In which
+ /// case, you'll want to copy the matches (if any) from the state reached
+ /// by the failure transition to the original state you were at.
+ fn copy_matches(
+ &mut self,
+ src: StateID,
+ dst: StateID,
+ ) -> Result<(), BuildError> {
+ let head_dst = self.states[dst].matches;
+ let mut link_dst = head_dst;
+ while self.matches[link_dst].link != StateID::ZERO {
+ link_dst = self.matches[link_dst].link;
+ }
+ let mut link_src = self.states[src].matches;
+ while link_src != StateID::ZERO {
+ let new_match_link =
+ StateID::new(self.matches.len()).map_err(|e| {
+ BuildError::state_id_overflow(
+ StateID::MAX.as_u64(),
+ e.attempted(),
+ )
+ })?;
+ self.matches.push(Match {
+ pid: self.matches[link_src].pid,
+ link: StateID::ZERO,
+ });
+ if link_dst == StateID::ZERO {
+ self.states[dst].matches = new_match_link;
+ } else {
+ self.matches[link_dst].link = new_match_link;
+ }
+
+ link_dst = new_match_link;
+ link_src = self.matches[link_src].link;
+ }
+ Ok(())
+ }
+
+    /// Create a new entry in `NFA::sparse`, if there's room, and return that
+ /// entry's ID. If there's no room, then an error is returned.
+ fn alloc_transition(&mut self) -> Result<StateID, BuildError> {
+ let id = StateID::new(self.sparse.len()).map_err(|e| {
+ BuildError::state_id_overflow(StateID::MAX.as_u64(), e.attempted())
+ })?;
+ self.sparse.push(Transition::default());
+ Ok(id)
+ }
+
+ /// Create a new entry in `NFA::matches`, if there's room, and return that
+ /// entry's ID. If there's no room, then an error is returned.
+ fn alloc_match(&mut self) -> Result<StateID, BuildError> {
+ let id = StateID::new(self.matches.len()).map_err(|e| {
+ BuildError::state_id_overflow(StateID::MAX.as_u64(), e.attempted())
+ })?;
+ self.matches.push(Match::default());
+ Ok(id)
+ }
+
+ /// Create a new set of `N` transitions in this NFA's dense transition
+    /// table. The ID returned corresponds to the index at which the `N`
+ /// transitions begin. So `id+0` is the first transition and `id+(N-1)` is
+ /// the last.
+ ///
+ /// `N` is determined via `NFA::byte_classes::alphabet_len`.
+ fn alloc_dense_state(&mut self) -> Result<StateID, BuildError> {
+ let id = StateID::new(self.dense.len()).map_err(|e| {
+ BuildError::state_id_overflow(StateID::MAX.as_u64(), e.attempted())
+ })?;
+ // We use FAIL because it's the correct default. If a state doesn't
+ // have a transition defined for every possible byte value, then the
+ // transition function should return NFA::FAIL.
+ self.dense.extend(
+ core::iter::repeat(NFA::FAIL)
+ .take(self.byte_classes.alphabet_len()),
+ );
+ Ok(id)
+ }
+
+ /// Allocate and add a fresh state to the underlying NFA and return its
+ /// ID (guaranteed to be one more than the ID of the previously allocated
+ /// state). If the ID would overflow `StateID`, then this returns an error.
+ fn alloc_state(&mut self, depth: usize) -> Result<StateID, BuildError> {
+ // This is OK because we error when building the trie if we see a
+ // pattern whose length cannot fit into a 'SmallIndex', and the longest
+ // possible depth corresponds to the length of the longest pattern.
+ let depth = SmallIndex::new(depth)
+ .expect("patterns longer than SmallIndex::MAX are not allowed");
+ let id = StateID::new(self.states.len()).map_err(|e| {
+ BuildError::state_id_overflow(StateID::MAX.as_u64(), e.attempted())
+ })?;
+ self.states.push(State {
+ sparse: StateID::ZERO,
+ dense: StateID::ZERO,
+ matches: StateID::ZERO,
+ fail: self.special.start_unanchored_id,
+ depth,
+ });
+ Ok(id)
+ }
+}
+
+// SAFETY: 'start_state' always returns a valid state ID, 'next_state' always
+// returns a valid state ID given a valid state ID. We otherwise claim that
+// all other methods are correct as well.
+unsafe impl Automaton for NFA {
+ #[inline(always)]
+ fn start_state(&self, anchored: Anchored) -> Result<StateID, MatchError> {
+ match anchored {
+ Anchored::No => Ok(self.special.start_unanchored_id),
+ Anchored::Yes => Ok(self.special.start_anchored_id),
+ }
+ }
+
+ #[inline(always)]
+ fn next_state(
+ &self,
+ anchored: Anchored,
+ mut sid: StateID,
+ byte: u8,
+ ) -> StateID {
+ // This terminates since:
+ //
+ // 1. state.fail never points to the FAIL state.
+ // 2. All state.fail values point to a state closer to the start state.
+ // 3. The start state has no transitions to the FAIL state.
+ loop {
+ let next = self.follow_transition(sid, byte);
+ if next != NFA::FAIL {
+ return next;
+ }
+ // For an anchored search, we never follow failure transitions
+ // because failure transitions lead us down a path to matching
+ // a *proper* suffix of the path we were on. Thus, it can only
+ // produce matches that appear after the beginning of the search.
+ if anchored.is_anchored() {
+ return NFA::DEAD;
+ }
+ sid = self.states[sid].fail();
+ }
+ }
+
+ #[inline(always)]
+ fn is_special(&self, sid: StateID) -> bool {
+ sid <= self.special.max_special_id
+ }
+
+ #[inline(always)]
+ fn is_dead(&self, sid: StateID) -> bool {
+ sid == NFA::DEAD
+ }
+
+ #[inline(always)]
+ fn is_match(&self, sid: StateID) -> bool {
+ // N.B. This returns true when sid==NFA::FAIL but that's okay because
+ // NFA::FAIL is not actually a valid state ID from the perspective of
+ // the Automaton trait. Namely, it is never returned by 'start_state'
+ // or by 'next_state'. So we don't need to care about it here.
+ !self.is_dead(sid) && sid <= self.special.max_match_id
+ }
+
+ #[inline(always)]
+ fn is_start(&self, sid: StateID) -> bool {
+ sid == self.special.start_unanchored_id
+ || sid == self.special.start_anchored_id
+ }
+
+ #[inline(always)]
+ fn match_kind(&self) -> MatchKind {
+ self.match_kind
+ }
+
+ #[inline(always)]
+ fn patterns_len(&self) -> usize {
+ self.pattern_lens.len()
+ }
+
+ #[inline(always)]
+ fn pattern_len(&self, pid: PatternID) -> usize {
+ self.pattern_lens[pid].as_usize()
+ }
+
+ #[inline(always)]
+ fn min_pattern_len(&self) -> usize {
+ self.min_pattern_len
+ }
+
+ #[inline(always)]
+ fn max_pattern_len(&self) -> usize {
+ self.max_pattern_len
+ }
+
+ #[inline(always)]
+ fn match_len(&self, sid: StateID) -> usize {
+ self.iter_matches(sid).count()
+ }
+
+ #[inline(always)]
+ fn match_pattern(&self, sid: StateID, index: usize) -> PatternID {
+ self.iter_matches(sid).nth(index).unwrap()
+ }
+
+ #[inline(always)]
+ fn memory_usage(&self) -> usize {
+ self.states.len() * core::mem::size_of::<State>()
+ + self.sparse.len() * core::mem::size_of::<Transition>()
+ + self.matches.len() * core::mem::size_of::<Match>()
+ + self.dense.len() * StateID::SIZE
+ + self.pattern_lens.len() * SmallIndex::SIZE
+ + self.prefilter.as_ref().map_or(0, |p| p.memory_usage())
+ }
+
+ #[inline(always)]
+ fn prefilter(&self) -> Option<&Prefilter> {
+ self.prefilter.as_ref()
+ }
+}
+
+/// A representation of a sparse NFA state for an Aho-Corasick automaton.
+///
+/// It contains the transitions to the next state, a failure transition for
+/// cases where there exists no other transition for the current input byte
+/// and the matches implied by visiting this state (if any).
+#[derive(Clone, Debug)]
+pub(crate) struct State {
+    /// A pointer to `NFA::sparse` corresponding to the head of a linked list
+ /// containing all of the transitions for this state.
+ ///
+ /// This is `StateID::ZERO` if and only if this state has zero transitions.
+ sparse: StateID,
+ /// A pointer to a row of `N` transitions in `NFA::dense`. These
+ /// transitions correspond precisely to what is obtained by traversing
+ /// `sparse`, but permits constant time lookup.
+ ///
+ /// When this is zero (which is true for most states in the default
+ /// configuration), then this state has no dense representation.
+ ///
+ /// Note that `N` is equal to `NFA::byte_classes::alphabet_len()`. This is
+ /// typically much less than 256 (the maximum value).
+ dense: StateID,
+ /// A pointer to `NFA::matches` corresponding to the head of a linked list
+ /// containing all of the matches for this state.
+ ///
+ /// This is `StateID::ZERO` if and only if this state is not a match state.
+ matches: StateID,
+ /// The state that should be transitioned to if the current byte in the
+ /// haystack does not have a corresponding transition defined in this
+ /// state.
+ fail: StateID,
+ /// The depth of this state. Specifically, this is the distance from this
+ /// state to the starting state. (For the special sentinel states DEAD and
+ /// FAIL, their depth is always 0.) The depth of a starting state is 0.
+ ///
+    /// Note that depth is used by both this non-contiguous NFA and the
+    /// contiguous NFA. Namely, it permits an optimization where states near
+    /// the starting state have their transitions stored in a dense fashion,
+    /// while all other states have their transitions stored in a sparse
+    /// fashion. (See `Compiler::densify` below.) In any case, this is really
+    /// the only convenient place to compute and store this information,
+    /// which we need when building the contiguous NFA.
+ depth: SmallIndex,
+}
+
+impl State {
+ /// Return true if and only if this state is a match state.
+ pub(crate) fn is_match(&self) -> bool {
+ self.matches != StateID::ZERO
+ }
+
+ /// Returns the failure transition for this state.
+ pub(crate) fn fail(&self) -> StateID {
+ self.fail
+ }
+
+ /// Returns the depth of this state. That is, the number of transitions
+ /// this state is from the start state of the NFA.
+ pub(crate) fn depth(&self) -> SmallIndex {
+ self.depth
+ }
+}
+
+/// A single transition in a non-contiguous NFA.
+#[derive(Clone, Copy, Default)]
+#[repr(packed)]
+pub(crate) struct Transition {
+ byte: u8,
+ next: StateID,
+ link: StateID,
+}
+
+impl Transition {
+ /// Return the byte for which this transition is defined.
+ pub(crate) fn byte(&self) -> u8 {
+ self.byte
+ }
+
+ /// Return the ID of the state that this transition points to.
+ pub(crate) fn next(&self) -> StateID {
+ self.next
+ }
+
+ /// Return the ID of the next transition.
+ fn link(&self) -> StateID {
+ self.link
+ }
+}
+
+impl core::fmt::Debug for Transition {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ write!(
+ f,
+ "Transition(byte: {:X?}, next: {:?}, link: {:?})",
+ self.byte,
+ self.next().as_usize(),
+ self.link().as_usize()
+ )
+ }
+}
+
+/// A single match in a non-contiguous NFA.
+#[derive(Clone, Copy, Default)]
+struct Match {
+ pid: PatternID,
+ link: StateID,
+}
+
+impl Match {
+ /// Return the pattern ID for this match.
+ pub(crate) fn pattern(&self) -> PatternID {
+ self.pid
+ }
+
+ /// Return the ID of the next match.
+ fn link(&self) -> StateID {
+ self.link
+ }
+}
+
+impl core::fmt::Debug for Match {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ write!(
+ f,
+ "Match(pid: {:?}, link: {:?})",
+ self.pattern().as_usize(),
+ self.link().as_usize()
+ )
+ }
+}
+
+/// A builder for configuring an Aho-Corasick noncontiguous NFA.
+///
+/// This builder has a subset of the options available to a
+/// [`AhoCorasickBuilder`](crate::AhoCorasickBuilder). Of the shared options,
+/// their behavior is identical.
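+///
+/// # Example
+///
+/// A minimal sketch of configuring and using a builder. It assumes
+/// [`MatchKind::LeftmostFirst`](crate::MatchKind) behaves here as it does for
+/// `AhoCorasickBuilder` (see above):
+///
+/// ```
+/// use aho_corasick::{
+///     automaton::Automaton,
+///     nfa::noncontiguous::NFA,
+///     Input, Match, MatchKind,
+/// };
+///
+/// let nfa = NFA::builder()
+///     .match_kind(MatchKind::LeftmostFirst)
+///     .build(&["samwise", "sam"])?;
+/// assert_eq!(
+///     Some(Match::must(0, 0..7)),
+///     nfa.try_find(&Input::new("samwise"))?,
+/// );
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```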
+#[derive(Clone, Debug)]
+pub struct Builder {
+ match_kind: MatchKind,
+ prefilter: bool,
+ ascii_case_insensitive: bool,
+ dense_depth: usize,
+}
+
+impl Default for Builder {
+ fn default() -> Builder {
+ Builder {
+ match_kind: MatchKind::default(),
+ prefilter: true,
+ ascii_case_insensitive: false,
+ dense_depth: 3,
+ }
+ }
+}
+
+impl Builder {
+ /// Create a new builder for configuring an Aho-Corasick noncontiguous NFA.
+ pub fn new() -> Builder {
+ Builder::default()
+ }
+
+ /// Build an Aho-Corasick noncontiguous NFA from the given iterator of
+ /// patterns.
+ ///
+ /// A builder may be reused to create more NFAs.
+ pub fn build<I, P>(&self, patterns: I) -> Result<NFA, BuildError>
+ where
+ I: IntoIterator<Item = P>,
+ P: AsRef<[u8]>,
+ {
+ debug!("building non-contiguous NFA");
+ let nfa = Compiler::new(self)?.compile(patterns)?;
+ debug!(
+ "non-contiguous NFA built, <states: {:?}, size: {:?}>",
+ nfa.states.len(),
+ nfa.memory_usage()
+ );
+ Ok(nfa)
+ }
+
+ /// Set the desired match semantics.
+ ///
+ /// See
+ /// [`AhoCorasickBuilder::match_kind`](crate::AhoCorasickBuilder::match_kind)
+ /// for more documentation and examples.
+ pub fn match_kind(&mut self, kind: MatchKind) -> &mut Builder {
+ self.match_kind = kind;
+ self
+ }
+
+ /// Enable ASCII-aware case insensitive matching.
+ ///
+ /// See
+ /// [`AhoCorasickBuilder::ascii_case_insensitive`](crate::AhoCorasickBuilder::ascii_case_insensitive)
+ /// for more documentation and examples.
+ pub fn ascii_case_insensitive(&mut self, yes: bool) -> &mut Builder {
+ self.ascii_case_insensitive = yes;
+ self
+ }
+
+ /// Set the limit on how many states use a dense representation for their
+ /// transitions. Other states will generally use a sparse representation.
+ ///
+ /// See
+ /// [`AhoCorasickBuilder::dense_depth`](crate::AhoCorasickBuilder::dense_depth)
+ /// for more documentation and examples.
+ pub fn dense_depth(&mut self, depth: usize) -> &mut Builder {
+ self.dense_depth = depth;
+ self
+ }
+
+ /// Enable heuristic prefilter optimizations.
+ ///
+ /// See
+ /// [`AhoCorasickBuilder::prefilter`](crate::AhoCorasickBuilder::prefilter)
+ /// for more documentation and examples.
+ pub fn prefilter(&mut self, yes: bool) -> &mut Builder {
+ self.prefilter = yes;
+ self
+ }
+}
+
+/// A compiler uses a builder configuration and builds up the NFA formulation
+/// of an Aho-Corasick automaton. This roughly corresponds to the standard
+/// formulation described in textbooks, with some tweaks to support leftmost
+/// searching.
+#[derive(Debug)]
+struct Compiler<'a> {
+ builder: &'a Builder,
+ prefilter: prefilter::Builder,
+ nfa: NFA,
+ byteset: ByteClassSet,
+}
+
+impl<'a> Compiler<'a> {
+ fn new(builder: &'a Builder) -> Result<Compiler<'a>, BuildError> {
+ let prefilter = prefilter::Builder::new(builder.match_kind)
+ .ascii_case_insensitive(builder.ascii_case_insensitive);
+ Ok(Compiler {
+ builder,
+ prefilter,
+ nfa: NFA {
+ match_kind: builder.match_kind,
+ states: vec![],
+ sparse: vec![],
+ dense: vec![],
+ matches: vec![],
+ pattern_lens: vec![],
+ prefilter: None,
+ byte_classes: ByteClasses::singletons(),
+ min_pattern_len: usize::MAX,
+ max_pattern_len: 0,
+ special: Special::zero(),
+ },
+ byteset: ByteClassSet::empty(),
+ })
+ }
+
+ fn compile<I, P>(mut self, patterns: I) -> Result<NFA, BuildError>
+ where
+ I: IntoIterator<Item = P>,
+ P: AsRef<[u8]>,
+ {
+ // Add dummy transition/match links, so that no valid link will point
+ // to another link at index 0.
+ self.nfa.sparse.push(Transition::default());
+ self.nfa.matches.push(Match::default());
+ // Add a dummy dense transition so that no states can have dense==0
+ // represent a valid pointer to dense transitions. This permits
+ // dense==0 to be a sentinel indicating "no dense transitions."
+ self.nfa.dense.push(NFA::DEAD);
+ // the dead state, only used for leftmost and fixed to id==0
+ self.nfa.alloc_state(0)?;
+ // the fail state, which is never entered and fixed to id==1
+ self.nfa.alloc_state(0)?;
+ // unanchored start state, initially fixed to id==2 but later shuffled
+ // to appear after all non-start match states.
+ self.nfa.special.start_unanchored_id = self.nfa.alloc_state(0)?;
+ // anchored start state, initially fixed to id==3 but later shuffled
+ // to appear after unanchored start state.
+ self.nfa.special.start_anchored_id = self.nfa.alloc_state(0)?;
+ // Initialize the unanchored starting state in order to make it dense,
+ // and thus make transition lookups on this state faster.
+ self.init_unanchored_start_state()?;
+ // Set all transitions on the DEAD state to point to itself. This way,
+ // the DEAD state can never be escaped. It MUST be used as a sentinel
+ // in any correct search.
+ self.add_dead_state_loop()?;
+ // Build the base trie from the given patterns.
+ self.build_trie(patterns)?;
+ self.nfa.states.shrink_to_fit();
+        // Turn our set of bytes into equivalence classes. This NFA
+ // implementation uses byte classes only for states that use a dense
+ // representation of transitions. (And that's why this comes before
+ // `self.densify()`, as the byte classes need to be set first.)
+ self.nfa.byte_classes = self.byteset.byte_classes();
+ // Add transitions (and maybe matches) to the anchored starting state.
+ // The anchored starting state is used for anchored searches. The only
+ // mechanical difference between it and the unanchored start state is
+ // that missing transitions map to the DEAD state instead of the FAIL
+ // state.
+ self.set_anchored_start_state()?;
+ // Rewrite transitions to the FAIL state on the unanchored start state
+ // as self-transitions. This keeps the start state active at all times.
+ self.add_unanchored_start_state_loop();
+ // Make some (possibly zero) states use a dense representation for
+ // transitions. It's important to do this right after the states
+ // and non-failure transitions are solidified. That way, subsequent
+ // accesses (particularly `fill_failure_transitions`) will benefit from
+ // the faster transition lookup in densified states.
+ self.densify()?;
+ // The meat of the Aho-Corasick algorithm: compute and write failure
+ // transitions. i.e., the state to move to when a transition isn't
+ // defined in the current state. These are epsilon transitions and thus
+ // make this formulation an NFA.
+ self.fill_failure_transitions()?;
+ // Handle a special case under leftmost semantics when at least one
+ // of the patterns is the empty string.
+ self.close_start_state_loop_for_leftmost();
+ // Shuffle states so that we have DEAD, FAIL, MATCH, ..., START, START,
+ // NON-MATCH, ... This permits us to very quickly query the type of
+ // the state we're currently in during a search.
+ self.shuffle();
+ self.nfa.prefilter = self.prefilter.build();
+ // Store the maximum ID of all *relevant* special states. Start states
+ // are only relevant when we have a prefilter, otherwise, there is zero
+ // reason to care about whether a state is a start state or not during
+ // a search. Indeed, without a prefilter, we are careful to explicitly
+ // NOT care about start states, otherwise the search can ping pong
+ // between the unrolled loop and the handling of special-status states
+ // and destroy perf.
+ self.nfa.special.max_special_id = if self.nfa.prefilter.is_some() {
+ // Why the anchored starting state? Because we always put it
+ // after the unanchored starting state and it is therefore the
+ // maximum. Why put unanchored followed by anchored? No particular
+ // reason, but that's how the states are logically organized in the
+ // Thompson NFA implementation found in regex-automata. ¯\_(ツ)_/¯
+ self.nfa.special.start_anchored_id
+ } else {
+ self.nfa.special.max_match_id
+ };
+ self.nfa.sparse.shrink_to_fit();
+ self.nfa.dense.shrink_to_fit();
+ self.nfa.matches.shrink_to_fit();
+ self.nfa.pattern_lens.shrink_to_fit();
+ Ok(self.nfa)
+ }
+
+ /// This sets up the initial prefix trie that makes up the Aho-Corasick
+ /// automaton. Effectively, it creates the basic structure of the
+ /// automaton, where every pattern given has a path from the start state to
+ /// the end of the pattern.
+ fn build_trie<I, P>(&mut self, patterns: I) -> Result<(), BuildError>
+ where
+ I: IntoIterator<Item = P>,
+ P: AsRef<[u8]>,
+ {
+ 'PATTERNS: for (i, pat) in patterns.into_iter().enumerate() {
+ let pid = PatternID::new(i).map_err(|e| {
+ BuildError::pattern_id_overflow(
+ PatternID::MAX.as_u64(),
+ e.attempted(),
+ )
+ })?;
+ let pat = pat.as_ref();
+ let patlen = SmallIndex::new(pat.len())
+ .map_err(|_| BuildError::pattern_too_long(pid, pat.len()))?;
+ self.nfa.min_pattern_len =
+ core::cmp::min(self.nfa.min_pattern_len, pat.len());
+ self.nfa.max_pattern_len =
+ core::cmp::max(self.nfa.max_pattern_len, pat.len());
+ assert_eq!(
+ i,
+ self.nfa.pattern_lens.len(),
+ "expected number of patterns to match pattern ID"
+ );
+ self.nfa.pattern_lens.push(patlen);
+ // We add the pattern to the prefilter here because the pattern
+ // ID in the prefilter is determined with respect to the patterns
+ // added to the prefilter. That is, it isn't the ID we have here,
+ // but the one determined by its own accounting of patterns.
+ // To ensure they line up, we add every pattern we see to the
+ // prefilter, even if some patterns ultimately are impossible to
+ // match (in leftmost-first semantics specifically).
+ //
+ // Another way of doing this would be to expose an API in the
+ // prefilter to permit setting your own pattern IDs. Or to just use
+ // our own map and go between them. But this case is sufficiently
+ // rare that we don't bother and just make sure they're in sync.
+ if self.builder.prefilter {
+ self.prefilter.add(pat);
+ }
+
+ let mut prev = self.nfa.special.start_unanchored_id;
+ let mut saw_match = false;
+ for (depth, &b) in pat.iter().enumerate() {
+ // When leftmost-first match semantics are requested, we
+ // specifically stop adding patterns when a previously added
+ // pattern is a prefix of it. We avoid adding it because
+ // leftmost-first semantics imply that the pattern can never
+ // match. This is not just an optimization to save space! It
+ // is necessary for correctness. In fact, this is the only
+ // difference in the automaton between the implementations for
+ // leftmost-first and leftmost-longest.
+ saw_match = saw_match || self.nfa.states[prev].is_match();
+ if self.builder.match_kind.is_leftmost_first() && saw_match {
+ // Skip to the next pattern immediately. This avoids
+ // incorrectly adding a match after this loop terminates.
+ continue 'PATTERNS;
+ }
+
+ // Add this byte to our equivalence classes. These don't
+ // get used while building the trie, but other Aho-Corasick
+ // implementations may use them.
+ self.byteset.set_range(b, b);
+ if self.builder.ascii_case_insensitive {
+ let b = opposite_ascii_case(b);
+ self.byteset.set_range(b, b);
+ }
+
+ // If the transition from prev using the current byte already
+ // exists, then just move through it. Otherwise, add a new
+ // state. We track the depth here so that we can determine
+ // how to represent transitions. States near the start state
+ // use a dense representation that uses more memory but is
+ // faster. Other states use a sparse representation that uses
+ // less memory but is slower.
+ let next = self.nfa.follow_transition(prev, b);
+ if next != NFA::FAIL {
+ prev = next;
+ } else {
+ let next = self.nfa.alloc_state(depth)?;
+ self.nfa.add_transition(prev, b, next)?;
+ if self.builder.ascii_case_insensitive {
+ let b = opposite_ascii_case(b);
+ self.nfa.add_transition(prev, b, next)?;
+ }
+ prev = next;
+ }
+ }
+ // Once the pattern has been added, log the match in the final
+ // state that it reached.
+ self.nfa.add_match(prev, pid)?;
+ }
+ Ok(())
+ }
+
+ /// This routine creates failure transitions according to the standard
+ /// textbook formulation of the Aho-Corasick algorithm, with a couple small
+ /// tweaks to support "leftmost" semantics.
+ ///
+ /// Building failure transitions is the most interesting part of building
+ /// the Aho-Corasick automaton, because they are what allow searches to
+ /// be performed in linear time. Specifically, a failure transition is
+ /// a single transition associated with each state that points back to
+ /// the longest proper suffix of the pattern being searched. The failure
+ /// transition is followed whenever there exists no transition on the
+ /// current state for the current input byte. If there is no other proper
+ /// suffix, then the failure transition points back to the starting state.
+ ///
+ /// For example, let's say we built an Aho-Corasick automaton with the
+ /// following patterns: 'abcd' and 'cef'. The trie looks like this:
+ ///
+ /// ```ignore
+ /// a - S1 - b - S2 - c - S3 - d - S4*
+ /// /
+ /// S0 - c - S5 - e - S6 - f - S7*
+ /// ```
+ ///
+ /// At this point, it should be fairly straight-forward to see how this
+ /// trie can be used in a simplistic way. At any given position in the
+ /// text we're searching (called the "subject" string), all we need to do
+ /// is follow the transitions in the trie by consuming one transition for
+ /// each byte in the subject string. If we reach a match state, then we can
+ /// report that location as a match.
+ ///
+ /// The trick comes when searching a subject string like 'abcef'. We'll
+ /// initially follow the transition from S0 to S1 and wind up in S3 after
+    /// observing the 'c' byte. At this point, the next byte is 'e' but state
+ /// S3 has no transition for 'e', so the search fails. We then would need
+ /// to restart the search at the next position in 'abcef', which
+ /// corresponds to 'b'. The match would fail, but the next search starting
+ /// at 'c' would finally succeed. The problem with this approach is that
+ /// we wind up searching the subject string potentially many times. In
+ /// effect, this makes the algorithm have worst case `O(n * m)` complexity,
+ /// where `n ~ len(subject)` and `m ~ len(all patterns)`. We would instead
+ /// like to achieve a `O(n + m)` worst case complexity.
+ ///
+ /// This is where failure transitions come in. Instead of dying at S3 in
+ /// the first search, the automaton can instruct the search to move to
+ /// another part of the automaton that corresponds to a suffix of what
+ /// we've seen so far. Recall that we've seen 'abc' in the subject string,
+ /// and the automaton does indeed have a non-empty suffix, 'c', that could
+ /// potentially lead to another match. Thus, the actual Aho-Corasick
+ /// automaton for our patterns in this case looks like this:
+ ///
+ /// ```ignore
+ /// a - S1 - b - S2 - c - S3 - d - S4*
+ /// / /
+ /// / ----------------
+ /// / /
+ /// S0 - c - S5 - e - S6 - f - S7*
+ /// ```
+ ///
+ /// That is, we have a failure transition from S3 to S5, which is followed
+ /// exactly in cases when we are in state S3 but see any byte other than
+ /// 'd' (that is, we've "failed" to find a match in this portion of our
+ /// trie). We know we can transition back to S5 because we've already seen
+ /// a 'c' byte, so we don't need to re-scan it. We can then pick back up
+ /// with the search starting at S5 and complete our match.
+ ///
+ /// Adding failure transitions to a trie is fairly simple, but subtle. The
+    /// key issue is that you might have multiple failure transitions that you
+ /// need to follow. For example, look at the trie for the patterns
+ /// 'abcd', 'b', 'bcd' and 'cd':
+ ///
+ /// ```ignore
+ /// - a - S1 - b - S2* - c - S3 - d - S4*
+ /// / / /
+ /// / ------- -------
+ /// / / /
+ /// S0 --- b - S5* - c - S6 - d - S7*
+ /// \ /
+ /// \ --------
+ /// \ /
+ /// - c - S8 - d - S9*
+ /// ```
+ ///
+ /// The failure transitions for this trie are defined from S2 to S5,
+ /// S3 to S6 and S6 to S8. Moreover, state S2 needs to track that it
+ /// corresponds to a match, since its failure transition to S5 is itself
+ /// a match state.
+ ///
+    /// Perhaps the simplest way to think about adding these failure transitions
+ /// is recursively. That is, if you know the failure transitions for every
+ /// possible previous state that could be visited (e.g., when computing the
+ /// failure transition for S3, you already know the failure transitions
+ /// for S0, S1 and S2), then you can simply follow the failure transition
+ /// of the previous state and check whether the incoming transition is
+ /// defined after following the failure transition.
+ ///
+ /// For example, when determining the failure state for S3, by our
+ /// assumptions, we already know that there is a failure transition from
+ /// S2 (the previous state) to S5. So we follow that transition and check
+ /// whether the transition connecting S2 to S3 is defined. Indeed, it is,
+ /// as there is a transition from S5 to S6 for the byte 'c'. If no such
+ /// transition existed, we could keep following the failure transitions
+ /// until we reach the start state, which is the failure transition for
+ /// every state that has no corresponding proper suffix.
+ ///
+ /// We don't actually use recursion to implement this, but instead, use a
+ /// breadth first search of the automaton. Our base case is the start
+ /// state, whose failure transition is just a transition to itself.
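+    ///
+    /// In pseudo-code, ignoring the leftmost tweaks described below, the
+    /// breadth first search is roughly the following sketch (the actual
+    /// routine below handles more details):
+    ///
+    /// ```ignore
+    /// queue = [next for (byte, next) in transitions(start) if next != start]
+    /// while id = queue.pop_front():
+    ///     for (byte, next) in transitions(id):
+    ///         queue.push_back(next)
+    ///         fail = states[id].fail
+    ///         while transition(fail, byte) is FAIL:
+    ///             fail = states[fail].fail
+    ///         states[next].fail = transition(fail, byte)
+    ///         copy_matches(from: states[next].fail, to: next)
+    /// ```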
+ ///
+ /// When building a leftmost automaton, we proceed as above, but only
+ /// include a subset of failure transitions. Namely, we omit any failure
+ /// transitions that appear after a match state in the trie. This is
+ /// because failure transitions always point back to a proper suffix of
+ /// what has been seen so far. Thus, following a failure transition after
+ /// a match implies looking for a match that starts after the one that has
+ /// already been seen, which is of course therefore not the leftmost match.
+ ///
+ /// N.B. I came up with this algorithm on my own, and after scouring all of
+ /// the other AC implementations I know of (Perl, Snort, many on GitHub).
+ /// I couldn't find any that implement leftmost semantics like this.
+ /// Perl of course needs leftmost-first semantics, but they implement it
+ /// with a seeming hack at *search* time instead of encoding it into the
+ /// automaton. There are also a couple Java libraries that support leftmost
+ /// longest semantics, but they do it by building a queue of matches at
+ /// search time, which is even worse than what Perl is doing. ---AG
+ fn fill_failure_transitions(&mut self) -> Result<(), BuildError> {
+ let is_leftmost = self.builder.match_kind.is_leftmost();
+ let start_uid = self.nfa.special.start_unanchored_id;
+ // Initialize the queue for breadth first search with all transitions
+ // out of the start state. We handle the start state specially because
+ // we only want to follow non-self transitions. If we followed self
+ // transitions, then this would never terminate.
+ let mut queue = VecDeque::new();
+ let mut seen = self.queued_set();
+ let mut prev_link = None;
+ while let Some(link) = self.nfa.next_link(start_uid, prev_link) {
+ prev_link = Some(link);
+ let t = self.nfa.sparse[link];
+
+ // Skip anything we've seen before and any self-transitions on the
+ // start state.
+ if start_uid == t.next() || seen.contains(t.next) {
+ continue;
+ }
+ queue.push_back(t.next);
+ seen.insert(t.next);
+ // Under leftmost semantics, if a state immediately following
+ // the start state is a match state, then we never want to
+ // follow its failure transition since the failure transition
+ // necessarily leads back to the start state, which we never
+ // want to do for leftmost matching after a match has been
+ // found.
+ //
+ // We apply the same logic to non-start states below as well.
+ if is_leftmost && self.nfa.states[t.next].is_match() {
+ self.nfa.states[t.next].fail = NFA::DEAD;
+ }
+ }
+ while let Some(id) = queue.pop_front() {
+ let mut prev_link = None;
+ while let Some(link) = self.nfa.next_link(id, prev_link) {
+ prev_link = Some(link);
+ let t = self.nfa.sparse[link];
+
+ if seen.contains(t.next) {
+ // The only way to visit a duplicate state in a transition
+ // list is when ASCII case insensitivity is enabled. In
+ // this case, we want to skip it since it's redundant work.
+ // But it would also end up duplicating matches, which
+ // results in reporting duplicate matches in some cases.
+ // See the 'acasei010' regression test.
+ continue;
+ }
+ queue.push_back(t.next);
+ seen.insert(t.next);
+
+ // As above for start states, under leftmost semantics, once
+ // we see a match all subsequent states should have no failure
+ // transitions because failure transitions always imply looking
+ // for a match that is a suffix of what has been seen so far
+ // (where "seen so far" corresponds to the string formed by
+ // following the transitions from the start state to the
+ // current state). Under leftmost semantics, we specifically do
+ // not want to allow this to happen because we always want to
+ // report the match found at the leftmost position.
+ //
+ // The difference between leftmost-first and leftmost-longest
+ // occurs previously while we build the trie. For
+ // leftmost-first, we simply omit any entries that would
+ // otherwise require passing through a match state.
+ //
+ // Note that for correctness, the failure transition has to be
+ // set to the dead state for ALL states following a match, not
+ // just the match state itself. However, by setting the failure
+ // transition to the dead state on all match states, the dead
+ // state will automatically propagate to all subsequent states
+ // via the failure state computation below.
+ if is_leftmost && self.nfa.states[t.next].is_match() {
+ self.nfa.states[t.next].fail = NFA::DEAD;
+ continue;
+ }
+ let mut fail = self.nfa.states[id].fail;
+ while self.nfa.follow_transition(fail, t.byte) == NFA::FAIL {
+ fail = self.nfa.states[fail].fail;
+ }
+ fail = self.nfa.follow_transition(fail, t.byte);
+ self.nfa.states[t.next].fail = fail;
+ self.nfa.copy_matches(fail, t.next)?;
+ }
+ // If the start state is a match state, then this automaton can
+ // match the empty string. This implies all states are match states
+ // since every position matches the empty string, so copy the
+ // matches from the start state to every state. Strictly speaking,
+ // this is only necessary for overlapping matches since each
+ // non-empty non-start match state needs to report empty matches
+ // in addition to its own. For the non-overlapping case, such
+ // states only report the first match, which is never empty since
+ // it isn't a start state.
+ if !is_leftmost {
+ self.nfa
+ .copy_matches(self.nfa.special.start_unanchored_id, id)?;
+ }
+ }
+ Ok(())
+ }
+
+ /// Shuffle the states so that they appear in this sequence:
+ ///
+ /// DEAD, FAIL, MATCH..., START, START, NON-MATCH...
+ ///
+ /// The idea here is that if we know how special states are laid out in our
+ /// transition table, then we can determine what "kind" of state we're in
+ /// just by comparing our current state ID with a particular value. In this
+ /// way, we avoid doing extra memory lookups.
+ ///
+ /// Before shuffling begins, our states look something like this:
+ ///
+ /// DEAD, FAIL, START, START, (MATCH | NON-MATCH)...
+ ///
+ /// So all we need to do is move all of the MATCH states so that they
+ /// all appear before any NON-MATCH state, like so:
+ ///
+ /// DEAD, FAIL, START, START, MATCH... NON-MATCH...
+ ///
+ /// Then it's just a simple matter of swapping the two START states with
+ /// the last two MATCH states.
+ ///
+ /// (This is the same technique used for fully compiled DFAs in
+ /// regex-automata.)
+ fn shuffle(&mut self) {
+ let old_start_uid = self.nfa.special.start_unanchored_id;
+ let old_start_aid = self.nfa.special.start_anchored_id;
+ assert!(old_start_uid < old_start_aid);
+ assert_eq!(
+ 3,
+ old_start_aid.as_usize(),
+ "anchored start state should be at index 3"
+ );
+ // We implement shuffling by a sequence of pairwise swaps of states.
+ // Since we have a number of things referencing states via their
+ // IDs and swapping them changes their IDs, we need to record every
+ // swap we make so that we can remap IDs. The remapper handles this
+ // book-keeping for us.
+ let mut remapper = Remapper::new(&self.nfa, 0);
+ // The way we proceed here is by moving all match states so that
+ // they directly follow the start states. So it will go: DEAD, FAIL,
+ // START-UNANCHORED, START-ANCHORED, MATCH, ..., NON-MATCH, ...
+ //
+ // To do that, we proceed forward through all states after
+ // START-ANCHORED and swap match states so that they appear before all
+ // non-match states.
+ let mut next_avail = StateID::from(4u8);
+ for i in next_avail.as_usize()..self.nfa.states.len() {
+ let sid = StateID::new(i).unwrap();
+ if !self.nfa.states[sid].is_match() {
+ continue;
+ }
+ remapper.swap(&mut self.nfa, sid, next_avail);
+ // The key invariant here is that only non-match states exist
+ // between 'next_avail' and 'sid' (with them being potentially
+ // equivalent). Thus, incrementing 'next_avail' by 1 is guaranteed
+ // to land on the leftmost non-match state. (Unless 'next_avail'
+ // and 'sid' are equivalent, in which case, a swap will occur but
+ // it is a no-op.)
+ next_avail = StateID::new(next_avail.one_more()).unwrap();
+ }
+ // Now we'd like to move the start states to immediately following the
+ // match states. (The start states may themselves be match states, but
+ // we'll handle that later.) We arrange the states this way so that we
+ // don't necessarily need to check whether a state is a start state or
+ // not before checking whether a state is a match state. For example,
+ // we'd like to be able to write this as our state machine loop:
+ //
+ // sid = start()
+ // for byte in haystack:
+ // sid = next(sid, byte)
+ // if sid <= nfa.max_start_id:
+ // if sid <= nfa.max_dead_id:
+ // # search complete
+ // elif sid <= nfa.max_match_id:
+ // # found match
+ //
+ // The important context here is that we might not want to look for
+ // start states at all. Namely, if a searcher doesn't have a prefilter,
+ // then there is no reason to care about whether we're in a start state
+ // or not. And indeed, if we did check for it, this very hot loop would
+ // ping pong between the special state handling and the main state
+ // transition logic. This in turn stalls the CPU by killing branch
+ // prediction.
+ //
+ // So essentially, we really want to be able to "forget" that start
+ // states even exist and this is why we put them at the end.
+ let new_start_aid =
+ StateID::new(next_avail.as_usize().checked_sub(1).unwrap())
+ .unwrap();
+ remapper.swap(&mut self.nfa, old_start_aid, new_start_aid);
+ let new_start_uid =
+ StateID::new(next_avail.as_usize().checked_sub(2).unwrap())
+ .unwrap();
+ remapper.swap(&mut self.nfa, old_start_uid, new_start_uid);
+ let new_max_match_id =
+ StateID::new(next_avail.as_usize().checked_sub(3).unwrap())
+ .unwrap();
+ self.nfa.special.max_match_id = new_max_match_id;
+ self.nfa.special.start_unanchored_id = new_start_uid;
+ self.nfa.special.start_anchored_id = new_start_aid;
+ // If one start state is a match state, then they both are.
+ if self.nfa.states[self.nfa.special.start_anchored_id].is_match() {
+ self.nfa.special.max_match_id = self.nfa.special.start_anchored_id;
+ }
+ remapper.remap(&mut self.nfa);
+ }
+
+ /// Attempts to convert the transition representation of a subset of states
+ /// in this NFA from sparse to dense. This can greatly improve search
+ /// performance since states with a higher number of transitions tend to
+ /// correlate with very active states.
+ ///
+ /// We generally only densify states that are close to the start state.
+ /// These tend to be the most active states and thus benefit from a dense
+ /// representation more than other states.
+ ///
+    /// This tends to strike the best balance between memory usage and
+    /// performance. In particular, the *vast majority* of all states in a
+    /// typical Aho-Corasick automaton have only 1 transition and are usually
+    /// farther from the start state, and thus don't get densified.
+ ///
+ /// Note that this doesn't remove the sparse representation of transitions
+ /// for states that are densified. It could be done, but actually removing
+ /// entries from `NFA::sparse` is likely more expensive than it's worth.
+ fn densify(&mut self) -> Result<(), BuildError> {
+ for i in 0..self.nfa.states.len() {
+ let sid = StateID::new(i).unwrap();
+ // Don't bother densifying states that are only used as sentinels.
+ if sid == NFA::DEAD || sid == NFA::FAIL {
+ continue;
+ }
+ // Only densify states that are "close enough" to the start state.
+ if self.nfa.states[sid].depth.as_usize()
+ >= self.builder.dense_depth
+ {
+ continue;
+ }
+ let dense = self.nfa.alloc_dense_state()?;
+ let mut prev_link = None;
+ while let Some(link) = self.nfa.next_link(sid, prev_link) {
+ prev_link = Some(link);
+ let t = self.nfa.sparse[link];
+
+ let class = usize::from(self.nfa.byte_classes.get(t.byte));
+ let index = dense.as_usize() + class;
+ self.nfa.dense[index] = t.next;
+ }
+ self.nfa.states[sid].dense = dense;
+ }
+ Ok(())
+ }
+
+    /// Returns a set that tracks queued states.
+ ///
+ /// This is only necessary when ASCII case insensitivity is enabled, since
+ /// it is the only way to visit the same state twice. Otherwise, this
+    /// returns an inert set that never adds anything and always reports
+ /// `false` for every member test.
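+    ///
+    /// For example, with ASCII case insensitivity enabled, a pattern like
+    /// `ab` gets transitions for both `a` and `A` out of the start state
+    /// that lead to the same child state, so a breadth-first traversal of
+    /// the trie could otherwise enqueue that child twice.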
+ fn queued_set(&self) -> QueuedSet {
+ if self.builder.ascii_case_insensitive {
+ QueuedSet::active()
+ } else {
+ QueuedSet::inert()
+ }
+ }
+
+    /// Initializes the unanchored and anchored start states by making them
+    /// dense. This is achieved by explicitly setting every transition to the
+    /// FAIL state. This isn't necessary for correctness, since any missing
+    /// transition is automatically assumed to be mapped to the FAIL state.
+    /// We do this to make the starting states dense, and thus in turn make
+    /// transition lookups on them faster. (Which is worth doing because the
+    /// unanchored start state is the most active state.)
+ fn init_unanchored_start_state(&mut self) -> Result<(), BuildError> {
+ let start_uid = self.nfa.special.start_unanchored_id;
+ let start_aid = self.nfa.special.start_anchored_id;
+ self.nfa.init_full_state(start_uid, NFA::FAIL)?;
+ self.nfa.init_full_state(start_aid, NFA::FAIL)?;
+ Ok(())
+ }
+
+    /// Set up the anchored start state by copying all of the transitions and
+ /// matches from the unanchored starting state with one change: the failure
+ /// transition is changed to the DEAD state, so that for any undefined
+ /// transitions, the search will stop.
+ fn set_anchored_start_state(&mut self) -> Result<(), BuildError> {
+ let start_uid = self.nfa.special.start_unanchored_id;
+ let start_aid = self.nfa.special.start_anchored_id;
+ let (mut uprev_link, mut aprev_link) = (None, None);
+ loop {
+ let unext = self.nfa.next_link(start_uid, uprev_link);
+ let anext = self.nfa.next_link(start_aid, aprev_link);
+ let (ulink, alink) = match (unext, anext) {
+ (Some(ulink), Some(alink)) => (ulink, alink),
+ (None, None) => break,
+ _ => unreachable!(),
+ };
+ uprev_link = Some(ulink);
+ aprev_link = Some(alink);
+ self.nfa.sparse[alink].next = self.nfa.sparse[ulink].next;
+ }
+ self.nfa.copy_matches(start_uid, start_aid)?;
+ // This is the main difference between the unanchored and anchored
+ // starting states. If a lookup on an anchored starting state fails,
+ // then the search should stop.
+ //
+ // N.B. This assumes that the loop on the unanchored starting state
+ // hasn't been created yet.
+ self.nfa.states[start_aid].fail = NFA::DEAD;
+ Ok(())
+ }
+
+ /// Set the failure transitions on the start state to loop back to the
+ /// start state. This effectively permits the Aho-Corasick automaton to
+    /// match at any position. This is also required so that the search for
+    /// the next state always terminates; namely, finding the next state
+    /// should never return a fail_id.
+ ///
+ /// This must be done after building the initial trie, since trie
+ /// construction depends on transitions to `fail_id` to determine whether a
+ /// state already exists or not.
+ fn add_unanchored_start_state_loop(&mut self) {
+ let start_uid = self.nfa.special.start_unanchored_id;
+ let mut prev_link = None;
+ while let Some(link) = self.nfa.next_link(start_uid, prev_link) {
+ prev_link = Some(link);
+ if self.nfa.sparse[link].next() == NFA::FAIL {
+ self.nfa.sparse[link].next = start_uid;
+ }
+ }
+ }
+
+ /// Remove the start state loop by rewriting any transitions on the start
+ /// state back to the start state with transitions to the dead state.
+ ///
+ /// The loop is only closed when two conditions are met: the start state
+ /// is a match state and the match kind is leftmost-first or
+ /// leftmost-longest.
+ ///
+ /// The reason for this is that under leftmost semantics, a start state
+ /// that is also a match implies that we should never restart the search
+ /// process. We allow normal transitions out of the start state, but if
+ /// none exist, we transition to the dead state, which signals that
+ /// searching should stop.
+ fn close_start_state_loop_for_leftmost(&mut self) {
+ let start_uid = self.nfa.special.start_unanchored_id;
+ let start = &mut self.nfa.states[start_uid];
+ let dense = start.dense;
+ if self.builder.match_kind.is_leftmost() && start.is_match() {
+ let mut prev_link = None;
+ while let Some(link) = self.nfa.next_link(start_uid, prev_link) {
+ prev_link = Some(link);
+ if self.nfa.sparse[link].next() == start_uid {
+ self.nfa.sparse[link].next = NFA::DEAD;
+ if dense != StateID::ZERO {
+ let b = self.nfa.sparse[link].byte;
+ let class = usize::from(self.nfa.byte_classes.get(b));
+ self.nfa.dense[dense.as_usize() + class] = NFA::DEAD;
+ }
+ }
+ }
+ }
+ }
+
+ /// Sets all transitions on the dead state to point back to the dead state.
+ /// Normally, missing transitions map back to the failure state, but the
+ /// point of the dead state is to act as a sink that can never be escaped.
+ fn add_dead_state_loop(&mut self) -> Result<(), BuildError> {
+ self.nfa.init_full_state(NFA::DEAD, NFA::DEAD)?;
+ Ok(())
+ }
+}
+
+/// A set of state identifiers used to avoid revisiting the same state multiple
+/// times when filling in failure transitions.
+///
+/// This set has an "inert" and an "active" mode. When inert, the set never
+/// stores anything and always returns `false` for every member test. This is
+/// useful to avoid the performance and memory overhead of maintaining this
+/// set when it is not needed.
+#[derive(Debug)]
+struct QueuedSet {
+ set: Option<BTreeSet<StateID>>,
+}
+
+impl QueuedSet {
+ /// Return an inert set that returns `false` for every state ID membership
+ /// test.
+ fn inert() -> QueuedSet {
+ QueuedSet { set: None }
+ }
+
+ /// Return an active set that tracks state ID membership.
+ fn active() -> QueuedSet {
+ QueuedSet { set: Some(BTreeSet::new()) }
+ }
+
+ /// Inserts the given state ID into this set. (If the set is inert, then
+ /// this is a no-op.)
+ fn insert(&mut self, state_id: StateID) {
+ if let Some(ref mut set) = self.set {
+ set.insert(state_id);
+ }
+ }
+
+ /// Returns true if and only if the given state ID is in this set. If the
+ /// set is inert, this always returns false.
+ fn contains(&self, state_id: StateID) -> bool {
+ match self.set {
+ None => false,
+ Some(ref set) => set.contains(&state_id),
+ }
+ }
+}
+
+impl core::fmt::Debug for NFA {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ use crate::{
+ automaton::{fmt_state_indicator, sparse_transitions},
+ util::debug::DebugByte,
+ };
+
+ writeln!(f, "noncontiguous::NFA(")?;
+ for (sid, state) in self.states.iter().with_state_ids() {
+ // The FAIL state doesn't actually have space for a state allocated
+ // for it, so we have to treat it as a special case.
+ if sid == NFA::FAIL {
+ writeln!(f, "F {:06}:", sid.as_usize())?;
+ continue;
+ }
+ fmt_state_indicator(f, self, sid)?;
+ write!(
+ f,
+ "{:06}({:06}): ",
+ sid.as_usize(),
+ state.fail.as_usize()
+ )?;
+
+ let it = sparse_transitions(
+ self.iter_trans(sid).map(|t| (t.byte, t.next)),
+ )
+ .enumerate();
+ for (i, (start, end, sid)) in it {
+ if i > 0 {
+ write!(f, ", ")?;
+ }
+ if start == end {
+ write!(
+ f,
+ "{:?} => {:?}",
+ DebugByte(start),
+ sid.as_usize()
+ )?;
+ } else {
+ write!(
+ f,
+ "{:?}-{:?} => {:?}",
+ DebugByte(start),
+ DebugByte(end),
+ sid.as_usize()
+ )?;
+ }
+ }
+
+ write!(f, "\n")?;
+ if self.is_match(sid) {
+ write!(f, " matches: ")?;
+ for (i, pid) in self.iter_matches(sid).enumerate() {
+ if i > 0 {
+ write!(f, ", ")?;
+ }
+ write!(f, "{}", pid.as_usize())?;
+ }
+ write!(f, "\n")?;
+ }
+ }
+ writeln!(f, "match kind: {:?}", self.match_kind)?;
+ writeln!(f, "prefilter: {:?}", self.prefilter.is_some())?;
+ writeln!(f, "state length: {:?}", self.states.len())?;
+ writeln!(f, "pattern length: {:?}", self.patterns_len())?;
+ writeln!(f, "shortest pattern length: {:?}", self.min_pattern_len)?;
+ writeln!(f, "longest pattern length: {:?}", self.max_pattern_len)?;
+ writeln!(f, "memory usage: {:?}", self.memory_usage())?;
+ writeln!(f, ")")?;
+ Ok(())
+ }
+}
diff --git a/third_party/rust/aho-corasick/src/packed/api.rs b/third_party/rust/aho-corasick/src/packed/api.rs
new file mode 100644
index 0000000000..44f0bc9be3
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/packed/api.rs
@@ -0,0 +1,687 @@
+use alloc::sync::Arc;
+
+use crate::{
+ packed::{pattern::Patterns, rabinkarp::RabinKarp, teddy},
+ util::search::{Match, Span},
+};
+
+/// This is a limit placed on the total number of patterns we're willing to try
+/// and match at once. As more sophisticated algorithms are added, this number
+/// may be increased.
+const PATTERN_LIMIT: usize = 128;
+
+/// A knob for controlling the match semantics of a packed multiple string
+/// searcher.
+///
+/// This differs from the [`MatchKind`](crate::MatchKind) type in the top-level
+/// crate module in that it doesn't support "standard" match semantics,
+/// and instead only supports leftmost-first or leftmost-longest. Namely,
+/// "standard" semantics cannot be easily supported by packed searchers.
+///
+/// For more information on the distinction between leftmost-first and
+/// leftmost-longest, see the docs on the top-level `MatchKind` type.
+///
+/// Unlike the top-level `MatchKind` type, the default match semantics for this
+/// type are leftmost-first.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+#[non_exhaustive]
+pub enum MatchKind {
+ /// Use leftmost-first match semantics, which reports leftmost matches.
+ /// When there are multiple possible leftmost matches, the match
+ /// corresponding to the pattern that appeared earlier when constructing
+ /// the automaton is reported.
+ ///
+ /// This is the default.
+ LeftmostFirst,
+ /// Use leftmost-longest match semantics, which reports leftmost matches.
+ /// When there are multiple possible leftmost matches, the longest match
+ /// is chosen.
+ LeftmostLongest,
+}
+
+impl Default for MatchKind {
+ fn default() -> MatchKind {
+ MatchKind::LeftmostFirst
+ }
+}
+
+/// The configuration for a packed multiple pattern searcher.
+///
+/// The configuration is currently limited only to being able to select the
+/// match semantics (leftmost-first or leftmost-longest) of a searcher. In the
+/// future, more knobs may be made available.
+///
+/// A configuration produces a [`packed::Builder`](Builder), which in turn can
+/// be used to construct a [`packed::Searcher`](Searcher) for searching.
+///
+/// # Example
+///
+/// This example shows how to use leftmost-longest semantics instead of the
+/// default (leftmost-first).
+///
+/// ```
+/// use aho_corasick::{packed::{Config, MatchKind}, PatternID};
+///
+/// # fn example() -> Option<()> {
+/// let searcher = Config::new()
+/// .match_kind(MatchKind::LeftmostLongest)
+/// .builder()
+/// .add("foo")
+/// .add("foobar")
+/// .build()?;
+/// let matches: Vec<PatternID> = searcher
+/// .find_iter("foobar")
+/// .map(|mat| mat.pattern())
+/// .collect();
+/// assert_eq!(vec![PatternID::must(1)], matches);
+/// # Some(()) }
+/// # if cfg!(all(feature = "std", any(
+/// # target_arch = "x86_64", target_arch = "aarch64",
+/// # ))) {
+/// # example().unwrap()
+/// # } else {
+/// # assert!(example().is_none());
+/// # }
+/// ```
+#[derive(Clone, Debug)]
+pub struct Config {
+ kind: MatchKind,
+ force: Option<ForceAlgorithm>,
+ only_teddy_fat: Option<bool>,
+ only_teddy_256bit: Option<bool>,
+ heuristic_pattern_limits: bool,
+}
+
+/// An internal option for forcing the use of a particular packed algorithm.
+///
+/// When an algorithm is forced, if a searcher could not be constructed for it,
+/// then no searcher will be returned even if an alternative algorithm would
+/// work.
+#[derive(Clone, Debug)]
+enum ForceAlgorithm {
+ Teddy,
+ RabinKarp,
+}
+
+impl Default for Config {
+ fn default() -> Config {
+ Config::new()
+ }
+}
+
+impl Config {
+ /// Create a new default configuration. A default configuration uses
+ /// leftmost-first match semantics.
+ pub fn new() -> Config {
+ Config {
+ kind: MatchKind::LeftmostFirst,
+ force: None,
+ only_teddy_fat: None,
+ only_teddy_256bit: None,
+ heuristic_pattern_limits: true,
+ }
+ }
+
+ /// Create a packed builder from this configuration. The builder can be
+ /// used to accumulate patterns and create a [`Searcher`] from them.
+ pub fn builder(&self) -> Builder {
+ Builder::from_config(self.clone())
+ }
+
+ /// Set the match semantics for this configuration.
+ pub fn match_kind(&mut self, kind: MatchKind) -> &mut Config {
+ self.kind = kind;
+ self
+ }
+
+ /// An undocumented method for forcing the use of the Teddy algorithm.
+ ///
+ /// This is only exposed for more precise testing and benchmarks. Callers
+ /// should not use it as it is not part of the API stability guarantees of
+ /// this crate.
+ #[doc(hidden)]
+ pub fn only_teddy(&mut self, yes: bool) -> &mut Config {
+ if yes {
+ self.force = Some(ForceAlgorithm::Teddy);
+ } else {
+ self.force = None;
+ }
+ self
+ }
+
+ /// An undocumented method for forcing the use of the Fat Teddy algorithm.
+ ///
+ /// This is only exposed for more precise testing and benchmarks. Callers
+ /// should not use it as it is not part of the API stability guarantees of
+ /// this crate.
+ #[doc(hidden)]
+ pub fn only_teddy_fat(&mut self, yes: Option<bool>) -> &mut Config {
+ self.only_teddy_fat = yes;
+ self
+ }
+
+ /// An undocumented method for forcing the use of SSE (`Some(false)`) or
+ /// AVX (`Some(true)`) algorithms.
+ ///
+ /// This is only exposed for more precise testing and benchmarks. Callers
+ /// should not use it as it is not part of the API stability guarantees of
+ /// this crate.
+ #[doc(hidden)]
+ pub fn only_teddy_256bit(&mut self, yes: Option<bool>) -> &mut Config {
+ self.only_teddy_256bit = yes;
+ self
+ }
+
+ /// An undocumented method for forcing the use of the Rabin-Karp algorithm.
+ ///
+ /// This is only exposed for more precise testing and benchmarks. Callers
+ /// should not use it as it is not part of the API stability guarantees of
+ /// this crate.
+ #[doc(hidden)]
+ pub fn only_rabin_karp(&mut self, yes: bool) -> &mut Config {
+ if yes {
+ self.force = Some(ForceAlgorithm::RabinKarp);
+ } else {
+ self.force = None;
+ }
+ self
+ }
+
+    /// Request that heuristic limitations on the number of patterns be
+    /// employed. This is useful to disable for benchmarking, where one wants
+    /// to explore how Teddy performs on a large number of patterns even if
+    /// the heuristics would otherwise refuse construction.
+ ///
+ /// This is enabled by default.
+ pub fn heuristic_pattern_limits(&mut self, yes: bool) -> &mut Config {
+ self.heuristic_pattern_limits = yes;
+ self
+ }
+}
+
+/// A builder for constructing a packed searcher from a collection of patterns.
+///
+/// # Example
+///
+/// This example shows how to use a builder to construct a searcher. By
+/// default, leftmost-first match semantics are used.
+///
+/// ```
+/// use aho_corasick::{packed::{Builder, MatchKind}, PatternID};
+///
+/// # fn example() -> Option<()> {
+/// let searcher = Builder::new()
+/// .add("foobar")
+/// .add("foo")
+/// .build()?;
+/// let matches: Vec<PatternID> = searcher
+/// .find_iter("foobar")
+/// .map(|mat| mat.pattern())
+/// .collect();
+/// assert_eq!(vec![PatternID::ZERO], matches);
+/// # Some(()) }
+/// # if cfg!(all(feature = "std", any(
+/// # target_arch = "x86_64", target_arch = "aarch64",
+/// # ))) {
+/// # example().unwrap()
+/// # } else {
+/// # assert!(example().is_none());
+/// # }
+/// ```
+#[derive(Clone, Debug)]
+pub struct Builder {
+ /// The configuration of this builder and subsequent matcher.
+ config: Config,
+ /// Set to true if the builder detects that a matcher cannot be built.
+ inert: bool,
+ /// The patterns provided by the caller.
+ patterns: Patterns,
+}
+
+impl Builder {
+ /// Create a new builder for constructing a multi-pattern searcher. This
+ /// constructor uses the default configuration.
+ pub fn new() -> Builder {
+ Builder::from_config(Config::new())
+ }
+
+ fn from_config(config: Config) -> Builder {
+ Builder { config, inert: false, patterns: Patterns::new() }
+ }
+
+ /// Build a searcher from the patterns added to this builder so far.
+ pub fn build(&self) -> Option<Searcher> {
+ if self.inert || self.patterns.is_empty() {
+ return None;
+ }
+ let mut patterns = self.patterns.clone();
+ patterns.set_match_kind(self.config.kind);
+ let patterns = Arc::new(patterns);
+ let rabinkarp = RabinKarp::new(&patterns);
+ // Effectively, we only want to return a searcher if we can use Teddy,
+ // since Teddy is our only fast packed searcher at the moment.
+ // Rabin-Karp is only used when searching haystacks smaller than what
+ // Teddy can support. Thus, the only way to get a Rabin-Karp searcher
+ // is to force it using undocumented APIs (for tests/benchmarks).
+ let (search_kind, minimum_len) = match self.config.force {
+ None | Some(ForceAlgorithm::Teddy) => {
+ debug!("trying to build Teddy packed matcher");
+ let teddy = match self.build_teddy(Arc::clone(&patterns)) {
+ None => return None,
+ Some(teddy) => teddy,
+ };
+ let minimum_len = teddy.minimum_len();
+ (SearchKind::Teddy(teddy), minimum_len)
+ }
+ Some(ForceAlgorithm::RabinKarp) => {
+ debug!("using Rabin-Karp packed matcher");
+ (SearchKind::RabinKarp, 0)
+ }
+ };
+ Some(Searcher { patterns, rabinkarp, search_kind, minimum_len })
+ }
+
+ fn build_teddy(&self, patterns: Arc<Patterns>) -> Option<teddy::Searcher> {
+ teddy::Builder::new()
+ .only_256bit(self.config.only_teddy_256bit)
+ .only_fat(self.config.only_teddy_fat)
+ .heuristic_pattern_limits(self.config.heuristic_pattern_limits)
+ .build(patterns)
+ }
+
+ /// Add the given pattern to this set to match.
+ ///
+ /// The order in which patterns are added is significant. Namely, when
+ /// using leftmost-first match semantics, then when multiple patterns can
+ /// match at a particular location, the pattern that was added first is
+ /// used as the match.
+ ///
+ /// If the number of patterns added exceeds the amount supported by packed
+ /// searchers, then the builder will stop accumulating patterns and render
+ /// itself inert. At this point, constructing a searcher will always return
+ /// `None`.
+ pub fn add<P: AsRef<[u8]>>(&mut self, pattern: P) -> &mut Builder {
+ if self.inert {
+ return self;
+ } else if self.patterns.len() >= PATTERN_LIMIT {
+ self.inert = true;
+ self.patterns.reset();
+ return self;
+ }
+ // Just in case PATTERN_LIMIT increases beyond u16::MAX.
+ assert!(self.patterns.len() <= core::u16::MAX as usize);
+
+ let pattern = pattern.as_ref();
+ if pattern.is_empty() {
+ self.inert = true;
+ self.patterns.reset();
+ return self;
+ }
+ self.patterns.add(pattern);
+ self
+ }
+
+ /// Add the given iterator of patterns to this set to match.
+ ///
+ /// The iterator must yield elements that can be converted into a `&[u8]`.
+ ///
+ /// The order in which patterns are added is significant. Namely, when
+ /// using leftmost-first match semantics, then when multiple patterns can
+ /// match at a particular location, the pattern that was added first is
+ /// used as the match.
+ ///
+ /// If the number of patterns added exceeds the amount supported by packed
+ /// searchers, then the builder will stop accumulating patterns and render
+ /// itself inert. At this point, constructing a searcher will always return
+ /// `None`.
+ pub fn extend<I, P>(&mut self, patterns: I) -> &mut Builder
+ where
+ I: IntoIterator<Item = P>,
+ P: AsRef<[u8]>,
+ {
+ for p in patterns {
+ self.add(p);
+ }
+ self
+ }
+
+ /// Returns the number of patterns added to this builder.
+ pub fn len(&self) -> usize {
+ self.patterns.len()
+ }
+
+ /// Returns the length, in bytes, of the shortest pattern added.
+ pub fn minimum_len(&self) -> usize {
+ self.patterns.minimum_len()
+ }
+}
+
+impl Default for Builder {
+ fn default() -> Builder {
+ Builder::new()
+ }
+}
+
+/// A packed searcher for quickly finding occurrences of multiple patterns.
+///
+/// If callers need more flexible construction, or if one wants to change the
+/// match semantics (either leftmost-first or leftmost-longest), then one can
+/// use the [`Config`] and/or [`Builder`] types for more fine grained control.
+///
+/// # Example
+///
+/// This example shows how to create a searcher from an iterator of patterns.
+/// By default, leftmost-first match semantics are used.
+///
+/// ```
+/// use aho_corasick::{packed::{MatchKind, Searcher}, PatternID};
+///
+/// # fn example() -> Option<()> {
+/// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
+/// let matches: Vec<PatternID> = searcher
+/// .find_iter("foobar")
+/// .map(|mat| mat.pattern())
+/// .collect();
+/// assert_eq!(vec![PatternID::ZERO], matches);
+/// # Some(()) }
+/// # if cfg!(all(feature = "std", any(
+/// # target_arch = "x86_64", target_arch = "aarch64",
+/// # ))) {
+/// # example().unwrap()
+/// # } else {
+/// # assert!(example().is_none());
+/// # }
+/// ```
+#[derive(Clone, Debug)]
+pub struct Searcher {
+ patterns: Arc<Patterns>,
+ rabinkarp: RabinKarp,
+ search_kind: SearchKind,
+ minimum_len: usize,
+}
+
+#[derive(Clone, Debug)]
+enum SearchKind {
+ Teddy(teddy::Searcher),
+ RabinKarp,
+}
+
+impl Searcher {
+ /// A convenience function for constructing a searcher from an iterator
+ /// of things that can be converted to a `&[u8]`.
+ ///
+ /// If a searcher could not be constructed (either because of an
+ /// unsupported CPU or because there are too many patterns), then `None`
+ /// is returned.
+ ///
+ /// # Example
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use aho_corasick::{packed::{MatchKind, Searcher}, PatternID};
+ ///
+ /// # fn example() -> Option<()> {
+ /// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
+ /// let matches: Vec<PatternID> = searcher
+ /// .find_iter("foobar")
+ /// .map(|mat| mat.pattern())
+ /// .collect();
+ /// assert_eq!(vec![PatternID::ZERO], matches);
+ /// # Some(()) }
+ /// # if cfg!(all(feature = "std", any(
+ /// # target_arch = "x86_64", target_arch = "aarch64",
+ /// # ))) {
+ /// # example().unwrap()
+ /// # } else {
+ /// # assert!(example().is_none());
+ /// # }
+ /// ```
+ pub fn new<I, P>(patterns: I) -> Option<Searcher>
+ where
+ I: IntoIterator<Item = P>,
+ P: AsRef<[u8]>,
+ {
+ Builder::new().extend(patterns).build()
+ }
+
+ /// A convenience function for calling `Config::new()`.
+ ///
+ /// This is useful for avoiding an additional import.
+ pub fn config() -> Config {
+ Config::new()
+ }
+
+ /// A convenience function for calling `Builder::new()`.
+ ///
+ /// This is useful for avoiding an additional import.
+ pub fn builder() -> Builder {
+ Builder::new()
+ }
+
+ /// Return the first occurrence of any of the patterns in this searcher,
+ /// according to its match semantics, in the given haystack. The `Match`
+ /// returned will include the identifier of the pattern that matched, which
+ /// corresponds to the index of the pattern (starting from `0`) in which it
+ /// was added.
+ ///
+ /// # Example
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use aho_corasick::{packed::{MatchKind, Searcher}, PatternID};
+ ///
+ /// # fn example() -> Option<()> {
+ /// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
+ /// let mat = searcher.find("foobar")?;
+ /// assert_eq!(PatternID::ZERO, mat.pattern());
+ /// assert_eq!(0, mat.start());
+ /// assert_eq!(6, mat.end());
+ /// # Some(()) }
+ /// # if cfg!(all(feature = "std", any(
+ /// # target_arch = "x86_64", target_arch = "aarch64",
+ /// # ))) {
+ /// # example().unwrap()
+ /// # } else {
+ /// # assert!(example().is_none());
+ /// # }
+ /// ```
+ #[inline]
+ pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<Match> {
+ let haystack = haystack.as_ref();
+ self.find_in(haystack, Span::from(0..haystack.len()))
+ }
+
+ /// Return the first occurrence of any of the patterns in this searcher,
+ /// according to its match semantics, in the given haystack starting from
+ /// the given position.
+ ///
+ /// The `Match` returned will include the identifier of the pattern that
+ /// matched, which corresponds to the index of the pattern (starting from
+ /// `0`) in which it was added. The offsets in the `Match` will be relative
+ /// to the start of `haystack` (and not `at`).
+ ///
+ /// # Example
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use aho_corasick::{packed::{MatchKind, Searcher}, PatternID, Span};
+ ///
+ /// # fn example() -> Option<()> {
+ /// let haystack = "foofoobar";
+ /// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
+ /// let mat = searcher.find_in(haystack, Span::from(3..haystack.len()))?;
+ /// assert_eq!(PatternID::ZERO, mat.pattern());
+ /// assert_eq!(3, mat.start());
+ /// assert_eq!(9, mat.end());
+ /// # Some(()) }
+ /// # if cfg!(all(feature = "std", any(
+ /// # target_arch = "x86_64", target_arch = "aarch64",
+ /// # ))) {
+ /// # example().unwrap()
+ /// # } else {
+ /// # assert!(example().is_none());
+ /// # }
+ /// ```
+ #[inline]
+ pub fn find_in<B: AsRef<[u8]>>(
+ &self,
+ haystack: B,
+ span: Span,
+ ) -> Option<Match> {
+ let haystack = haystack.as_ref();
+ match self.search_kind {
+ SearchKind::Teddy(ref teddy) => {
+ if haystack[span].len() < teddy.minimum_len() {
+ return self.find_in_slow(haystack, span);
+ }
+ teddy.find(&haystack[..span.end], span.start)
+ }
+ SearchKind::RabinKarp => {
+ self.rabinkarp.find_at(&haystack[..span.end], span.start)
+ }
+ }
+ }
+
+ /// Return an iterator of non-overlapping occurrences of the patterns in
+ /// this searcher, according to its match semantics, in the given haystack.
+ ///
+ /// # Example
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use aho_corasick::{packed::{MatchKind, Searcher}, PatternID};
+ ///
+ /// # fn example() -> Option<()> {
+ /// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
+ /// let matches: Vec<PatternID> = searcher
+ /// .find_iter("foobar fooba foofoo")
+ /// .map(|mat| mat.pattern())
+ /// .collect();
+ /// assert_eq!(vec![
+ /// PatternID::must(0),
+ /// PatternID::must(1),
+ /// PatternID::must(1),
+ /// PatternID::must(1),
+ /// ], matches);
+ /// # Some(()) }
+ /// # if cfg!(all(feature = "std", any(
+ /// # target_arch = "x86_64", target_arch = "aarch64",
+ /// # ))) {
+ /// # example().unwrap()
+ /// # } else {
+ /// # assert!(example().is_none());
+ /// # }
+ /// ```
+ #[inline]
+ pub fn find_iter<'a, 'b, B: ?Sized + AsRef<[u8]>>(
+ &'a self,
+ haystack: &'b B,
+ ) -> FindIter<'a, 'b> {
+ let haystack = haystack.as_ref();
+ let span = Span::from(0..haystack.len());
+ FindIter { searcher: self, haystack, span }
+ }
+
+ /// Returns the match kind used by this packed searcher.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use aho_corasick::packed::{MatchKind, Searcher};
+ ///
+ /// # fn example() -> Option<()> {
+ /// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
+ /// // leftmost-first is the default.
+ /// assert_eq!(&MatchKind::LeftmostFirst, searcher.match_kind());
+ /// # Some(()) }
+ /// # if cfg!(all(feature = "std", any(
+ /// # target_arch = "x86_64", target_arch = "aarch64",
+ /// # ))) {
+ /// # example().unwrap()
+ /// # } else {
+ /// # assert!(example().is_none());
+ /// # }
+ /// ```
+ #[inline]
+ pub fn match_kind(&self) -> &MatchKind {
+ self.patterns.match_kind()
+ }
+
+ /// Returns the minimum length of a haystack that is required in order for
+ /// packed searching to be effective.
+ ///
+ /// In some cases, the underlying packed searcher may not be able to search
+ /// very short haystacks. When that occurs, the implementation will defer
+ /// to a slower non-packed searcher (which is still generally faster than
+ /// Aho-Corasick for a small number of patterns). However, callers may
+ /// want to avoid ever using the slower variant, which one can do by
+ /// never passing a haystack shorter than the minimum length returned by
+ /// this method.
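+    ///
+    /// # Example
+    ///
+    /// A minimal sketch of checking this before searching (the patterns and
+    /// haystack here are only illustrative):
+    ///
+    /// ```
+    /// use aho_corasick::packed::Searcher;
+    ///
+    /// # fn example() -> Option<()> {
+    /// let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
+    /// let haystack = "foobar";
+    /// if haystack.len() >= searcher.minimum_len() {
+    ///     // The fast vectorized search path can be used directly.
+    ///     assert!(searcher.find(haystack).is_some());
+    /// } else {
+    ///     // Short haystacks still work, but fall back to a slower
+    ///     // non-packed search internally. A caller that wants to avoid
+    ///     // that fallback could route short haystacks elsewhere.
+    ///     assert!(searcher.find(haystack).is_some());
+    /// }
+    /// # Some(()) }
+    /// # if cfg!(all(feature = "std", any(
+    /// #     target_arch = "x86_64", target_arch = "aarch64",
+    /// # ))) {
+    /// #     example().unwrap()
+    /// # } else {
+    /// #     assert!(example().is_none());
+    /// # }
+    /// ```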
+ #[inline]
+ pub fn minimum_len(&self) -> usize {
+ self.minimum_len
+ }
+
+ /// Returns the approximate total amount of heap used by this searcher, in
+ /// units of bytes.
+ #[inline]
+ pub fn memory_usage(&self) -> usize {
+ self.patterns.memory_usage()
+ + self.rabinkarp.memory_usage()
+ + self.search_kind.memory_usage()
+ }
+
+ /// Use a slow (non-packed) searcher.
+ ///
+ /// This is useful when a packed searcher could be constructed, but could
+ /// not be used to search a specific haystack. For example, if Teddy was
+ /// built but the haystack is smaller than ~34 bytes, then Teddy might not
+ /// be able to run.
+ fn find_in_slow(&self, haystack: &[u8], span: Span) -> Option<Match> {
+ self.rabinkarp.find_at(&haystack[..span.end], span.start)
+ }
+}
+
+impl SearchKind {
+ fn memory_usage(&self) -> usize {
+ match *self {
+ SearchKind::Teddy(ref ted) => ted.memory_usage(),
+ SearchKind::RabinKarp => 0,
+ }
+ }
+}
+
+/// An iterator over non-overlapping matches from a packed searcher.
+///
+/// The lifetime `'s` refers to the lifetime of the underlying [`Searcher`],
+/// while the lifetime `'h` refers to the lifetime of the haystack being
+/// searched.
+#[derive(Debug)]
+pub struct FindIter<'s, 'h> {
+ searcher: &'s Searcher,
+ haystack: &'h [u8],
+ span: Span,
+}
+
+impl<'s, 'h> Iterator for FindIter<'s, 'h> {
+ type Item = Match;
+
+ fn next(&mut self) -> Option<Match> {
+ if self.span.start > self.span.end {
+ return None;
+ }
+ match self.searcher.find_in(&self.haystack, self.span) {
+ None => None,
+ Some(m) => {
+ self.span.start = m.end();
+ Some(m)
+ }
+ }
+ }
+}
diff --git a/third_party/rust/aho-corasick/src/packed/ext.rs b/third_party/rust/aho-corasick/src/packed/ext.rs
new file mode 100644
index 0000000000..b689642bca
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/packed/ext.rs
@@ -0,0 +1,39 @@
+/// A trait for adding some helper routines to pointers.
+pub(crate) trait Pointer {
+ /// Returns the distance, in units of `T`, between `self` and `origin`.
+ ///
+ /// # Safety
+ ///
+    /// Same requirements as `ptr::offset_from`, with the additional
+    /// requirement that `self >= origin`.
+ unsafe fn distance(self, origin: Self) -> usize;
+
+ /// Casts this pointer to `usize`.
+ ///
+ /// Callers should not convert the `usize` back to a pointer if at all
+ /// possible. (And if you believe it's necessary, open an issue to discuss
+ /// why. Otherwise, it has the potential to violate pointer provenance.)
+ /// The purpose of this function is just to be able to do arithmetic, i.e.,
+ /// computing offsets or alignments.
+ fn as_usize(self) -> usize;
+}
+
+impl<T> Pointer for *const T {
+ unsafe fn distance(self, origin: *const T) -> usize {
+ // TODO: Replace with `ptr::sub_ptr` once stabilized.
+ usize::try_from(self.offset_from(origin)).unwrap_unchecked()
+ }
+
+ fn as_usize(self) -> usize {
+ self as usize
+ }
+}
+
+impl<T> Pointer for *mut T {
+ unsafe fn distance(self, origin: *mut T) -> usize {
+ (self as *const T).distance(origin as *const T)
+ }
+
+ fn as_usize(self) -> usize {
+ (self as *const T).as_usize()
+ }
+}
diff --git a/third_party/rust/aho-corasick/src/packed/mod.rs b/third_party/rust/aho-corasick/src/packed/mod.rs
new file mode 100644
index 0000000000..3990bc9330
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/packed/mod.rs
@@ -0,0 +1,120 @@
+/*!
+Provides packed multiple substring search, principally for a small number of
+patterns.
+
+This sub-module provides vectorized routines for quickly finding
+matches of a small number of patterns. In general, users of this crate
+shouldn't need to interface with this module directly, as the primary
+[`AhoCorasick`](crate::AhoCorasick) searcher will use these routines
+automatically as a prefilter when applicable. However, in some cases, callers
+may want to bypass the Aho-Corasick machinery entirely and use this vectorized
+searcher directly.
+
+# Overview
+
+The primary types in this sub-module are:
+
+* [`Searcher`] executes the actual search algorithm to report matches in a
+haystack.
+* [`Builder`] accumulates patterns incrementally and can construct a
+`Searcher`.
+* [`Config`] permits tuning the searcher, and itself will produce a `Builder`
+(which can then be used to build a `Searcher`). Currently, the only tuneable
+knob is the match semantics, but this may be expanded in the future.
+
+# Examples
+
+This example shows how to create a searcher from an iterator of patterns.
+By default, leftmost-first match semantics are used. (See the top-level
+[`MatchKind`] type for more details about match semantics, which apply
+similarly to packed substring search.)
+
+```
+use aho_corasick::{packed::{MatchKind, Searcher}, PatternID};
+
+# fn example() -> Option<()> {
+let searcher = Searcher::new(["foobar", "foo"].iter().cloned())?;
+let matches: Vec<PatternID> = searcher
+ .find_iter("foobar")
+ .map(|mat| mat.pattern())
+ .collect();
+assert_eq!(vec![PatternID::ZERO], matches);
+# Some(()) }
+# if cfg!(all(feature = "std", any(
+# target_arch = "x86_64", target_arch = "aarch64",
+# ))) {
+# example().unwrap()
+# } else {
+# assert!(example().is_none());
+# }
+```
+
+This example shows how to use [`Config`] to change the match semantics to
+leftmost-longest:
+
+```
+use aho_corasick::{packed::{Config, MatchKind}, PatternID};
+
+# fn example() -> Option<()> {
+let searcher = Config::new()
+ .match_kind(MatchKind::LeftmostLongest)
+ .builder()
+ .add("foo")
+ .add("foobar")
+ .build()?;
+let matches: Vec<PatternID> = searcher
+ .find_iter("foobar")
+ .map(|mat| mat.pattern())
+ .collect();
+assert_eq!(vec![PatternID::must(1)], matches);
+# Some(()) }
+# if cfg!(all(feature = "std", any(
+# target_arch = "x86_64", target_arch = "aarch64",
+# ))) {
+# example().unwrap()
+# } else {
+# assert!(example().is_none());
+# }
+```
+
+# Packed substring searching
+
+Packed substring searching refers to the use of SIMD (Single Instruction,
+Multiple Data) to accelerate the detection of matches in a haystack. Unlike
+conventional algorithms, such as Aho-Corasick, SIMD algorithms for substring
+search tend to do better with a small number of patterns, whereas Aho-Corasick
+generally maintains reasonably consistent performance regardless of the number
+of patterns you give it. Because of this, the vectorized searcher in this
+sub-module cannot be used as a general purpose searcher, since building the
+searcher may fail even when given a small number of patterns. However, in
+exchange, when searching for a small number of patterns, searching can be quite
+a bit faster than Aho-Corasick (sometimes by an order of magnitude).
+
+The key takeaway here is that constructing a searcher from a list of patterns
+is a fallible operation with no clear rules for when it will fail. While the
+precise conditions under which building a searcher can fail are an
+implementation detail, here are some common reasons (a sketch of handling a
+failed construction follows this list):
+
+* Too many patterns were given. Typically, the limit is on the order of 100 or
+ so, but this limit may fluctuate based on available CPU features.
+* The available packed algorithms require CPU features that aren't available.
+ For example, currently, this crate only provides packed algorithms for
+ `x86_64` and `aarch64`. Therefore, constructing a packed searcher on any
+ other target will always fail.
+* Zero patterns were given, or one of the patterns given was empty. Packed
+ searchers require at least one pattern and that all patterns are non-empty.
+* Something else about the nature of the patterns (typically based on
+ heuristics) suggests that a packed searcher would perform very poorly, so
+ no searcher is built.
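+
+One possible way to handle a failed construction is to fall back to the
+general purpose [`AhoCorasick`](crate::AhoCorasick) searcher. A minimal
+sketch (the patterns here are only illustrative):
+
+```
+use aho_corasick::{packed, AhoCorasick};
+
+let patterns = &["foobar", "foo"];
+match packed::Searcher::new(patterns.iter().cloned()) {
+    Some(searcher) => {
+        // The vectorized searcher was built successfully.
+        assert!(searcher.find("foobar").is_some());
+    }
+    None => {
+        // Fall back to the general purpose Aho-Corasick searcher, which can
+        // always be built for a small, non-empty set of patterns.
+        let ac = AhoCorasick::new(patterns).unwrap();
+        assert!(ac.find("foobar").is_some());
+    }
+}
+```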
+*/
+
+pub use crate::packed::api::{Builder, Config, FindIter, MatchKind, Searcher};
+
+mod api;
+mod ext;
+mod pattern;
+mod rabinkarp;
+mod teddy;
+#[cfg(all(feature = "std", test))]
+mod tests;
+mod vector;
diff --git a/third_party/rust/aho-corasick/src/packed/pattern.rs b/third_party/rust/aho-corasick/src/packed/pattern.rs
new file mode 100644
index 0000000000..95aca4d95b
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/packed/pattern.rs
@@ -0,0 +1,480 @@
+use core::{cmp, fmt, mem, u16, usize};
+
+use alloc::{boxed::Box, string::String, vec, vec::Vec};
+
+use crate::{
+ packed::{api::MatchKind, ext::Pointer},
+ PatternID,
+};
+
+/// A non-empty collection of non-empty patterns to search for.
+///
+/// This collection of patterns is what is passed around to both execute
+/// searches and to construct the searchers themselves. Namely, this permits
+/// searches to avoid copying all of the patterns, and allows us to keep only
+/// one copy throughout all packed searchers.
+///
+/// Note that this collection is not a set. The same pattern can appear more
+/// than once.
+#[derive(Clone, Debug)]
+pub(crate) struct Patterns {
+ /// The match semantics supported by this collection of patterns.
+ ///
+ /// The match semantics determines the order of the iterator over patterns.
+ /// For leftmost-first, patterns are provided in the same order as were
+ /// provided by the caller. For leftmost-longest, patterns are provided in
+ /// descending order of length, with ties broken by the order in which they
+ /// were provided by the caller.
+ kind: MatchKind,
+ /// The collection of patterns, indexed by their identifier.
+ by_id: Vec<Vec<u8>>,
+ /// The order of patterns defined for iteration, given by pattern
+ /// identifiers. The order of `by_id` and `order` is always the same for
+ /// leftmost-first semantics, but may be different for leftmost-longest
+ /// semantics.
+ order: Vec<PatternID>,
+ /// The length of the smallest pattern, in bytes.
+ minimum_len: usize,
+ /// The total number of pattern bytes across the entire collection. This
+ /// is used for reporting total heap usage in constant time.
+ total_pattern_bytes: usize,
+}
+
+// BREADCRUMBS: I think we want to experiment with a different bucket
+// representation. Basically, each bucket is just a Range<usize> to a single
+// contiguous allocation? Maybe length-prefixed patterns or something? The
+// idea is to try to get rid of the pointer chasing in verification. I don't
+// know that that is the issue, but I suspect it is.
+
+impl Patterns {
+ /// Create a new collection of patterns for the given match semantics. The
+ /// ID of each pattern is the index of the pattern at which it occurs in
+ /// the `by_id` slice.
+ ///
+ /// If any of the patterns in the slice given are empty, then this panics.
+ /// Similarly, if the number of patterns given is zero, then this also
+ /// panics.
+ pub(crate) fn new() -> Patterns {
+ Patterns {
+ kind: MatchKind::default(),
+ by_id: vec![],
+ order: vec![],
+ minimum_len: usize::MAX,
+ total_pattern_bytes: 0,
+ }
+ }
+
+ /// Add a pattern to this collection.
+ ///
+ /// This panics if the pattern given is empty.
+ pub(crate) fn add(&mut self, bytes: &[u8]) {
+ assert!(!bytes.is_empty());
+ assert!(self.by_id.len() <= u16::MAX as usize);
+
+ let id = PatternID::new(self.by_id.len()).unwrap();
+ self.order.push(id);
+ self.by_id.push(bytes.to_vec());
+ self.minimum_len = cmp::min(self.minimum_len, bytes.len());
+ self.total_pattern_bytes += bytes.len();
+ }
+
+ /// Set the match kind semantics for this collection of patterns.
+ ///
+ /// If the kind is not set, then the default is leftmost-first.
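+    ///
+    /// As a small illustration: with patterns `foo` (ID 0) and `foobar`
+    /// (ID 1) added in that order, leftmost-first leaves the iteration order
+    /// as `[0, 1]`, while leftmost-longest reorders it to `[1, 0]` so that
+    /// the longer pattern is tried first.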
+ pub(crate) fn set_match_kind(&mut self, kind: MatchKind) {
+ self.kind = kind;
+ match self.kind {
+ MatchKind::LeftmostFirst => {
+ self.order.sort();
+ }
+ MatchKind::LeftmostLongest => {
+ let (order, by_id) = (&mut self.order, &mut self.by_id);
+ order.sort_by(|&id1, &id2| {
+ by_id[id1].len().cmp(&by_id[id2].len()).reverse()
+ });
+ }
+ }
+ }
+
+ /// Return the number of patterns in this collection.
+ ///
+ /// This is guaranteed to be greater than zero.
+ pub(crate) fn len(&self) -> usize {
+ self.by_id.len()
+ }
+
+ /// Returns true if and only if this collection of patterns is empty.
+ pub(crate) fn is_empty(&self) -> bool {
+ self.len() == 0
+ }
+
+ /// Returns the approximate total amount of heap used by these patterns, in
+ /// units of bytes.
+ pub(crate) fn memory_usage(&self) -> usize {
+ self.order.len() * mem::size_of::<PatternID>()
+ + self.by_id.len() * mem::size_of::<Vec<u8>>()
+ + self.total_pattern_bytes
+ }
+
+ /// Clears all heap memory associated with this collection of patterns and
+ /// resets all state such that it is a valid empty collection.
+ pub(crate) fn reset(&mut self) {
+ self.kind = MatchKind::default();
+ self.by_id.clear();
+ self.order.clear();
+ self.minimum_len = usize::MAX;
+ }
+
+ /// Returns the length, in bytes, of the smallest pattern.
+ ///
+ /// This is guaranteed to be at least one.
+ pub(crate) fn minimum_len(&self) -> usize {
+ self.minimum_len
+ }
+
+ /// Returns the match semantics used by these patterns.
+ pub(crate) fn match_kind(&self) -> &MatchKind {
+ &self.kind
+ }
+
+ /// Return the pattern with the given identifier. If such a pattern does
+ /// not exist, then this panics.
+ pub(crate) fn get(&self, id: PatternID) -> Pattern<'_> {
+ Pattern(&self.by_id[id])
+ }
+
+ /// Return the pattern with the given identifier without performing bounds
+ /// checks.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that a pattern with the given identifier exists
+ /// before using this method.
+ pub(crate) unsafe fn get_unchecked(&self, id: PatternID) -> Pattern<'_> {
+ Pattern(self.by_id.get_unchecked(id.as_usize()))
+ }
+
+ /// Return an iterator over all the patterns in this collection, in the
+ /// order in which they should be matched.
+ ///
+ /// Specifically, in a naive multi-pattern matcher, the following is
+ /// guaranteed to satisfy the match semantics of this collection of
+ /// patterns:
+ ///
+ /// ```ignore
+ /// for i in 0..haystack.len():
+ /// for p in patterns.iter():
+ /// if haystack[i..].starts_with(p.bytes()):
+ /// return Match(p.id(), i, i + p.bytes().len())
+ /// ```
+ ///
+ /// Namely, among the patterns in a collection, if they are matched in
+ /// the order provided by this iterator, then the result is guaranteed
+ /// to satisfy the correct match semantics. (Either leftmost-first or
+ /// leftmost-longest.)
+ pub(crate) fn iter(&self) -> PatternIter<'_> {
+ PatternIter { patterns: self, i: 0 }
+ }
+}
+
+/// An iterator over the patterns in the `Patterns` collection.
+///
+/// The order of the patterns provided by this iterator is consistent with the
+/// match semantics of the originating collection of patterns.
+///
+/// The lifetime `'p` corresponds to the lifetime of the collection of patterns
+/// this is iterating over.
+#[derive(Debug)]
+pub(crate) struct PatternIter<'p> {
+ patterns: &'p Patterns,
+ i: usize,
+}
+
+impl<'p> Iterator for PatternIter<'p> {
+ type Item = (PatternID, Pattern<'p>);
+
+ fn next(&mut self) -> Option<(PatternID, Pattern<'p>)> {
+ if self.i >= self.patterns.len() {
+ return None;
+ }
+ let id = self.patterns.order[self.i];
+ let p = self.patterns.get(id);
+ self.i += 1;
+ Some((id, p))
+ }
+}
+
+/// A pattern that is used in packed searching.
+#[derive(Clone)]
+pub(crate) struct Pattern<'a>(&'a [u8]);
+
+impl<'a> fmt::Debug for Pattern<'a> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.debug_struct("Pattern")
+ .field("lit", &String::from_utf8_lossy(&self.0))
+ .finish()
+ }
+}
+
+impl<'p> Pattern<'p> {
+ /// Returns the length of this pattern, in bytes.
+ pub(crate) fn len(&self) -> usize {
+ self.0.len()
+ }
+
+ /// Returns the bytes of this pattern.
+ pub(crate) fn bytes(&self) -> &[u8] {
+ &self.0
+ }
+
+ /// Returns the first `len` low nybbles from this pattern. If this pattern
+ /// is shorter than `len`, then this panics.
+ pub(crate) fn low_nybbles(&self, len: usize) -> Box<[u8]> {
+ let mut nybs = vec![0; len].into_boxed_slice();
+ for (i, byte) in self.bytes().iter().take(len).enumerate() {
+ nybs[i] = byte & 0xF;
+ }
+ nybs
+ }
+
+ /// Returns true if this pattern is a prefix of the given bytes.
+ #[inline(always)]
+ pub(crate) fn is_prefix(&self, bytes: &[u8]) -> bool {
+ is_prefix(bytes, self.bytes())
+ }
+
+ /// Returns true if this pattern is a prefix of the haystack given by the
+ /// raw `start` and `end` pointers.
+ ///
+ /// # Safety
+ ///
+ /// * It must be the case that `start < end` and that the distance between
+ /// them is at least equal to `V::BYTES`. That is, it must always be valid
+ /// to do at least an unaligned load of `V` at `start`.
+ /// * Both `start` and `end` must be valid for reads.
+ /// * Both `start` and `end` must point to an initialized value.
+ /// * Both `start` and `end` must point to the same allocated object and
+ /// must either be in bounds or at most one byte past the end of the
+ /// allocated object.
+ /// * Both `start` and `end` must be _derived from_ a pointer to the same
+ /// object.
+ /// * The distance between `start` and `end` must not overflow `isize`.
+ /// * The distance being in bounds must not rely on "wrapping around" the
+ /// address space.
+ #[inline(always)]
+ pub(crate) unsafe fn is_prefix_raw(
+ &self,
+ start: *const u8,
+ end: *const u8,
+ ) -> bool {
+ let patlen = self.bytes().len();
+ let haylen = end.distance(start);
+ if patlen > haylen {
+ return false;
+ }
+ // SAFETY: We've checked that the haystack has length at least equal
+ // to this pattern. All other safety concerns are the responsibility
+ // of the caller.
+ is_equal_raw(start, self.bytes().as_ptr(), patlen)
+ }
+}
+
+/// Returns true if and only if `needle` is a prefix of `haystack`.
+///
+/// This uses a latency optimized variant of `memcmp` internally which *might*
+/// make this faster for very short strings.
+///
+/// # Inlining
+///
+/// This routine is marked `inline(always)`. If you want to call this function
+/// in a way that is not always inlined, you'll need to wrap a call to it in
+/// another function that is marked as `inline(never)` or just `inline`.
+#[inline(always)]
+fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool {
+ if needle.len() > haystack.len() {
+ return false;
+ }
+ // SAFETY: Our pointers are derived directly from borrowed slices which
+ // uphold all of our safety guarantees except for length. We account for
+ // length with the check above.
+ unsafe { is_equal_raw(haystack.as_ptr(), needle.as_ptr(), needle.len()) }
+}
+
+/// Compare corresponding bytes in `x` and `y` for equality.
+///
+/// That is, this returns true if and only if `x.len() == y.len()` and
+/// `x[i] == y[i]` for all `0 <= i < x.len()`.
+///
+/// Note that this isn't used in the main code paths. We only use it in tests
+/// as a convenient way of testing `is_equal_raw`.
+///
+/// # Inlining
+///
+/// This routine is marked `inline(always)`. If you want to call this function
+/// in a way that is not always inlined, you'll need to wrap a call to it in
+/// another function that is marked as `inline(never)` or just `inline`.
+///
+/// # Motivation
+///
+/// Why not use slice equality instead? Well, slice equality usually results in
+/// a call out to the current platform's `libc` which might not be inlineable
+/// or have other overhead. This routine isn't guaranteed to be a win, but it
+/// might be in some cases.
+#[cfg(test)]
+#[inline(always)]
+fn is_equal(x: &[u8], y: &[u8]) -> bool {
+ if x.len() != y.len() {
+ return false;
+ }
+ // SAFETY: Our pointers are derived directly from borrowed slices which
+ // uphold all of our safety guarantees except for length. We account for
+ // length with the check above.
+ unsafe { is_equal_raw(x.as_ptr(), y.as_ptr(), x.len()) }
+}
+
+/// Compare `n` bytes at the given pointers for equality.
+///
+/// This returns true if and only if `*x.add(i) == *y.add(i)` for all
+/// `0 <= i < n`.
+///
+/// # Inlining
+///
+/// This routine is marked `inline(always)`. If you want to call this function
+/// in a way that is not always inlined, you'll need to wrap a call to it in
+/// another function that is marked as `inline(never)` or just `inline`.
+///
+/// # Motivation
+///
+/// Why not use slice equality instead? Well, slice equality usually results in
+/// a call out to the current platform's `libc` which might not be inlineable
+/// or have other overhead. This routine isn't guaranteed to be a win, but it
+/// might be in some cases.
+///
+/// # Safety
+///
+/// * Both `x` and `y` must be valid for reads of up to `n` bytes.
+/// * Both `x` and `y` must point to an initialized value.
+/// * Both `x` and `y` must each point to an allocated object and
+/// must either be in bounds or at most one byte past the end of the
+/// allocated object. `x` and `y` do not need to point to the same allocated
+/// object, but they may.
+/// * Both `x` and `y` must be _derived from_ a pointer to their respective
+/// allocated objects.
+/// * The distance between `x` and `x+n` must not overflow `isize`. Similarly
+/// for `y` and `y+n`.
+/// * The distance being in bounds must not rely on "wrapping around" the
+/// address space.
+#[inline(always)]
+unsafe fn is_equal_raw(mut x: *const u8, mut y: *const u8, n: usize) -> bool {
+ // If we don't have enough bytes to do 4-byte at a time loads, then
+ // handle each possible length specially. Note that I used to have a
+ // byte-at-a-time loop here and that turned out to be quite a bit slower
+ // for the memmem/pathological/defeat-simple-vector-alphabet benchmark.
+ if n < 4 {
+ return match n {
+ 0 => true,
+ 1 => x.read() == y.read(),
+ 2 => {
+ x.cast::<u16>().read_unaligned()
+ == y.cast::<u16>().read_unaligned()
+ }
+ // I also tried copy_nonoverlapping here and it looks like the
+ // codegen is the same.
+ 3 => x.cast::<[u8; 3]>().read() == y.cast::<[u8; 3]>().read(),
+ _ => unreachable!(),
+ };
+ }
+ // When we have 4 or more bytes to compare, then proceed in chunks of 4 at
+ // a time using unaligned loads.
+ //
+ // Also, why do 4 byte loads instead of, say, 8 byte loads? The reason is
+ // that this particular version of memcmp is likely to be called with tiny
+ // needles. That means that if we do 8 byte loads, then a higher proportion
+ // of memcmp calls will use the slower variant above. With that said, this
+ // is a hypothesis and is only loosely supported by benchmarks. There's
+ // likely some improvement that could be made here. The main thing here
+ // though is to optimize for latency, not throughput.
+
+ // SAFETY: The caller is responsible for ensuring the pointers we get are
+ // valid and readable for at least `n` bytes. We also do unaligned loads,
+ // so there's no need to ensure we're aligned. (This is justified by this
+ // routine being specifically for short strings.)
+ let xend = x.add(n.wrapping_sub(4));
+ let yend = y.add(n.wrapping_sub(4));
+ while x < xend {
+ let vx = x.cast::<u32>().read_unaligned();
+ let vy = y.cast::<u32>().read_unaligned();
+ if vx != vy {
+ return false;
+ }
+ x = x.add(4);
+ y = y.add(4);
+ }
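+    // The final load below deliberately re-reads the last four bytes of each
+    // buffer, which may overlap bytes already compared in the loop above.
+    // That overlap is how lengths that aren't a multiple of 4 are handled
+    // without a byte-at-a-time tail loop.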
+ let vx = xend.cast::<u32>().read_unaligned();
+ let vy = yend.cast::<u32>().read_unaligned();
+ vx == vy
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn equals_different_lengths() {
+ assert!(!is_equal(b"", b"a"));
+ assert!(!is_equal(b"a", b""));
+ assert!(!is_equal(b"ab", b"a"));
+ assert!(!is_equal(b"a", b"ab"));
+ }
+
+ #[test]
+ fn equals_mismatch() {
+ let one_mismatch = [
+ (&b"a"[..], &b"x"[..]),
+ (&b"ab"[..], &b"ax"[..]),
+ (&b"abc"[..], &b"abx"[..]),
+ (&b"abcd"[..], &b"abcx"[..]),
+ (&b"abcde"[..], &b"abcdx"[..]),
+ (&b"abcdef"[..], &b"abcdex"[..]),
+ (&b"abcdefg"[..], &b"abcdefx"[..]),
+ (&b"abcdefgh"[..], &b"abcdefgx"[..]),
+ (&b"abcdefghi"[..], &b"abcdefghx"[..]),
+ (&b"abcdefghij"[..], &b"abcdefghix"[..]),
+ (&b"abcdefghijk"[..], &b"abcdefghijx"[..]),
+ (&b"abcdefghijkl"[..], &b"abcdefghijkx"[..]),
+ (&b"abcdefghijklm"[..], &b"abcdefghijklx"[..]),
+ (&b"abcdefghijklmn"[..], &b"abcdefghijklmx"[..]),
+ ];
+ for (x, y) in one_mismatch {
+ assert_eq!(x.len(), y.len(), "lengths should match");
+ assert!(!is_equal(x, y));
+ assert!(!is_equal(y, x));
+ }
+ }
+
+ #[test]
+ fn equals_yes() {
+ assert!(is_equal(b"", b""));
+ assert!(is_equal(b"a", b"a"));
+ assert!(is_equal(b"ab", b"ab"));
+ assert!(is_equal(b"abc", b"abc"));
+ assert!(is_equal(b"abcd", b"abcd"));
+ assert!(is_equal(b"abcde", b"abcde"));
+ assert!(is_equal(b"abcdef", b"abcdef"));
+ assert!(is_equal(b"abcdefg", b"abcdefg"));
+ assert!(is_equal(b"abcdefgh", b"abcdefgh"));
+ assert!(is_equal(b"abcdefghi", b"abcdefghi"));
+ }
+
+ #[test]
+ fn prefix() {
+ assert!(is_prefix(b"", b""));
+ assert!(is_prefix(b"a", b""));
+ assert!(is_prefix(b"ab", b""));
+ assert!(is_prefix(b"foo", b"foo"));
+ assert!(is_prefix(b"foobar", b"foo"));
+
+ assert!(!is_prefix(b"foo", b"fob"));
+ assert!(!is_prefix(b"foobar", b"fob"));
+ }
+}
diff --git a/third_party/rust/aho-corasick/src/packed/rabinkarp.rs b/third_party/rust/aho-corasick/src/packed/rabinkarp.rs
new file mode 100644
index 0000000000..fdd8a6f0b4
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/packed/rabinkarp.rs
@@ -0,0 +1,168 @@
+use alloc::{sync::Arc, vec, vec::Vec};
+
+use crate::{packed::pattern::Patterns, util::search::Match, PatternID};
+
+/// The type of the rolling hash used in the Rabin-Karp algorithm.
+type Hash = usize;
+
+/// The number of buckets to store our patterns in. We don't want this to be
+/// too big in order to avoid wasting memory, but we don't want it to be too
+/// small either to avoid spending too much time confirming literals.
+///
+/// The number of buckets MUST be a power of two. Otherwise, determining the
+/// bucket from a hash will slow down the code considerably. Using a power
+/// of two means `hash % NUM_BUCKETS` can compile down to a simple `and`
+/// instruction.
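+///
+/// For example, with 64 buckets, `hash % NUM_BUCKETS` can be compiled down to
+/// `hash & 63`.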
+const NUM_BUCKETS: usize = 64;
+
+/// An implementation of the Rabin-Karp algorithm. The main idea of this
+/// algorithm is to maintain a rolling hash as it moves through the input, and
+/// then check whether that hash corresponds to the same hash for any of the
+/// patterns we're looking for.
+///
+/// A drawback of naively scaling Rabin-Karp to multiple patterns is that
+/// it requires all of the patterns to be the same length, which in turn
+/// corresponds to the number of bytes to hash. We adapt this to work for
+/// multiple patterns of varying size by fixing the number of bytes to hash
+/// to be the length of the smallest pattern. We also split the patterns into
+/// several buckets to hopefully make the confirmation step faster.
+///
+/// Wikipedia has a decent explanation, if a bit heavy on the theory:
+/// https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm
+///
+/// But ESMAJ provides something a bit more concrete:
+/// https://www-igm.univ-mlv.fr/~lecroq/string/node5.html
+#[derive(Clone, Debug)]
+pub(crate) struct RabinKarp {
+ /// The patterns we're searching for.
+ patterns: Arc<Patterns>,
+ /// The order of patterns in each bucket is significant. Namely, they are
+ /// arranged such that the first one to match is the correct match. This
+ /// may not necessarily correspond to the order provided by the caller.
+ /// For example, if leftmost-longest semantics are used, then the patterns
+ /// are sorted by their length in descending order. If leftmost-first
+ /// semantics are used, then the patterns are sorted by their pattern ID
+ /// in ascending order (which corresponds to the caller's order).
+ buckets: Vec<Vec<(Hash, PatternID)>>,
+ /// The length of the hashing window. Generally, this corresponds to the
+ /// length of the smallest pattern.
+ hash_len: usize,
+ /// The factor to subtract out of a hash before updating it with a new
+ /// byte.
+ hash_2pow: usize,
+}
+
+impl RabinKarp {
+ /// Compile a new Rabin-Karp matcher from the patterns given.
+ ///
+ /// This panics if any of the patterns in the collection are empty, or if
+ /// the collection is itself empty.
+ pub(crate) fn new(patterns: &Arc<Patterns>) -> RabinKarp {
+ assert!(patterns.len() >= 1);
+ let hash_len = patterns.minimum_len();
+ assert!(hash_len >= 1);
+
+ let mut hash_2pow = 1usize;
+ for _ in 1..hash_len {
+ hash_2pow = hash_2pow.wrapping_shl(1);
+ }
+
+ let mut rk = RabinKarp {
+ patterns: Arc::clone(patterns),
+ buckets: vec![vec![]; NUM_BUCKETS],
+ hash_len,
+ hash_2pow,
+ };
+ for (id, pat) in patterns.iter() {
+ let hash = rk.hash(&pat.bytes()[..rk.hash_len]);
+ let bucket = hash % NUM_BUCKETS;
+ rk.buckets[bucket].push((hash, id));
+ }
+ rk
+ }
+
+    /// Return the first matching pattern in the given haystack, beginning the
+ /// search at `at`.
+ pub(crate) fn find_at(
+ &self,
+ haystack: &[u8],
+ mut at: usize,
+ ) -> Option<Match> {
+ assert_eq!(NUM_BUCKETS, self.buckets.len());
+
+ if at + self.hash_len > haystack.len() {
+ return None;
+ }
+ let mut hash = self.hash(&haystack[at..at + self.hash_len]);
+ loop {
+ let bucket = &self.buckets[hash % NUM_BUCKETS];
+ for &(phash, pid) in bucket {
+ if phash == hash {
+ if let Some(c) = self.verify(pid, haystack, at) {
+ return Some(c);
+ }
+ }
+ }
+ if at + self.hash_len >= haystack.len() {
+ return None;
+ }
+ hash = self.update_hash(
+ hash,
+ haystack[at],
+ haystack[at + self.hash_len],
+ );
+ at += 1;
+ }
+ }
+
+ /// Returns the approximate total amount of heap used by this searcher, in
+ /// units of bytes.
+ pub(crate) fn memory_usage(&self) -> usize {
+ self.buckets.len() * core::mem::size_of::<Vec<(Hash, PatternID)>>()
+ + self.patterns.len() * core::mem::size_of::<(Hash, PatternID)>()
+ }
+
+ /// Verify whether the pattern with the given id matches at
+ /// `haystack[at..]`.
+ ///
+ /// We tag this function as `cold` because it helps improve codegen.
+ /// Intuitively, it would seem like inlining it would be better. However,
+    /// the only time this is called and a match is not found is when there
+    /// is a hash collision, or when a prefix of a pattern matches but
+ /// the entire pattern doesn't match. This is hopefully fairly rare, and
+ /// if it does occur a lot, it's going to be slow no matter what we do.
+ #[cold]
+ fn verify(
+ &self,
+ id: PatternID,
+ haystack: &[u8],
+ at: usize,
+ ) -> Option<Match> {
+ let pat = self.patterns.get(id);
+ if pat.is_prefix(&haystack[at..]) {
+ Some(Match::new(id, at..at + pat.len()))
+ } else {
+ None
+ }
+ }
+
+ /// Hash the given bytes.
+ fn hash(&self, bytes: &[u8]) -> Hash {
+ assert_eq!(self.hash_len, bytes.len());
+
+ let mut hash = 0usize;
+ for &b in bytes {
+ hash = hash.wrapping_shl(1).wrapping_add(b as usize);
+ }
+ hash
+ }
+
+ /// Update the hash given based on removing `old_byte` at the beginning
+ /// of some byte string, and appending `new_byte` to the end of that same
+ /// byte string.
+ fn update_hash(&self, prev: Hash, old_byte: u8, new_byte: u8) -> Hash {
+ prev.wrapping_sub((old_byte as usize).wrapping_mul(self.hash_2pow))
+ .wrapping_shl(1)
+ .wrapping_add(new_byte as usize)
+ }
+}
diff --git a/third_party/rust/aho-corasick/src/packed/teddy/README.md b/third_party/rust/aho-corasick/src/packed/teddy/README.md
new file mode 100644
index 0000000000..f0928cbe5c
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/packed/teddy/README.md
@@ -0,0 +1,386 @@
+Teddy is a SIMD accelerated multiple substring matching algorithm. The name
+and the core ideas in the algorithm were learned from the [Hyperscan][1_u]
+project. The implementation in this repository was mostly motivated for use in
+accelerating regex searches by searching for small sets of required literals
+extracted from the regex.
+
+
+# Background
+
+The key idea of Teddy is to do *packed* substring matching. In the literature,
+packed substring matching is the idea of examining multiple bytes in a haystack
+at a time to detect matches. Implementations of, for example, memchr (which
+detects matches of a single byte) have been doing this for years. Only
+recently, with the introduction of various SIMD instructions, has this been
+extended to substring matching. The PCMPESTRI instruction (and its relatives),
+for example, implements substring matching in hardware. It is, however, limited
+to substrings of length 16 bytes or fewer, but this restriction is fine in a
+regex engine, since we rarely care about the performance difference between
+searching for a 16 byte literal and a 16 + N literal; 16 is already long
+enough. The key downside of the PCMPESTRI instruction, on current (2016) CPUs
+at least, is its latency and throughput. As a result, it is often faster to
+do substring search with a Boyer-Moore (or Two-Way) variant and a well placed
+memchr to quickly skip through the haystack.
+
+There are fewer results from the literature on packed substring matching,
+and even fewer for packed multiple substring matching. Ben-Kiki et al. [2]
+describe the use of PCMPESTRI for substring matching, but the treatment is
+mostly theoretical and hand-waves performance. There is other theoretical work
+done by Bille [3] as well.
+
+The rest of the work in the field, as far as I'm aware, is by Faro and Kulekci
+and is generally focused on multiple pattern search. Their first paper [4a]
+introduces the concept of a fingerprint, which is computed for every block of
+N bytes in every pattern. The haystack is then scanned N bytes at a time and
+a fingerprint is computed in the same way it was computed for blocks in the
+patterns. If the fingerprint corresponds to one that was found in a pattern,
+then a verification step follows to confirm that one of the substrings with the
+corresponding fingerprint actually matches at the current location. Various
+implementation tricks are employed to make sure the fingerprint lookup is fast;
+typically by truncating the fingerprint. (This may, of course, provoke more
+steps in the verification process, so a balance must be struck.)
+
+The main downside of [4a] is that the minimum substring length is 32 bytes,
+presumably because of how the algorithm uses certain SIMD instructions. This
+essentially makes it useless for general purpose regex matching, where a small
+number of short patterns is far more likely.
+
+Faro and Kulekci published another paper [4b] that is conceptually very similar
+to [4a]. The key difference is that it uses the CRC32 instruction (introduced
+as part of SSE 4.2) to compute fingerprint values. This also enables the
+algorithm to work effectively on substrings as short as 7 bytes with 4 byte
+windows. 7 bytes is unfortunately still too long. The window could be
+technically shrunk to 2 bytes, thereby reducing minimum length to 3, but the
+small window size ends up negating most performance benefits—and it's likely
+the common case in a general purpose regex engine.
+
+Faro and Kulekci also published [4c] that appears to be intended as a
+replacement to using PCMPESTRI. In particular, it is specifically motivated by
+the high throughput/latency time of PCMPESTRI and therefore chooses other SIMD
+instructions that are faster. While this approach works for short substrings,
+I personally couldn't see a way to generalize it to multiple substring search.
+
+Faro and Kulekci have another paper [4d] that I haven't been able to read
+because it is behind a paywall.
+
+
+# Teddy
+
+Finally, we get to Teddy. If the above literature review is complete, then it
+appears that Teddy is a novel algorithm. More than that, in my experience, it
+completely blows away the competition for short substrings, which is exactly
+what we want in a general purpose regex engine. Again, the algorithm appears
+to be developed by the authors of [Hyperscan][1_u]. Hyperscan was open sourced
+late 2015, and no earlier history could be found. Therefore, tracking the exact
+provenance of the algorithm with respect to the published literature seems
+difficult.
+
+At a high level, Teddy works somewhat similarly to the fingerprint algorithms
+published by Faro and Kulekci, but Teddy does it in a way that scales a bit
+better. Namely:
+
+1. Teddy's core algorithm scans the haystack in 16 (for SSE, or 32 for AVX)
+ byte chunks. 16 (or 32) is significant because it corresponds to the number
+ of bytes in a SIMD vector.
+2. Bitwise operations are performed on each chunk to discover if any region of
+ it matches a set of precomputed fingerprints from the patterns. If there are
+ matches, then a verification step is performed. In this implementation, our
+ verification step is naive. This can be improved upon.
+
+The details to make this work are quite clever. First, we must choose how to
+pick our fingerprints. In Hyperscan's implementation, I *believe* they use the
+last N bytes of each substring, where N can be at most the length of the
+shortest substring in the set being searched. In this implementation, we use the
+first N bytes of each substring. (The tradeoffs between these choices aren't
+yet clear to me.) We then must figure out how to quickly test whether an
+occurrence of any fingerprint from the set of patterns appears in a 16 byte
+block from the haystack. To keep things simple, let's assume N = 1 and examine
+some examples to motivate the approach. Here are our patterns:
+
+```ignore
+foo
+bar
+baz
+```
+
+The corresponding fingerprints, for N = 1, are `f`, `b` and `b`. Now let's set
+our 16 byte block to:
+
+```ignore
+bat cat foo bump
+xxxxxxxxxxxxxxxx
+```
+
+To cut to the chase, Teddy works by using bitsets. In particular, Teddy creates
+a mask that allows us to quickly compute membership of a fingerprint in a 16
+byte block that also tells which pattern the fingerprint corresponds to. In
+this case, our fingerprint is a single byte, so an appropriate abstraction is
+a map from a single byte to a list of patterns that contain that fingerprint:
+
+```ignore
+f |--> foo
+b |--> bar, baz
+```
+
+Now, all we need to do is figure out how to represent this map in vector space
+and use normal SIMD operations to perform a lookup. The first simplification
+we can make is to represent our patterns as bit fields occupying a single
+byte. This is important, because a single SIMD vector can store 16 bytes.
+
+```ignore
+f |--> 00000001
+b |--> 00000010, 00000100
+```
+
+How do we perform lookup though? It turns out that SSSE3 introduced a very cool
+instruction called PSHUFB. The instruction takes two SIMD vectors, `A` and `B`,
+and returns a third vector `C`. All vectors are treated as 16 8-bit integers.
+`C` is formed by `C[i] = A[B[i]]`. (This is a bit of a simplification, but true
+for the purposes of this algorithm. For full details, see [Intel's Intrinsics
+Guide][5_u].) This essentially lets us use the values in `B` to lookup values
+in `A`.
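+
+As a rough scalar sketch of those semantics (a model for intuition only, not
+how the implementation actually issues the instruction), with `a` and `b`
+standing in for the two vectors as 16-byte arrays:
+
+```ignore
+// Scalar model of C = PSHUFB(A, B) on 128-bit vectors: a lane of B with its
+// high bit set produces 0; otherwise its low four bits index into A.
+let mut c = [0u8; 16];
+for i in 0..16 {
+    c[i] = if b[i] & 0x80 != 0 { 0 } else { a[usize::from(b[i] & 0xF)] };
+}
+```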
+
+If we could somehow cause `B` to contain our 16 byte block from the haystack,
+and if `A` could contain our bitmasks, then we'd end up with something like
+this for `A`:
+
+```ignore
+ 0x00 0x01 ... 0x62 ... 0x66 ... 0xFF
+A = 0 0 00000110 00000001 0
+```
+
+And if `B` contains our window from our haystack, we could use shuffle to take
+the values from `B` and use them to look up our bitsets in `A`. But of course,
+we can't do this because `A` in the above example contains 256 bytes, which
+is much larger than the size of a SIMD vector.
+
+Nybbles to the rescue! A nybble is 4 bits. Instead of one mask to hold all of
+our bitsets, we can use two masks, where one mask corresponds to the lower four
+bits of our fingerprint and the other mask corresponds to the upper four bits.
+So our map now looks like:
+
+```ignore
+'f' & 0xF = 0x6 |--> 00000001
+'f' >> 4 = 0x6 |--> 00000111
+'b' & 0xF = 0x2 |--> 00000110
+'b' >> 4 = 0x6 |--> 00000111
+```
+
+Notice that the bitsets for each nybble correspond to the union of all
+fingerprints that contain that nybble. For example, both `f` and `b` have the
+same upper 4 bits but differ on the lower 4 bits. Putting this together, we
+have `A0`, `A1` and `B`, where `A0` is our mask for the lower nybble, `A1` is
+our mask for the upper nybble and `B` is our 16 byte block from the haystack:
+
+```ignore
+ 0x00 0x01 0x02 0x03 ... 0x06 ... 0xF
+A0 = 0 0 00000110 0 00000001 0
+A1 = 0 0 0 0 00000111 0
+B = b a t _ t p
+B = 0x62 0x61 0x74 0x20 0x74 0x70
+```
+
+But of course, we can't use `B` with `PSHUFB` yet, since its values are 8 bits,
+and we need indexes that are at most 4 bits (corresponding to one of 16
+values). We can apply the same transformation to split `B` into lower and upper
+nybbles as we did `A`. As before, `B0` corresponds to the lower nybbles and
+`B1` corresponds to the upper nybbles:
+
+```ignore
+ b a t _ c a t _ f o o _ b u m p
+B0 = 0x2 0x1 0x4 0x0 0x3 0x1 0x4 0x0 0x6 0xF 0xF 0x0 0x2 0x5 0xD 0x0
+B1 = 0x6 0x6 0x7 0x2 0x6 0x6 0x7 0x2 0x6 0x6 0x6 0x2 0x6 0x7 0x6 0x7
+```
+
+And now we have a nice correspondence. `B0` can index `A0` and `B1` can index
+`A1`. Here's what we get when we apply `C0 = PSHUFB(A0, B0)`:
+
+```ignore
+ b a ... f o ... p
+ A0[0x2] A0[0x1] A0[0x6] A0[0xF] A0[0x0]
+C0 = 00000110 0 00000001 0 0
+```
+
+And `C1 = PSHUFB(A1, B1)`:
+
+```ignore
+ b a ... f o ... p
+ A1[0x6] A1[0x6] A1[0x6] A1[0x6] A1[0x7]
+C1 = 00000111 00000111 00000111 00000111 0
+```
+
+Notice how neither one of `C0` or `C1` is guaranteed to report fully correct
+results all on its own. For example, `C1` claims that `b` is a fingerprint for
+the pattern `foo` (since `A1[0x6] = 00000111`), and that `o` is a fingerprint
+for all of our patterns. But if we combined `C0` and `C1` with an `AND`
+operation:
+
+```ignore
+ b a ... f o ... p
+C = 00000110 0 00000001 0 0
+```
+
+Then we now have that `C[i]` contains a bitset corresponding to the matching
+fingerprints in a haystack's 16 byte block, where `i` is the `ith` byte in that
+block.
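+
+Putting those pieces together, here is a scalar sketch of the candidate
+computation for the running example (the names and the byte-at-a-time loop are
+illustrative only; the real code does all 16 lookups at once with two PSHUFBs
+and an AND):
+
+```ignore
+// Build the nybble masks from the single byte fingerprints. `bit` is the
+// bucket bit assigned to each pattern: foo = 00000001, bar = 00000010,
+// baz = 00000100.
+let (mut a0, mut a1) = ([0u8; 16], [0u8; 16]);
+let fps = [(b'f', 0b0000_0001u8), (b'b', 0b0000_0010), (b'b', 0b0000_0100)];
+for (fp, bit) in fps {
+    a0[usize::from(fp & 0xF)] |= bit;
+    a1[usize::from(fp >> 4)] |= bit;
+}
+// Per haystack byte, the candidate bitset is the AND of both nybble lookups.
+// A non-zero result means some pattern's fingerprint may occur at that byte.
+let candidate = |b: u8| a0[usize::from(b & 0xF)] & a1[usize::from(b >> 4)];
+assert_eq!(0b0000_0110, candidate(b'b'));
+assert_eq!(0b0000_0001, candidate(b'f'));
+assert_eq!(0b0000_0000, candidate(b'o'));
+```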
+
+Once we have that, we can look for the position of the least significant bit
+in `C`. (Least significant because we only target little endian here. Thus,
+the least significant bytes correspond to bytes in our haystack at a lower
+address.) That position, modulo `8`, gives us the pattern that the fingerprint
+matches. That position, integer divided by `8`, also gives us the byte offset
+that the fingerprint occurs in inside the 16 byte haystack block. Using those
+two pieces of information, we can run a verification procedure that tries
+to match all substrings containing that fingerprint at that position in the
+haystack.
+
+
+# Implementation notes
+
+The problem with the algorithm as described above is that it uses a single byte
+for a fingerprint. This will work well if the fingerprints are rare in the
+haystack (e.g., capital letters or special characters in normal English text),
+but if the fingerprints are common, you'll wind up spending too much time in
+the verification step, which effectively negates the performance benefits of
+scanning 16 bytes at a time. Remember, the key to the performance of this
+algorithm is to do as little work as possible per 16 (or 32) bytes.
+
+This algorithm can be extrapolated in a relatively straight-forward way to use
+larger fingerprints. That is, instead of a single byte prefix, we might use a
+two, three or four byte prefix. The implementation here implements N = {1, 2, 3, 4}
+and always picks the largest N possible. The rationale is that the bigger the
+fingerprint, the fewer verification steps we'll do. Of course, if N is too
+large, then we'll end up doing too much on each step.
+
+The way to extend it is:
+
+1. Add a mask for each byte in the fingerprint. (Remember that each mask is
+ composed of two SIMD vectors.) This results in a value of `C` for each byte
+ in the fingerprint while searching.
+2. When testing each 16 (or 32) byte block, each value of `C` must be shifted
+ so that they are aligned. Once aligned, they should all be `AND`'d together.
+ This will give you only the bitsets corresponding to the full match of the
+ fingerprint. To do this, one needs to save the last byte (for N=2) or last
+ two bytes (for N=3) from the previous iteration, and then line them up with
+ the first one or two bytes of the next iteration.
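+
+In scalar terms, the combination step for N = 2 looks roughly like this (a
+sketch only; `c0` and `c1` stand for the per-byte lookup results against the
+masks for fingerprint byte 0 and fingerprint byte 1, respectively):
+
+```ignore
+// A candidate fingerprint ending at haystack position i requires byte 0 of
+// the fingerprint to match at i - 1 and byte 1 to match at i. At a chunk
+// boundary, c0[i - 1] comes from the previous iteration's saved result.
+let candidate_at = |i: usize| c0[i - 1] & c1[i];
+// For N = 3, the same idea extends to c0[i - 2] & c1[i - 1] & c2[i].
+```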
+
+## Verification
+
+Verification generally follows the procedure outlined above. The tricky parts
+are in the right formulation of operations to get our bits out of our vectors.
+We have a limited set of operations available to us on SIMD vectors as 128-bit
+or 256-bit numbers, so we wind up needing to rip out 2 (or 4) 64-bit integers
+from our vectors, and then run our verification step on each of those. The
+verification step looks at the least significant bit set, and from its
+position, we can derive the byte offset and bucket. (Again, as described
+above.) Once we know the bucket, we do a fairly naive exhaustive search for
+every literal in that bucket. (Hyperscan is a bit smarter here and uses a hash
+table, but I haven't had time to thoroughly explore that. A few initial
+half-hearted attempts resulted in worse performance.)
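+
+A sketch of that scan over one extracted 64-bit lane (the names `scan_lane`
+and `chunk_start` are illustrative, not the actual routines used in the
+implementation):
+
+```ignore
+// `lane` holds 8 candidate bytes ripped out of the vector; `chunk_start` is
+// the haystack offset of the first of those bytes.
+fn scan_lane(mut lane: u64, chunk_start: usize) {
+    while lane != 0 {
+        let bit = lane.trailing_zeros() as usize;
+        let offset = chunk_start + bit / 8; // haystack byte of the candidate
+        let bucket = bit % 8;               // bucket of patterns to confirm
+        // ...exhaustively try every pattern in `bucket` at `offset`...
+        lane &= lane - 1; // clear the lowest set bit and keep scanning
+    }
+}
+```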
+
+## AVX
+
+The AVX version of Teddy extrapolates almost perfectly from the SSE version.
+The only hiccup is that PALIGNR is used to align chunks in the 128-bit version,
+and there is no equivalent instruction in AVX. AVX does have VPALIGNR, but it
+only works within 128-bit lanes. So there's a bit of tomfoolery to get around
+this by shuffling the vectors before calling VPALIGNR.
+
+The only other aspect to AVX is that since our masks are still fundamentally
+16-bytes (0x0-0xF), they are duplicated to 32-bytes, so that they can apply to
+32-byte chunks.
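+
+Concretely (a sketch only; `mask128` stands for one 16-byte nybble table), the
+duplication is just a copy of the same table into both 128-bit lanes, because
+VPSHUFB performs two independent 16-byte lookups, one per lane:
+
+```ignore
+let mask256: [u8; 32] = {
+    let mut m = [0u8; 32];
+    m[..16].copy_from_slice(&mask128);
+    m[16..].copy_from_slice(&mask128);
+    m
+};
+```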
+
+## Fat Teddy
+
+In the version of Teddy described above, 8 buckets are used to group patterns
+that we want to search for. However, when AVX is available, we can extend the
+number of buckets to 16 by permitting each byte in our masks to use 16-bits
+instead of 8-bits to represent the buckets it belongs to. (This variant is also
+in Hyperscan.) However, what we give up is the ability to scan 32 bytes at a
+time, even though we're using AVX. Instead, we have to scan 16 bytes at a time.
+What we gain, though, is (hopefully) less work in our verification routine.
+If patterns are spread out across more buckets, then there should overall
+be fewer false positives. In general, Fat Teddy permits us to grow our capacity
+a bit and search for more literals before Teddy gets overwhelmed.
+
+The tricky part of Fat Teddy is in how we adjust our masks and our verification
+procedure. For the masks, we simply represent the first 8 buckets in each of
+the low 16 bytes, and then the second 8 buckets in each of the high 16 bytes.
+Then, in the search loop, instead of loading 32 bytes from the haystack, we
+load the same 16 bytes from the haystack into both the low and high 16 byte
+portions of our 256-bit vector. So for example, a mask might look like this:
+
+ bits: 00100001 00000000 ... 11000000 00000000 00000001 ... 00000000
+ byte: 31 30 16 15 14 0
+ offset: 15 14 0 15 14 0
+ buckets: 8-15 8-15 8-15 0-7 0-7 0-7
+
+Where `byte` is the position in the vector (higher numbers corresponding to
+more significant bits), `offset` is the corresponding position in the haystack
+chunk, and `buckets` corresponds to the bucket assignments for that particular
+byte.
+
+In particular, notice that the bucket assignments for offset `0` are spread
+out between bytes `0` and `16`. This works well for the chunk-by-chunk search
+procedure, but verification really wants to process all bucket assignments for
+each offset at once. Otherwise, we might wind up finding a match at offset
+`1` in one of the first 8 buckets, when we really should have reported a match
+at offset `0` in one of the second 8 buckets. (Because we want the leftmost
+match.)
+
+Thus, for verification, we rearrange the above vector such that it is a
+sequence of 16-bit integers, where the least significant 16-bit integer
+corresponds to all of the bucket assignments for offset `0`. So with the
+above vector, the least significant 16-bit integer would be
+
+    11000000 00000000
+
+which was taken from bytes `16` and `0`. Then the verification step pretty much
+runs as described, except with 16 buckets instead of 8.
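+
+In scalar terms, that rearrangement interleaves the two halves of the vector
+(a sketch only; `v` stands for the 32 candidate bytes, where the low half
+holds buckets 0-7 and the high half holds buckets 8-15):
+
+```ignore
+// Produce one u16 of bucket assignments per haystack offset 0..16.
+let assignments: [u16; 16] = core::array::from_fn(|offset| {
+    u16::from(v[offset]) | (u16::from(v[offset + 16]) << 8)
+});
+```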
+
+
+# References
+
+- **[1]** [Hyperscan on GitHub](https://github.com/intel/hyperscan),
+ [webpage](https://www.hyperscan.io/)
+- **[2a]** Ben-Kiki, O., Bille, P., Breslauer, D., Gasieniec, L., Grossi, R.,
+ & Weimann, O. (2011).
+ _Optimal packed string matching_.
+ In LIPIcs-Leibniz International Proceedings in Informatics (Vol. 13).
+ Schloss Dagstuhl-Leibniz-Zentrum fuer Informatik.
+ DOI: 10.4230/LIPIcs.FSTTCS.2011.423.
+ [PDF](https://drops.dagstuhl.de/opus/volltexte/2011/3355/pdf/37.pdf).
+- **[2b]** Ben-Kiki, O., Bille, P., Breslauer, D., Ga̧sieniec, L., Grossi, R.,
+ & Weimann, O. (2014).
+ _Towards optimal packed string matching_.
+ Theoretical Computer Science, 525, 111-129.
+ DOI: 10.1016/j.tcs.2013.06.013.
+ [PDF](https://www.cs.haifa.ac.il/~oren/Publications/bpsm.pdf).
+- **[3]** Bille, P. (2011).
+ _Fast searching in packed strings_.
+ Journal of Discrete Algorithms, 9(1), 49-56.
+ DOI: 10.1016/j.jda.2010.09.003.
+ [PDF](https://www.sciencedirect.com/science/article/pii/S1570866710000353).
+- **[4a]** Faro, S., & Külekci, M. O. (2012, October).
+ _Fast multiple string matching using streaming SIMD extensions technology_.
+ In String Processing and Information Retrieval (pp. 217-228).
+ Springer Berlin Heidelberg.
+ DOI: 10.1007/978-3-642-34109-0_23.
+ [PDF](https://www.dmi.unict.it/faro/papers/conference/faro32.pdf).
+- **[4b]** Faro, S., & Külekci, M. O. (2013, September).
+ _Towards a Very Fast Multiple String Matching Algorithm for Short Patterns_.
+ In Stringology (pp. 78-91).
+ [PDF](https://www.dmi.unict.it/faro/papers/conference/faro36.pdf).
+- **[4c]** Faro, S., & Külekci, M. O. (2013, January).
+ _Fast packed string matching for short patterns_.
+  In Proceedings of the Meeting on Algorithm Engineering & Experiments
+ (pp. 113-121).
+ Society for Industrial and Applied Mathematics.
+ [PDF](https://arxiv.org/pdf/1209.6449.pdf).
+- **[4d]** Faro, S., & Külekci, M. O. (2014).
+ _Fast and flexible packed string matching_.
+ Journal of Discrete Algorithms, 28, 61-72.
+ DOI: 10.1016/j.jda.2014.07.003.
+
+[1_u]: https://github.com/intel/hyperscan
+[5_u]: https://software.intel.com/sites/landingpage/IntrinsicsGuide
diff --git a/third_party/rust/aho-corasick/src/packed/teddy/builder.rs b/third_party/rust/aho-corasick/src/packed/teddy/builder.rs
new file mode 100644
index 0000000000..be91777beb
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/packed/teddy/builder.rs
@@ -0,0 +1,780 @@
+use core::{
+ fmt::Debug,
+ panic::{RefUnwindSafe, UnwindSafe},
+};
+
+use alloc::sync::Arc;
+
+use crate::packed::{ext::Pointer, pattern::Patterns, teddy::generic::Match};
+
+/// A builder for constructing a Teddy matcher.
+///
+/// The builder primarily permits fine grained configuration of the Teddy
+/// matcher. Most options are made only available for testing/benchmarking
+/// purposes. In reality, options are automatically determined by the nature
+/// and number of patterns given to the builder.
+#[derive(Clone, Debug)]
+pub(crate) struct Builder {
+ /// When none, this is automatically determined. Otherwise, `false` means
+ /// slim Teddy is used (8 buckets) and `true` means fat Teddy is used
+ /// (16 buckets). Fat Teddy requires AVX2, so if that CPU feature isn't
+ /// available and Fat Teddy was requested, no matcher will be built.
+ only_fat: Option<bool>,
+ /// When none, this is automatically determined. Otherwise, `false` means
+    /// that 128-bit vectors will be used (up to SSSE3 instructions) whereas
+ /// `true` means that 256-bit vectors will be used. As with `fat`, if
+ /// 256-bit vectors are requested and they aren't available, then a
+ /// searcher will not be built.
+ only_256bit: Option<bool>,
+ /// When true (the default), the number of patterns will be used as a
+ /// heuristic for refusing construction of a Teddy searcher. The point here
+ /// is that too many patterns can overwhelm Teddy. But this can be disabled
+ /// in cases where the caller knows better.
+ heuristic_pattern_limits: bool,
+}
+
+impl Default for Builder {
+ fn default() -> Builder {
+ Builder::new()
+ }
+}
+
+impl Builder {
+ /// Create a new builder for configuring a Teddy matcher.
+ pub(crate) fn new() -> Builder {
+ Builder {
+ only_fat: None,
+ only_256bit: None,
+ heuristic_pattern_limits: true,
+ }
+ }
+
+ /// Build a matcher for the set of patterns given. If a matcher could not
+ /// be built, then `None` is returned.
+ ///
+    /// Generally, a matcher isn't built if the necessary CPU features aren't
+    /// available, if the target isn't supported, or if the searcher is
+    /// believed to be slower than standard techniques (i.e., if there are too
+    /// many literals).
+ pub(crate) fn build(&self, patterns: Arc<Patterns>) -> Option<Searcher> {
+ self.build_imp(patterns)
+ }
+
+ /// Require the use of Fat (true) or Slim (false) Teddy. Fat Teddy uses
+    /// 16 buckets whereas Slim Teddy uses 8 buckets. More buckets are useful
+ /// for a larger set of literals.
+ ///
+ /// `None` is the default, which results in an automatic selection based
+ /// on the number of literals and available CPU features.
+ pub(crate) fn only_fat(&mut self, yes: Option<bool>) -> &mut Builder {
+ self.only_fat = yes;
+ self
+ }
+
+ /// Request the use of 256-bit vectors (true) or 128-bit vectors (false).
+ /// Generally, a larger vector size is better since it either permits
+ /// matching more patterns or matching more bytes in the haystack at once.
+ ///
+ /// `None` is the default, which results in an automatic selection based on
+ /// the number of literals and available CPU features.
+ pub(crate) fn only_256bit(&mut self, yes: Option<bool>) -> &mut Builder {
+ self.only_256bit = yes;
+ self
+ }
+
+ /// Request that heuristic limitations on the number of patterns be
+    /// employed. This is useful to disable for benchmarking, where one wants
+    /// to explore how Teddy performs on a large number of patterns even if the
+ /// heuristics would otherwise refuse construction.
+ ///
+ /// This is enabled by default.
+ pub(crate) fn heuristic_pattern_limits(
+ &mut self,
+ yes: bool,
+ ) -> &mut Builder {
+ self.heuristic_pattern_limits = yes;
+ self
+ }
+
+ fn build_imp(&self, patterns: Arc<Patterns>) -> Option<Searcher> {
+ let patlimit = self.heuristic_pattern_limits;
+ // There's no particular reason why we limit ourselves to little endian
+ // here, but it seems likely that some parts of Teddy as they are
+ // currently written (e.g., the uses of `trailing_zeros`) are likely
+ // wrong on non-little-endian targets. Such things are likely easy to
+ // fix, but at the time of writing (2023/09/18), I actually do not know
+ // how to test this code on a big-endian target. So for now, we're
+ // conservative and just bail out.
+ if !cfg!(target_endian = "little") {
+ debug!("skipping Teddy because target isn't little endian");
+ return None;
+ }
+ // Too many patterns will overwhelm Teddy and likely lead to slow
+ // downs, typically in the verification step.
+ if patlimit && patterns.len() > 64 {
+ debug!("skipping Teddy because of too many patterns");
+ return None;
+ }
+
+ #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+ {
+ use self::x86_64::{FatAVX2, SlimAVX2, SlimSSSE3};
+
+ let mask_len = core::cmp::min(4, patterns.minimum_len());
+ let beefy = patterns.len() > 32;
+ let has_avx2 = self::x86_64::is_available_avx2();
+ let has_ssse3 = has_avx2 || self::x86_64::is_available_ssse3();
+ let use_avx2 = if self.only_256bit == Some(true) {
+ if !has_avx2 {
+ debug!(
+ "skipping Teddy because avx2 was demanded but unavailable"
+ );
+ return None;
+ }
+ true
+ } else if self.only_256bit == Some(false) {
+ if !has_ssse3 {
+ debug!(
+ "skipping Teddy because ssse3 was demanded but unavailable"
+ );
+ return None;
+ }
+ false
+ } else if !has_ssse3 && !has_avx2 {
+ debug!(
+ "skipping Teddy because ssse3 and avx2 are unavailable"
+ );
+ return None;
+ } else {
+ has_avx2
+ };
+ let fat = match self.only_fat {
+ None => use_avx2 && beefy,
+ Some(false) => false,
+ Some(true) if !use_avx2 => {
+ debug!(
+ "skipping Teddy because fat was demanded, but fat \
+ Teddy requires avx2 which is unavailable"
+ );
+ return None;
+ }
+ Some(true) => true,
+ };
+ // Just like for aarch64, it's possible that too many patterns will
+            // overwhelm Teddy. Unlike aarch64 though, we have Fat Teddy which
+ // helps things scale a bit more by spreading patterns over more
+ // buckets.
+ //
+ // These thresholds were determined by looking at the measurements
+ // for the rust/aho-corasick/packed/leftmost-first and
+ // rust/aho-corasick/dfa/leftmost-first engines on the `teddy/`
+ // benchmarks.
+ if patlimit && mask_len == 1 && patterns.len() > 16 {
+ debug!(
+ "skipping Teddy (mask len: 1) because there are \
+ too many patterns",
+ );
+ return None;
+ }
+ match (mask_len, use_avx2, fat) {
+ (1, false, _) => {
+ debug!("Teddy choice: 128-bit slim, 1 byte");
+ SlimSSSE3::<1>::new(&patterns)
+ }
+ (1, true, false) => {
+ debug!("Teddy choice: 256-bit slim, 1 byte");
+ SlimAVX2::<1>::new(&patterns)
+ }
+ (1, true, true) => {
+ debug!("Teddy choice: 256-bit fat, 1 byte");
+ FatAVX2::<1>::new(&patterns)
+ }
+ (2, false, _) => {
+ debug!("Teddy choice: 128-bit slim, 2 bytes");
+ SlimSSSE3::<2>::new(&patterns)
+ }
+ (2, true, false) => {
+ debug!("Teddy choice: 256-bit slim, 2 bytes");
+ SlimAVX2::<2>::new(&patterns)
+ }
+ (2, true, true) => {
+ debug!("Teddy choice: 256-bit fat, 2 bytes");
+ FatAVX2::<2>::new(&patterns)
+ }
+ (3, false, _) => {
+ debug!("Teddy choice: 128-bit slim, 3 bytes");
+ SlimSSSE3::<3>::new(&patterns)
+ }
+ (3, true, false) => {
+ debug!("Teddy choice: 256-bit slim, 3 bytes");
+ SlimAVX2::<3>::new(&patterns)
+ }
+ (3, true, true) => {
+ debug!("Teddy choice: 256-bit fat, 3 bytes");
+ FatAVX2::<3>::new(&patterns)
+ }
+ (4, false, _) => {
+ debug!("Teddy choice: 128-bit slim, 4 bytes");
+ SlimSSSE3::<4>::new(&patterns)
+ }
+ (4, true, false) => {
+ debug!("Teddy choice: 256-bit slim, 4 bytes");
+ SlimAVX2::<4>::new(&patterns)
+ }
+ (4, true, true) => {
+ debug!("Teddy choice: 256-bit fat, 4 bytes");
+ FatAVX2::<4>::new(&patterns)
+ }
+ _ => {
+ debug!("no supported Teddy configuration found");
+ None
+ }
+ }
+ }
+ #[cfg(target_arch = "aarch64")]
+ {
+ use self::aarch64::SlimNeon;
+
+ let mask_len = core::cmp::min(4, patterns.minimum_len());
+ if self.only_256bit == Some(true) {
+ debug!(
+ "skipping Teddy because 256-bits were demanded \
+ but unavailable"
+ );
+ return None;
+ }
+ if self.only_fat == Some(true) {
+ debug!(
+ "skipping Teddy because fat was demanded but unavailable"
+                );
+                return None;
+            }
+ // Since we don't have Fat teddy in aarch64 (I think we'd want at
+ // least 256-bit vectors for that), we need to be careful not to
+ // allow too many patterns as it might overwhelm Teddy. Generally
+ // speaking, as the mask length goes up, the more patterns we can
+ // handle because the mask length results in fewer candidates
+ // generated.
+ //
+ // These thresholds were determined by looking at the measurements
+ // for the rust/aho-corasick/packed/leftmost-first and
+ // rust/aho-corasick/dfa/leftmost-first engines on the `teddy/`
+ // benchmarks.
+ match mask_len {
+ 1 => {
+ if patlimit && patterns.len() > 16 {
+ debug!(
+ "skipping Teddy (mask len: 1) because there are \
+ too many patterns",
+                        );
+                        return None;
+                    }
+ debug!("Teddy choice: 128-bit slim, 1 byte");
+ SlimNeon::<1>::new(&patterns)
+ }
+ 2 => {
+ if patlimit && patterns.len() > 32 {
+ debug!(
+ "skipping Teddy (mask len: 2) because there are \
+ too many patterns",
+                        );
+                        return None;
+                    }
+ debug!("Teddy choice: 128-bit slim, 2 bytes");
+ SlimNeon::<2>::new(&patterns)
+ }
+ 3 => {
+ if patlimit && patterns.len() > 48 {
+ debug!(
+ "skipping Teddy (mask len: 3) because there are \
+ too many patterns",
+                        );
+                        return None;
+                    }
+ debug!("Teddy choice: 128-bit slim, 3 bytes");
+ SlimNeon::<3>::new(&patterns)
+ }
+ 4 => {
+ debug!("Teddy choice: 128-bit slim, 4 bytes");
+ SlimNeon::<4>::new(&patterns)
+ }
+ _ => {
+ debug!("no supported Teddy configuration found");
+ None
+ }
+ }
+ }
+ #[cfg(not(any(
+ all(target_arch = "x86_64", target_feature = "sse2"),
+ target_arch = "aarch64"
+ )))]
+ {
+ None
+ }
+ }
+}
+
+/// A searcher that dispatches to one of several possible Teddy variants.
+#[derive(Clone, Debug)]
+pub(crate) struct Searcher {
+ /// The Teddy variant we use. We use dynamic dispatch under the theory that
+    /// it results in better codegen than an enum, although this is a specious
+ /// claim.
+ ///
+ /// This `Searcher` is essentially a wrapper for a `SearcherT` trait
+ /// object. We just make `memory_usage` and `minimum_len` available without
+ /// going through dynamic dispatch.
+ imp: Arc<dyn SearcherT>,
+ /// Total heap memory used by the Teddy variant.
+ memory_usage: usize,
+ /// The minimum haystack length this searcher can handle. It is intended
+ /// for callers to use some other search routine (such as Rabin-Karp) in
+    /// cases where the haystack (or remainder of the haystack) is too short.
+ minimum_len: usize,
+}
+
+impl Searcher {
+ /// Look for the leftmost occurrence of any pattern in this search in the
+ /// given haystack starting at the given position.
+ ///
+ /// # Panics
+ ///
+ /// This panics when `haystack[at..].len()` is less than the minimum length
+    /// for this searcher.
+ #[inline(always)]
+ pub(crate) fn find(
+ &self,
+ haystack: &[u8],
+ at: usize,
+ ) -> Option<crate::Match> {
+ // SAFETY: The Teddy implementations all require a minimum haystack
+ // length, and this is required for safety. Therefore, we assert it
+ // here in order to make this method sound.
+ assert!(haystack[at..].len() >= self.minimum_len);
+ let hayptr = haystack.as_ptr();
+ // SAFETY: Construction of the searcher guarantees that we are able
+ // to run it in the current environment (i.e., we won't get an AVX2
+ // searcher on a x86-64 CPU without AVX2 support). Also, the pointers
+ // are valid as they are derived directly from a borrowed slice.
+ let teddym = unsafe {
+ self.imp.find(hayptr.add(at), hayptr.add(haystack.len()))?
+ };
+ let start = teddym.start().as_usize().wrapping_sub(hayptr.as_usize());
+ let end = teddym.end().as_usize().wrapping_sub(hayptr.as_usize());
+ let span = crate::Span { start, end };
+ // OK because we won't permit the construction of a searcher that
+ // could report a pattern ID bigger than what can fit in the crate-wide
+ // PatternID type.
+ let pid = crate::PatternID::new_unchecked(teddym.pattern().as_usize());
+ let m = crate::Match::new(pid, span);
+ Some(m)
+ }
+
+ /// Returns the approximate total amount of heap used by this type, in
+ /// units of bytes.
+ #[inline(always)]
+ pub(crate) fn memory_usage(&self) -> usize {
+ self.memory_usage
+ }
+
+ /// Returns the minimum length, in bytes, that a haystack must be in order
+ /// to use it with this searcher.
+ #[inline(always)]
+ pub(crate) fn minimum_len(&self) -> usize {
+ self.minimum_len
+ }
+}
+
+/// A trait that provides dynamic dispatch over the different possible Teddy
+/// variants on the same algorithm.
+///
+/// On `x86_64` for example, it isn't known until runtime which of 12 possible
+/// variants will be used. One might use one of the four slim 128-bit vector
+/// variants, or one of the four 256-bit vector variants or even one of the
+/// four fat 256-bit vector variants.
+///
+/// Since this choice is generally made when the Teddy searcher is constructed
+/// and this choice is based on the patterns given and what the current CPU
+/// supports, it follows that there must be some kind of indirection at search
+/// time that "selects" the variant chosen at build time.
+///
+/// There are a few different ways to go about this. One approach is to use an
+/// enum. It works fine, but in my experiments, this generally results in worse
+/// codegen. Another approach, which is what we use here, is dynamic dispatch
+/// via a trait object. We basically implement this trait for each possible
+/// variant, select the variant we want at build time and convert it to a
+/// trait object for use at search time.
+///
+/// Another approach is to use function pointers and stick each of the possible
+/// variants into a union. This is essentially isomorphic to the dynamic
+/// dispatch approach, but doesn't require any allocations. Since this crate
+/// requires `alloc`, there's no real reason (AFAIK) to go down this path. (The
+/// `memchr` crate does this.)
+trait SearcherT:
+ Debug + Send + Sync + UnwindSafe + RefUnwindSafe + 'static
+{
+ /// Execute a search on the given haystack (identified by `start` and `end`
+ /// raw pointers).
+ ///
+ /// # Safety
+ ///
+ /// Essentially, the `start` and `end` pointers must be valid and point
+ /// to a haystack one can read. As long as you derive them from, for
+ /// example, a `&[u8]`, they should automatically satisfy all of the safety
+ /// obligations:
+ ///
+ /// * Both `start` and `end` must be valid for reads.
+ /// * Both `start` and `end` must point to an initialized value.
+ /// * Both `start` and `end` must point to the same allocated object and
+ /// must either be in bounds or at most one byte past the end of the
+ /// allocated object.
+ /// * Both `start` and `end` must be _derived from_ a pointer to the same
+ /// object.
+ /// * The distance between `start` and `end` must not overflow `isize`.
+ /// * The distance being in bounds must not rely on "wrapping around" the
+ /// address space.
+ /// * It must be the case that `start <= end`.
+ /// * `end - start` must be greater than the minimum length for this
+ /// searcher.
+ ///
+ /// Also, it is expected that implementations of this trait will tag this
+ /// method with a `target_feature` attribute. Callers must ensure that
+ /// they are executing this method in an environment where that attribute
+ /// is valid.
+ unsafe fn find(&self, start: *const u8, end: *const u8) -> Option<Match>;
+}
+
+#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+mod x86_64 {
+ use core::arch::x86_64::{__m128i, __m256i};
+
+ use alloc::sync::Arc;
+
+ use crate::packed::{
+ ext::Pointer,
+ pattern::Patterns,
+ teddy::generic::{self, Match},
+ };
+
+ use super::{Searcher, SearcherT};
+
+ #[derive(Clone, Debug)]
+ pub(super) struct SlimSSSE3<const BYTES: usize> {
+ slim128: generic::Slim<__m128i, BYTES>,
+ }
+
+ // Defines SlimSSSE3 wrapper functions for 1, 2, 3 and 4 bytes.
+ macro_rules! slim_ssse3 {
+ ($len:expr) => {
+ impl SlimSSSE3<$len> {
+ /// Creates a new searcher using "slim" Teddy with 128-bit
+ /// vectors. If SSSE3 is not available in the current
+ /// environment, then this returns `None`.
+ pub(super) fn new(
+ patterns: &Arc<Patterns>,
+ ) -> Option<Searcher> {
+ if !is_available_ssse3() {
+ return None;
+ }
+ Some(unsafe { SlimSSSE3::<$len>::new_unchecked(patterns) })
+ }
+
+                /// Creates a new searcher using "slim" Teddy with 128-bit
+ /// vectors without checking whether SSSE3 is available or not.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that SSSE3 is available in the current
+ /// environment.
+ #[target_feature(enable = "ssse3")]
+ unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
+ let slim128 = generic::Slim::<__m128i, $len>::new(
+ Arc::clone(patterns),
+ );
+ let memory_usage = slim128.memory_usage();
+ let minimum_len = slim128.minimum_len();
+ let imp = Arc::new(SlimSSSE3 { slim128 });
+ Searcher { imp, memory_usage, minimum_len }
+ }
+ }
+
+ impl SearcherT for SlimSSSE3<$len> {
+ #[target_feature(enable = "ssse3")]
+ #[inline]
+ unsafe fn find(
+ &self,
+ start: *const u8,
+ end: *const u8,
+ ) -> Option<Match> {
+ // SAFETY: All obligations except for `target_feature` are
+ // passed to the caller. Our use of `target_feature` is
+ // safe because construction of this type requires that the
+ // requisite target features are available.
+ self.slim128.find(start, end)
+ }
+ }
+ };
+ }
+
+ slim_ssse3!(1);
+ slim_ssse3!(2);
+ slim_ssse3!(3);
+ slim_ssse3!(4);
+
+ #[derive(Clone, Debug)]
+ pub(super) struct SlimAVX2<const BYTES: usize> {
+ slim128: generic::Slim<__m128i, BYTES>,
+ slim256: generic::Slim<__m256i, BYTES>,
+ }
+
+ // Defines SlimAVX2 wrapper functions for 1, 2, 3 and 4 bytes.
+ macro_rules! slim_avx2 {
+ ($len:expr) => {
+ impl SlimAVX2<$len> {
+ /// Creates a new searcher using "slim" Teddy with 256-bit
+ /// vectors. If AVX2 is not available in the current
+ /// environment, then this returns `None`.
+ pub(super) fn new(
+ patterns: &Arc<Patterns>,
+ ) -> Option<Searcher> {
+ if !is_available_avx2() {
+ return None;
+ }
+ Some(unsafe { SlimAVX2::<$len>::new_unchecked(patterns) })
+ }
+
+ /// Creates a new searcher using "slim" Teddy with 256-bit
+ /// vectors without checking whether AVX2 is available or not.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that AVX2 is available in the current
+ /// environment.
+ #[target_feature(enable = "avx2")]
+ unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
+ let slim128 = generic::Slim::<__m128i, $len>::new(
+ Arc::clone(&patterns),
+ );
+ let slim256 = generic::Slim::<__m256i, $len>::new(
+ Arc::clone(&patterns),
+ );
+ let memory_usage =
+ slim128.memory_usage() + slim256.memory_usage();
+ let minimum_len = slim128.minimum_len();
+ let imp = Arc::new(SlimAVX2 { slim128, slim256 });
+ Searcher { imp, memory_usage, minimum_len }
+ }
+ }
+
+ impl SearcherT for SlimAVX2<$len> {
+ #[target_feature(enable = "avx2")]
+ #[inline]
+ unsafe fn find(
+ &self,
+ start: *const u8,
+ end: *const u8,
+ ) -> Option<Match> {
+ // SAFETY: All obligations except for `target_feature` are
+ // passed to the caller. Our use of `target_feature` is
+ // safe because construction of this type requires that the
+ // requisite target features are available.
+ let len = end.distance(start);
+ if len < self.slim256.minimum_len() {
+ self.slim128.find(start, end)
+ } else {
+ self.slim256.find(start, end)
+ }
+ }
+ }
+ };
+ }
+
+ slim_avx2!(1);
+ slim_avx2!(2);
+ slim_avx2!(3);
+ slim_avx2!(4);
+
+ #[derive(Clone, Debug)]
+ pub(super) struct FatAVX2<const BYTES: usize> {
+ fat256: generic::Fat<__m256i, BYTES>,
+ }
+
+    // Defines FatAVX2 wrapper functions for 1, 2, 3 and 4 bytes.
+ macro_rules! fat_avx2 {
+ ($len:expr) => {
+ impl FatAVX2<$len> {
+                /// Creates a new searcher using "fat" Teddy with 256-bit
+ /// vectors. If AVX2 is not available in the current
+ /// environment, then this returns `None`.
+ pub(super) fn new(
+ patterns: &Arc<Patterns>,
+ ) -> Option<Searcher> {
+ if !is_available_avx2() {
+ return None;
+ }
+ Some(unsafe { FatAVX2::<$len>::new_unchecked(patterns) })
+ }
+
+                /// Creates a new searcher using "fat" Teddy with 256-bit
+ /// vectors without checking whether AVX2 is available or not.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that AVX2 is available in the current
+ /// environment.
+ #[target_feature(enable = "avx2")]
+ unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
+ let fat256 = generic::Fat::<__m256i, $len>::new(
+ Arc::clone(&patterns),
+ );
+ let memory_usage = fat256.memory_usage();
+ let minimum_len = fat256.minimum_len();
+ let imp = Arc::new(FatAVX2 { fat256 });
+ Searcher { imp, memory_usage, minimum_len }
+ }
+ }
+
+ impl SearcherT for FatAVX2<$len> {
+ #[target_feature(enable = "avx2")]
+ #[inline]
+ unsafe fn find(
+ &self,
+ start: *const u8,
+ end: *const u8,
+ ) -> Option<Match> {
+ // SAFETY: All obligations except for `target_feature` are
+ // passed to the caller. Our use of `target_feature` is
+ // safe because construction of this type requires that the
+ // requisite target features are available.
+ self.fat256.find(start, end)
+ }
+ }
+ };
+ }
+
+ fat_avx2!(1);
+ fat_avx2!(2);
+ fat_avx2!(3);
+ fat_avx2!(4);
+
+ #[inline]
+ pub(super) fn is_available_ssse3() -> bool {
+ #[cfg(not(target_feature = "sse2"))]
+ {
+ false
+ }
+ #[cfg(target_feature = "sse2")]
+ {
+ #[cfg(target_feature = "ssse3")]
+ {
+ true
+ }
+ #[cfg(not(target_feature = "ssse3"))]
+ {
+ #[cfg(feature = "std")]
+ {
+ std::is_x86_feature_detected!("ssse3")
+ }
+ #[cfg(not(feature = "std"))]
+ {
+ false
+ }
+ }
+ }
+ }
+
+ #[inline]
+ pub(super) fn is_available_avx2() -> bool {
+ #[cfg(not(target_feature = "sse2"))]
+ {
+ false
+ }
+ #[cfg(target_feature = "sse2")]
+ {
+ #[cfg(target_feature = "avx2")]
+ {
+ true
+ }
+ #[cfg(not(target_feature = "avx2"))]
+ {
+ #[cfg(feature = "std")]
+ {
+ std::is_x86_feature_detected!("avx2")
+ }
+ #[cfg(not(feature = "std"))]
+ {
+ false
+ }
+ }
+ }
+ }
+}
+
+#[cfg(target_arch = "aarch64")]
+mod aarch64 {
+ use core::arch::aarch64::uint8x16_t;
+
+ use alloc::sync::Arc;
+
+ use crate::packed::{
+ pattern::Patterns,
+ teddy::generic::{self, Match},
+ };
+
+ use super::{Searcher, SearcherT};
+
+ #[derive(Clone, Debug)]
+ pub(super) struct SlimNeon<const BYTES: usize> {
+ slim128: generic::Slim<uint8x16_t, BYTES>,
+ }
+
+    // Defines SlimNeon wrapper functions for 1, 2, 3 and 4 bytes.
+ macro_rules! slim_neon {
+ ($len:expr) => {
+ impl SlimNeon<$len> {
+ /// Creates a new searcher using "slim" Teddy with 128-bit
+                /// vectors. Since NEON is always available on aarch64, this
+                /// always returns a searcher.
+ pub(super) fn new(
+ patterns: &Arc<Patterns>,
+ ) -> Option<Searcher> {
+ Some(unsafe { SlimNeon::<$len>::new_unchecked(patterns) })
+ }
+
+                /// Creates a new searcher using "slim" Teddy with 128-bit
+                /// vectors without checking whether NEON is available or not.
+ ///
+ /// # Safety
+ ///
+                /// Callers must ensure that NEON is available in the current
+ /// environment.
+ #[target_feature(enable = "neon")]
+ unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
+ let slim128 = generic::Slim::<uint8x16_t, $len>::new(
+ Arc::clone(patterns),
+ );
+ let memory_usage = slim128.memory_usage();
+ let minimum_len = slim128.minimum_len();
+ let imp = Arc::new(SlimNeon { slim128 });
+ Searcher { imp, memory_usage, minimum_len }
+ }
+ }
+
+ impl SearcherT for SlimNeon<$len> {
+ #[target_feature(enable = "neon")]
+ #[inline]
+ unsafe fn find(
+ &self,
+ start: *const u8,
+ end: *const u8,
+ ) -> Option<Match> {
+ // SAFETY: All obligations except for `target_feature` are
+ // passed to the caller. Our use of `target_feature` is
+ // safe because construction of this type requires that the
+ // requisite target features are available.
+ self.slim128.find(start, end)
+ }
+ }
+ };
+ }
+
+ slim_neon!(1);
+ slim_neon!(2);
+ slim_neon!(3);
+ slim_neon!(4);
+}
diff --git a/third_party/rust/aho-corasick/src/packed/teddy/generic.rs b/third_party/rust/aho-corasick/src/packed/teddy/generic.rs
new file mode 100644
index 0000000000..2aacd00357
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/packed/teddy/generic.rs
@@ -0,0 +1,1382 @@
+use core::fmt::Debug;
+
+use alloc::{
+ boxed::Box, collections::BTreeMap, format, sync::Arc, vec, vec::Vec,
+};
+
+use crate::{
+ packed::{
+ ext::Pointer,
+ pattern::Patterns,
+ vector::{FatVector, Vector},
+ },
+ util::int::U32,
+ PatternID,
+};
+
+/// A match type specialized to the Teddy implementations below.
+///
+/// Essentially, instead of representing a match at byte offsets, we use
+/// raw pointers. This is because the implementations below operate on raw
+/// pointers, and so this is a more natural return type based on how the
+/// implementation works.
+///
+/// Also, the `PatternID` used here is a `u16`.
+#[derive(Clone, Copy, Debug)]
+pub(crate) struct Match {
+ pid: PatternID,
+ start: *const u8,
+ end: *const u8,
+}
+
+impl Match {
+ /// Returns the ID of the pattern that matched.
+ pub(crate) fn pattern(&self) -> PatternID {
+ self.pid
+ }
+
+ /// Returns a pointer into the haystack at which the match starts.
+ pub(crate) fn start(&self) -> *const u8 {
+ self.start
+ }
+
+ /// Returns a pointer into the haystack at which the match ends.
+ pub(crate) fn end(&self) -> *const u8 {
+ self.end
+ }
+}
+
+/// A "slim" Teddy implementation that is generic over both the vector type
+/// and the minimum length of the patterns being searched for.
+///
+/// Only 1, 2, 3 and 4 bytes are supported as minimum lengths.
+#[derive(Clone, Debug)]
+pub(crate) struct Slim<V, const BYTES: usize> {
+ /// A generic data structure for doing "slim" Teddy verification.
+ teddy: Teddy<8>,
+ /// The masks used as inputs to the shuffle operation to generate
+ /// candidates (which are fed into the verification routines).
+ masks: [Mask<V>; BYTES],
+}
+
+impl<V: Vector, const BYTES: usize> Slim<V, BYTES> {
+ /// Create a new "slim" Teddy searcher for the given patterns.
+ ///
+ /// # Panics
+ ///
+ /// This panics when `BYTES` is any value other than 1, 2, 3 or 4.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ #[inline(always)]
+ pub(crate) unsafe fn new(patterns: Arc<Patterns>) -> Slim<V, BYTES> {
+ assert!(
+ 1 <= BYTES && BYTES <= 4,
+ "only 1, 2, 3 or 4 bytes are supported"
+ );
+ let teddy = Teddy::new(patterns);
+ let masks = SlimMaskBuilder::from_teddy(&teddy);
+ Slim { teddy, masks }
+ }
+
+ /// Returns the approximate total amount of heap used by this type, in
+ /// units of bytes.
+ #[inline(always)]
+ pub(crate) fn memory_usage(&self) -> usize {
+ self.teddy.memory_usage()
+ }
+
+ /// Returns the minimum length, in bytes, that a haystack must be in order
+ /// to use it with this searcher.
+ #[inline(always)]
+ pub(crate) fn minimum_len(&self) -> usize {
+ V::BYTES + (BYTES - 1)
+ }
+}
+
+impl<V: Vector> Slim<V, 1> {
+    /// Look for an occurrence of the patterns in this finder in the haystack
+ /// given by the `start` and `end` pointers.
+ ///
+ /// If no match could be found, then `None` is returned.
+ ///
+ /// # Safety
+ ///
+ /// The given pointers representing the haystack must be valid to read
+ /// from. They must also point to a region of memory that is at least the
+ /// minimum length required by this searcher.
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ #[inline(always)]
+ pub(crate) unsafe fn find(
+ &self,
+ start: *const u8,
+ end: *const u8,
+ ) -> Option<Match> {
+ let len = end.distance(start);
+ debug_assert!(len >= self.minimum_len());
+ let mut cur = start;
+ while cur <= end.sub(V::BYTES) {
+ if let Some(m) = self.find_one(cur, end) {
+ return Some(m);
+ }
+ cur = cur.add(V::BYTES);
+ }
+ if cur < end {
+ cur = end.sub(V::BYTES);
+ if let Some(m) = self.find_one(cur, end) {
+ return Some(m);
+ }
+ }
+ None
+ }
+
+    /// Look for a match in the `V::BYTES` bytes at and after `cur`. If
+ /// there isn't one, then `None` is returned.
+ ///
+ /// # Safety
+ ///
+ /// The given pointers representing the haystack must be valid to read
+ /// from. They must also point to a region of memory that is at least the
+ /// minimum length required by this searcher.
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ #[inline(always)]
+ unsafe fn find_one(
+ &self,
+ cur: *const u8,
+ end: *const u8,
+ ) -> Option<Match> {
+ let c = self.candidate(cur);
+ if !c.is_zero() {
+ if let Some(m) = self.teddy.verify(cur, end, c) {
+ return Some(m);
+ }
+ }
+ None
+ }
+
+    /// Look for a candidate match (represented as a vector) in the `V::BYTES`
+    /// bytes at and after `cur`. If there isn't one, then a vector with
+ /// all bits set to zero is returned.
+ ///
+ /// # Safety
+ ///
+ /// The given pointer representing the haystack must be valid to read
+ /// from.
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ #[inline(always)]
+ unsafe fn candidate(&self, cur: *const u8) -> V {
+ let chunk = V::load_unaligned(cur);
+ Mask::members1(chunk, self.masks)
+ }
+}
+
+impl<V: Vector> Slim<V, 2> {
+ /// See Slim<V, 1>::find.
+ #[inline(always)]
+ pub(crate) unsafe fn find(
+ &self,
+ start: *const u8,
+ end: *const u8,
+ ) -> Option<Match> {
+ let len = end.distance(start);
+ debug_assert!(len >= self.minimum_len());
+ let mut cur = start.add(1);
+ let mut prev0 = V::splat(0xFF);
+ while cur <= end.sub(V::BYTES) {
+ if let Some(m) = self.find_one(cur, end, &mut prev0) {
+ return Some(m);
+ }
+ cur = cur.add(V::BYTES);
+ }
+ if cur < end {
+ cur = end.sub(V::BYTES);
+ prev0 = V::splat(0xFF);
+ if let Some(m) = self.find_one(cur, end, &mut prev0) {
+ return Some(m);
+ }
+ }
+ None
+ }
+
+ /// See Slim<V, 1>::find_one.
+ #[inline(always)]
+ unsafe fn find_one(
+ &self,
+ cur: *const u8,
+ end: *const u8,
+ prev0: &mut V,
+ ) -> Option<Match> {
+ let c = self.candidate(cur, prev0);
+ if !c.is_zero() {
+ if let Some(m) = self.teddy.verify(cur.sub(1), end, c) {
+ return Some(m);
+ }
+ }
+ None
+ }
+
+ /// See Slim<V, 1>::candidate.
+ #[inline(always)]
+ unsafe fn candidate(&self, cur: *const u8, prev0: &mut V) -> V {
+ let chunk = V::load_unaligned(cur);
+ let (res0, res1) = Mask::members2(chunk, self.masks);
+ let res0prev0 = res0.shift_in_one_byte(*prev0);
+ let res = res0prev0.and(res1);
+ *prev0 = res0;
+ res
+ }
+}
+
+impl<V: Vector> Slim<V, 3> {
+ /// See Slim<V, 1>::find.
+ #[inline(always)]
+ pub(crate) unsafe fn find(
+ &self,
+ start: *const u8,
+ end: *const u8,
+ ) -> Option<Match> {
+ let len = end.distance(start);
+ debug_assert!(len >= self.minimum_len());
+ let mut cur = start.add(2);
+ let mut prev0 = V::splat(0xFF);
+ let mut prev1 = V::splat(0xFF);
+ while cur <= end.sub(V::BYTES) {
+ if let Some(m) = self.find_one(cur, end, &mut prev0, &mut prev1) {
+ return Some(m);
+ }
+ cur = cur.add(V::BYTES);
+ }
+ if cur < end {
+ cur = end.sub(V::BYTES);
+ prev0 = V::splat(0xFF);
+ prev1 = V::splat(0xFF);
+ if let Some(m) = self.find_one(cur, end, &mut prev0, &mut prev1) {
+ return Some(m);
+ }
+ }
+ None
+ }
+
+ /// See Slim<V, 1>::find_one.
+ #[inline(always)]
+ unsafe fn find_one(
+ &self,
+ cur: *const u8,
+ end: *const u8,
+ prev0: &mut V,
+ prev1: &mut V,
+ ) -> Option<Match> {
+ let c = self.candidate(cur, prev0, prev1);
+ if !c.is_zero() {
+ if let Some(m) = self.teddy.verify(cur.sub(2), end, c) {
+ return Some(m);
+ }
+ }
+ None
+ }
+
+ /// See Slim<V, 1>::candidate.
+ #[inline(always)]
+ unsafe fn candidate(
+ &self,
+ cur: *const u8,
+ prev0: &mut V,
+ prev1: &mut V,
+ ) -> V {
+ let chunk = V::load_unaligned(cur);
+ let (res0, res1, res2) = Mask::members3(chunk, self.masks);
+ let res0prev0 = res0.shift_in_two_bytes(*prev0);
+ let res1prev1 = res1.shift_in_one_byte(*prev1);
+ let res = res0prev0.and(res1prev1).and(res2);
+ *prev0 = res0;
+ *prev1 = res1;
+ res
+ }
+}
+
+impl<V: Vector> Slim<V, 4> {
+ /// See Slim<V, 1>::find.
+ #[inline(always)]
+ pub(crate) unsafe fn find(
+ &self,
+ start: *const u8,
+ end: *const u8,
+ ) -> Option<Match> {
+ let len = end.distance(start);
+ debug_assert!(len >= self.minimum_len());
+ let mut cur = start.add(3);
+ let mut prev0 = V::splat(0xFF);
+ let mut prev1 = V::splat(0xFF);
+ let mut prev2 = V::splat(0xFF);
+ while cur <= end.sub(V::BYTES) {
+ if let Some(m) =
+ self.find_one(cur, end, &mut prev0, &mut prev1, &mut prev2)
+ {
+ return Some(m);
+ }
+ cur = cur.add(V::BYTES);
+ }
+ if cur < end {
+ cur = end.sub(V::BYTES);
+ prev0 = V::splat(0xFF);
+ prev1 = V::splat(0xFF);
+ prev2 = V::splat(0xFF);
+ if let Some(m) =
+ self.find_one(cur, end, &mut prev0, &mut prev1, &mut prev2)
+ {
+ return Some(m);
+ }
+ }
+ None
+ }
+
+ /// See Slim<V, 1>::find_one.
+ #[inline(always)]
+ unsafe fn find_one(
+ &self,
+ cur: *const u8,
+ end: *const u8,
+ prev0: &mut V,
+ prev1: &mut V,
+ prev2: &mut V,
+ ) -> Option<Match> {
+ let c = self.candidate(cur, prev0, prev1, prev2);
+ if !c.is_zero() {
+ if let Some(m) = self.teddy.verify(cur.sub(3), end, c) {
+ return Some(m);
+ }
+ }
+ None
+ }
+
+ /// See Slim<V, 1>::candidate.
+ #[inline(always)]
+ unsafe fn candidate(
+ &self,
+ cur: *const u8,
+ prev0: &mut V,
+ prev1: &mut V,
+ prev2: &mut V,
+ ) -> V {
+ let chunk = V::load_unaligned(cur);
+ let (res0, res1, res2, res3) = Mask::members4(chunk, self.masks);
+ let res0prev0 = res0.shift_in_three_bytes(*prev0);
+ let res1prev1 = res1.shift_in_two_bytes(*prev1);
+ let res2prev2 = res2.shift_in_one_byte(*prev2);
+ let res = res0prev0.and(res1prev1).and(res2prev2).and(res3);
+ *prev0 = res0;
+ *prev1 = res1;
+ *prev2 = res2;
+ res
+ }
+}
+
+/// A "fat" Teddy implementation that is generic over both the vector type
+/// and the minimum length of the patterns being searched for.
+///
+/// Only 1, 2, 3 and 4 bytes are supported as minimum lengths.
+#[derive(Clone, Debug)]
+pub(crate) struct Fat<V, const BYTES: usize> {
+ /// A generic data structure for doing "fat" Teddy verification.
+ teddy: Teddy<16>,
+ /// The masks used as inputs to the shuffle operation to generate
+ /// candidates (which are fed into the verification routines).
+ masks: [Mask<V>; BYTES],
+}
+
+impl<V: FatVector, const BYTES: usize> Fat<V, BYTES> {
+ /// Create a new "fat" Teddy searcher for the given patterns.
+ ///
+ /// # Panics
+ ///
+ /// This panics when `BYTES` is any value other than 1, 2, 3 or 4.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ #[inline(always)]
+ pub(crate) unsafe fn new(patterns: Arc<Patterns>) -> Fat<V, BYTES> {
+ assert!(
+ 1 <= BYTES && BYTES <= 4,
+ "only 1, 2, 3 or 4 bytes are supported"
+ );
+ let teddy = Teddy::new(patterns);
+ let masks = FatMaskBuilder::from_teddy(&teddy);
+ Fat { teddy, masks }
+ }
+
+ /// Returns the approximate total amount of heap used by this type, in
+ /// units of bytes.
+ #[inline(always)]
+ pub(crate) fn memory_usage(&self) -> usize {
+ self.teddy.memory_usage()
+ }
+
+ /// Returns the minimum length, in bytes, that a haystack must be in order
+ /// to use it with this searcher.
+ #[inline(always)]
+ pub(crate) fn minimum_len(&self) -> usize {
+ V::Half::BYTES + (BYTES - 1)
+ }
+}
+
+impl<V: FatVector> Fat<V, 1> {
+    /// Look for an occurrence of the patterns in this finder in the haystack
+ /// given by the `start` and `end` pointers.
+ ///
+ /// If no match could be found, then `None` is returned.
+ ///
+ /// # Safety
+ ///
+ /// The given pointers representing the haystack must be valid to read
+ /// from. They must also point to a region of memory that is at least the
+ /// minimum length required by this searcher.
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ #[inline(always)]
+ pub(crate) unsafe fn find(
+ &self,
+ start: *const u8,
+ end: *const u8,
+ ) -> Option<Match> {
+ let len = end.distance(start);
+ debug_assert!(len >= self.minimum_len());
+ let mut cur = start;
+ while cur <= end.sub(V::Half::BYTES) {
+ if let Some(m) = self.find_one(cur, end) {
+ return Some(m);
+ }
+ cur = cur.add(V::Half::BYTES);
+ }
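+ // Handle any leftover bytes by searching the final V::Half::BYTES
+ // window of the haystack. This window overlaps bytes already scanned,
+ // but since the loop above found no match starting in those bytes, any
+ // match reported here necessarily starts in the unscanned tail.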
+ if cur < end {
+ cur = end.sub(V::Half::BYTES);
+ if let Some(m) = self.find_one(cur, end) {
+ return Some(m);
+ }
+ }
+ None
+ }
+
+ /// Look for a match starting in the `V::Half::BYTES` haystack bytes at and
+ /// after `cur`. If there isn't one, then `None` is returned.
+ ///
+ /// # Safety
+ ///
+ /// The given pointers representing the haystack must be valid to read
+ /// from. They must also point to a region of memory that is at least the
+ /// minimum length required by this searcher.
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ #[inline(always)]
+ unsafe fn find_one(
+ &self,
+ cur: *const u8,
+ end: *const u8,
+ ) -> Option<Match> {
+ let c = self.candidate(cur);
+ if !c.is_zero() {
+ if let Some(m) = self.teddy.verify(cur, end, c) {
+ return Some(m);
+ }
+ }
+ None
+ }
+
+ /// Look for a candidate match (represented as a vector) starting in the
+ /// `V::Half::BYTES` haystack bytes at and after `cur`. If there isn't one,
+ /// then a vector with all bits set to zero is returned.
+ ///
+ /// # Safety
+ ///
+ /// The given pointer representing the haystack must be valid to read
+ /// from.
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ #[inline(always)]
+ unsafe fn candidate(&self, cur: *const u8) -> V {
+ let chunk = V::load_half_unaligned(cur);
+ Mask::members1(chunk, self.masks)
+ }
+}
+
+impl<V: FatVector> Fat<V, 2> {
+ /// See `Fat<V, 1>::find`.
+ #[inline(always)]
+ pub(crate) unsafe fn find(
+ &self,
+ start: *const u8,
+ end: *const u8,
+ ) -> Option<Match> {
+ let len = end.distance(start);
+ debug_assert!(len >= self.minimum_len());
+ let mut cur = start.add(1);
+ let mut prev0 = V::splat(0xFF);
+ while cur <= end.sub(V::Half::BYTES) {
+ if let Some(m) = self.find_one(cur, end, &mut prev0) {
+ return Some(m);
+ }
+ cur = cur.add(V::Half::BYTES);
+ }
+ if cur < end {
+ cur = end.sub(V::Half::BYTES);
+ prev0 = V::splat(0xFF);
+ if let Some(m) = self.find_one(cur, end, &mut prev0) {
+ return Some(m);
+ }
+ }
+ None
+ }
+
+ /// See `Fat<V, 1>::find_one`.
+ #[inline(always)]
+ unsafe fn find_one(
+ &self,
+ cur: *const u8,
+ end: *const u8,
+ prev0: &mut V,
+ ) -> Option<Match> {
+ let c = self.candidate(cur, prev0);
+ if !c.is_zero() {
+ if let Some(m) = self.teddy.verify(cur.sub(1), end, c) {
+ return Some(m);
+ }
+ }
+ None
+ }
+
+ /// See `Fat<V, 1>::candidate`.
+ #[inline(always)]
+ unsafe fn candidate(&self, cur: *const u8, prev0: &mut V) -> V {
+ let chunk = V::load_half_unaligned(cur);
+ let (res0, res1) = Mask::members2(chunk, self.masks);
+ let res0prev0 = res0.half_shift_in_one_byte(*prev0);
+ let res = res0prev0.and(res1);
+ *prev0 = res0;
+ res
+ }
+}
+
+impl<V: FatVector> Fat<V, 3> {
+ /// See `Fat<V, 1>::find`.
+ #[inline(always)]
+ pub(crate) unsafe fn find(
+ &self,
+ start: *const u8,
+ end: *const u8,
+ ) -> Option<Match> {
+ let len = end.distance(start);
+ debug_assert!(len >= self.minimum_len());
+ let mut cur = start.add(2);
+ let mut prev0 = V::splat(0xFF);
+ let mut prev1 = V::splat(0xFF);
+ while cur <= end.sub(V::Half::BYTES) {
+ if let Some(m) = self.find_one(cur, end, &mut prev0, &mut prev1) {
+ return Some(m);
+ }
+ cur = cur.add(V::Half::BYTES);
+ }
+ if cur < end {
+ cur = end.sub(V::Half::BYTES);
+ prev0 = V::splat(0xFF);
+ prev1 = V::splat(0xFF);
+ if let Some(m) = self.find_one(cur, end, &mut prev0, &mut prev1) {
+ return Some(m);
+ }
+ }
+ None
+ }
+
+ /// See `Fat<V, 1>::find_one`.
+ #[inline(always)]
+ unsafe fn find_one(
+ &self,
+ cur: *const u8,
+ end: *const u8,
+ prev0: &mut V,
+ prev1: &mut V,
+ ) -> Option<Match> {
+ let c = self.candidate(cur, prev0, prev1);
+ if !c.is_zero() {
+ if let Some(m) = self.teddy.verify(cur.sub(2), end, c) {
+ return Some(m);
+ }
+ }
+ None
+ }
+
+ /// See `Fat<V, 1>::candidate`.
+ #[inline(always)]
+ unsafe fn candidate(
+ &self,
+ cur: *const u8,
+ prev0: &mut V,
+ prev1: &mut V,
+ ) -> V {
+ let chunk = V::load_half_unaligned(cur);
+ let (res0, res1, res2) = Mask::members3(chunk, self.masks);
+ let res0prev0 = res0.half_shift_in_two_bytes(*prev0);
+ let res1prev1 = res1.half_shift_in_one_byte(*prev1);
+ let res = res0prev0.and(res1prev1).and(res2);
+ *prev0 = res0;
+ *prev1 = res1;
+ res
+ }
+}
+
+impl<V: FatVector> Fat<V, 4> {
+ /// See `Fat<V, 1>::find`.
+ #[inline(always)]
+ pub(crate) unsafe fn find(
+ &self,
+ start: *const u8,
+ end: *const u8,
+ ) -> Option<Match> {
+ let len = end.distance(start);
+ debug_assert!(len >= self.minimum_len());
+ let mut cur = start.add(3);
+ let mut prev0 = V::splat(0xFF);
+ let mut prev1 = V::splat(0xFF);
+ let mut prev2 = V::splat(0xFF);
+ while cur <= end.sub(V::Half::BYTES) {
+ if let Some(m) =
+ self.find_one(cur, end, &mut prev0, &mut prev1, &mut prev2)
+ {
+ return Some(m);
+ }
+ cur = cur.add(V::Half::BYTES);
+ }
+ if cur < end {
+ cur = end.sub(V::Half::BYTES);
+ prev0 = V::splat(0xFF);
+ prev1 = V::splat(0xFF);
+ prev2 = V::splat(0xFF);
+ if let Some(m) =
+ self.find_one(cur, end, &mut prev0, &mut prev1, &mut prev2)
+ {
+ return Some(m);
+ }
+ }
+ None
+ }
+
+ /// See `Fat<V, 1>::find_one`.
+ #[inline(always)]
+ unsafe fn find_one(
+ &self,
+ cur: *const u8,
+ end: *const u8,
+ prev0: &mut V,
+ prev1: &mut V,
+ prev2: &mut V,
+ ) -> Option<Match> {
+ let c = self.candidate(cur, prev0, prev1, prev2);
+ if !c.is_zero() {
+ if let Some(m) = self.teddy.verify(cur.sub(3), end, c) {
+ return Some(m);
+ }
+ }
+ None
+ }
+
+ /// See `Fat<V, 1>::candidate`.
+ #[inline(always)]
+ unsafe fn candidate(
+ &self,
+ cur: *const u8,
+ prev0: &mut V,
+ prev1: &mut V,
+ prev2: &mut V,
+ ) -> V {
+ let chunk = V::load_half_unaligned(cur);
+ let (res0, res1, res2, res3) = Mask::members4(chunk, self.masks);
+ let res0prev0 = res0.half_shift_in_three_bytes(*prev0);
+ let res1prev1 = res1.half_shift_in_two_bytes(*prev1);
+ let res2prev2 = res2.half_shift_in_one_byte(*prev2);
+ let res = res0prev0.and(res1prev1).and(res2prev2).and(res3);
+ *prev0 = res0;
+ *prev1 = res1;
+ *prev2 = res2;
+ res
+ }
+}
+
+/// The common elements of all "slim" and "fat" Teddy search implementations.
+///
+/// Essentially, this contains the patterns and the buckets. Namely, it
+/// contains enough to implement the verification step after candidates are
+/// identified via the shuffle masks.
+///
+/// It is generic over the number of buckets used. In general, the number of
+/// buckets is either 8 (for "slim" Teddy) or 16 (for "fat" Teddy). The generic
+/// parameter isn't really meant to be instantiated for any value other than
+/// 8 or 16, although it is technically possible. The main hiccup is that there
+/// is some bit-shifting done in the critical part of verification that could
+/// be quite expensive if `N` is not a power of 2.
+#[derive(Clone, Debug)]
+struct Teddy<const BUCKETS: usize> {
+ /// The patterns we are searching for.
+ ///
+ /// A pattern string can be found by its `PatternID`.
+ patterns: Arc<Patterns>,
+ /// The allocation of patterns in buckets. This only contains the IDs of
+ /// patterns. In order to do full verification, callers must provide the
+ /// actual patterns when using Teddy.
+ buckets: [Vec<PatternID>; BUCKETS],
+ // N.B. The above representation is very simple, but it definitely results
+ // in ping-ponging between different allocations during verification. I've
+ // tried experimenting with other representations that flatten the pattern
+ // strings into a single allocation, but it doesn't seem to help much.
+ // Probably everything is small enough to fit into cache anyway, and so the
+ // pointer chasing isn't a big deal?
+ //
+ // One other avenue I haven't explored is some kind of hashing trick
+ // that lets us do another high-confidence check before launching into
+ // `memcmp`.
+}
+
+impl<const BUCKETS: usize> Teddy<BUCKETS> {
+ /// Create a new generic data structure for Teddy verification.
+ fn new(patterns: Arc<Patterns>) -> Teddy<BUCKETS> {
+ assert_ne!(0, patterns.len(), "Teddy requires at least one pattern");
+ assert_ne!(
+ 0,
+ patterns.minimum_len(),
+ "Teddy does not support zero-length patterns"
+ );
+ assert!(
+ BUCKETS == 8 || BUCKETS == 16,
+ "Teddy only supports 8 or 16 buckets"
+ );
+ // MSRV(1.63): Use core::array::from_fn below instead of allocating a
+ // superfluous outer Vec. Not a big deal (especially given the BTreeMap
+ // allocation below), but nice to not do it.
+ let buckets =
+ <[Vec<PatternID>; BUCKETS]>::try_from(vec![vec![]; BUCKETS])
+ .unwrap();
+ let mut t = Teddy { patterns, buckets };
+
+ let mut map: BTreeMap<Box<[u8]>, usize> = BTreeMap::new();
+ for (id, pattern) in t.patterns.iter() {
+ // We try to be slightly clever in how we assign patterns into
+ // buckets. Generally speaking, we want patterns with the same
+ // prefix to be in the same bucket, since it minimizes the amount
+ // of time we spend churning through buckets in the verification
+ // step.
+ //
+ // So we could assign patterns with the same N-prefix (where N is
+ // the size of the mask, which is one of {1, 2, 3}) to the same
+ // bucket. However, case insensitive searches are fairly common, so
+ // we'd for example, ideally want to treat `abc` and `ABC` as if
+ // they shared the same prefix. ASCII has the nice property that
+ // the lower 4 bits of A and a are the same, so we therefore group
+ // patterns with the same low-nybble-N-prefix into the same bucket.
+ //
+ // MOREOVER, this is actually necessary for correctness! In
+ // particular, by grouping patterns with the same prefix into the
+ // same bucket, we ensure that we preserve correct leftmost-first
+ // and leftmost-longest match semantics. In addition to the fact
+ // that `patterns.iter()` iterates in the correct order, this
+ // guarantees that all possible ambiguous matches will occur in
+ // the same bucket. The verification routine could be adjusted to
+ // support correct leftmost match semantics regardless of bucket
+ // allocation, but that results in a performance hit. It's much
+ // nicer to be able to just stop as soon as a match is found.
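+ //
+ // For example, `abc` (bytes 0x61 0x62 0x63) and `ABC` (0x41 0x42 0x43)
+ // share the low nybbles [0x1, 0x2, 0x3], so with a 3-byte mask they are
+ // assigned to the same bucket.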
+ let lonybs = pattern.low_nybbles(t.mask_len());
+ if let Some(&bucket) = map.get(&lonybs) {
+ t.buckets[bucket].push(id);
+ } else {
+ // N.B. We assign buckets in reverse because it shouldn't have
+ // any influence on performance, but it does make it harder to
+ // get leftmost match semantics accidentally correct.
+ let bucket = (BUCKETS - 1) - (id.as_usize() % BUCKETS);
+ t.buckets[bucket].push(id);
+ map.insert(lonybs, bucket);
+ }
+ }
+ t
+ }
+
+ /// Verify whether there are any matches starting at or after `cur` in the
+ /// haystack. The candidate chunk given should correspond to 8-bit bitsets
+ /// for N buckets.
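+ ///
+ /// For example, with 8 buckets (slim Teddy), bit 19 of the candidate
+ /// chunk refers to the byte at offset `19 / 8 = 2` from `cur` and to
+ /// bucket `19 % 8 = 3`.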
+ ///
+ /// # Safety
+ ///
+ /// The given pointers representing the haystack must be valid to read
+ /// from.
+ #[inline(always)]
+ unsafe fn verify64(
+ &self,
+ cur: *const u8,
+ end: *const u8,
+ mut candidate_chunk: u64,
+ ) -> Option<Match> {
+ while candidate_chunk != 0 {
+ let bit = candidate_chunk.trailing_zeros().as_usize();
+ candidate_chunk &= !(1 << bit);
+
+ let cur = cur.add(bit / BUCKETS);
+ let bucket = bit % BUCKETS;
+ if let Some(m) = self.verify_bucket(cur, end, bucket) {
+ return Some(m);
+ }
+ }
+ None
+ }
+
+ /// Verify whether there are any matches starting at `cur` in the haystack
+ /// (bounded by `end`) corresponding only to patterns in the given bucket.
+ ///
+ /// # Safety
+ ///
+ /// The given pointers representing the haystack must be valid to read
+ /// from.
+ ///
+ /// The bucket index must be less than `self.buckets.len()`.
+ #[inline(always)]
+ unsafe fn verify_bucket(
+ &self,
+ cur: *const u8,
+ end: *const u8,
+ bucket: usize,
+ ) -> Option<Match> {
+ debug_assert!(bucket < self.buckets.len());
+ // SAFETY: The caller must ensure that the bucket index is correct.
+ for pid in self.buckets.get_unchecked(bucket).iter().copied() {
+ // SAFETY: This is safe because we are guaranteed that every
+ // index in a Teddy bucket is a valid index into `pats`, by
+ // construction.
+ debug_assert!(pid.as_usize() < self.patterns.len());
+ let pat = self.patterns.get_unchecked(pid);
+ if pat.is_prefix_raw(cur, end) {
+ let start = cur;
+ let end = start.add(pat.len());
+ return Some(Match { pid, start, end });
+ }
+ }
+ None
+ }
+
+ /// Returns the total number of masks required by the patterns in this
+ /// Teddy searcher.
+ ///
+ /// Basically, the mask length corresponds to the type of Teddy searcher
+ /// to use: a 1-byte, 2-byte, 3-byte or 4-byte searcher. The bigger the
+ /// better, typically, since searching for longer substrings usually
+ /// decreases the rate of false positives. Therefore, the number of masks
+ /// needed is the length of the shortest pattern in this searcher. If the
+ /// length of the shortest pattern (in bytes) is bigger than 4, then the
+ /// mask length is 4 since there are no Teddy searchers for more than 4
+ /// bytes.
+ fn mask_len(&self) -> usize {
+ core::cmp::min(4, self.patterns.minimum_len())
+ }
+
+ /// Returns the approximate total amount of heap used by this type, in
+ /// units of bytes.
+ fn memory_usage(&self) -> usize {
+ // This is an upper bound rather than a precise accounting. No
+ // particular reason, other than it's probably very close to actual
+ // memory usage in practice.
+ self.patterns.len() * core::mem::size_of::<PatternID>()
+ }
+}
+
+impl Teddy<8> {
+ /// Runs the verification routine for "slim" Teddy.
+ ///
+ /// The candidate given should be a collection of 8-bit bitsets (one bitset
+ /// per lane), where the ith bit is set in the jth lane if and only if the
+ /// byte occurring at `cur + j` in the haystack is in the bucket `i`.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ ///
+ /// The given pointers must be valid to read from.
+ #[inline(always)]
+ unsafe fn verify<V: Vector>(
+ &self,
+ mut cur: *const u8,
+ end: *const u8,
+ candidate: V,
+ ) -> Option<Match> {
+ debug_assert!(!candidate.is_zero());
+ // Convert the candidate into 64-bit chunks, and then verify each of
+ // those chunks.
+ candidate.for_each_64bit_lane(
+ #[inline(always)]
+ |_, chunk| {
+ let result = self.verify64(cur, end, chunk);
+ cur = cur.add(8);
+ result
+ },
+ )
+ }
+}
+
+impl Teddy<16> {
+ /// Runs the verification routine for "fat" Teddy.
+ ///
+ /// The candidate given should be a collection of 8-bit bitsets (one bitset
+ /// per lane), where the ith bit is set in the jth lane if and only if the
+ /// byte occurring at `cur + (j < 16 ? j : j - 16)` in the haystack is in the
+ /// bucket `j < 16 ? i : i + 8`.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ ///
+ /// The given pointers must be valid to read from.
+ #[inline(always)]
+ unsafe fn verify<V: FatVector>(
+ &self,
+ mut cur: *const u8,
+ end: *const u8,
+ candidate: V,
+ ) -> Option<Match> {
+ // This is a bit tricky, but we basically want to convert our
+ // candidate, which looks like this (assuming a 256-bit vector):
+ //
+ // a31 a30 ... a17 a16 a15 a14 ... a01 a00
+ //
+ // where each a(i) is an 8-bit bitset corresponding to the activated
+ // buckets, to this
+ //
+ // a31 a15 a30 a14 a29 a13 ... a18 a02 a17 a01 a16 a00
+ //
+ // Namely, for Fat Teddy, the high 128-bits of the candidate correspond
+ // to the same bytes in the haystack in the low 128-bits (so we only
+ // scan 16 bytes at a time), but are for buckets 8-15 instead of 0-7.
+ //
+ // The verification routine wants to look at all potentially matching
+ // buckets before moving on to the next lane. So for example, a16 and
+ // a00 both correspond to the first byte in our window; a00
+ // contains buckets 0-7 and a16 contains buckets 8-15. Specifically,
+ // a16 should be checked before a01. So the transformation shown above
+ // allows us to use our normal verification procedure with one small
+ // change: we treat each bitset as 16 bits instead of 8 bits.
+ debug_assert!(!candidate.is_zero());
+
+ // Swap the 128-bit lanes in the candidate vector.
+ let swapped = candidate.swap_halves();
+ // Interleave the bytes from the low 128-bit lanes, starting with
+ // cand first.
+ let r1 = candidate.interleave_low_8bit_lanes(swapped);
+ // Interleave the bytes from the high 128-bit lanes, starting with
+ // cand first.
+ let r2 = candidate.interleave_high_8bit_lanes(swapped);
+ // Now just take the 2 low 64-bit integers from both r1 and r2. We
+ // can drop the high 64-bit integers because they are a mirror image
+ // of the low 64-bit integers. All we care about are the low 128-bit
+ // lanes of r1 and r2. Combined, they contain all our 16-bit bitsets
+ // laid out in the desired order, as described above.
+ r1.for_each_low_64bit_lane(
+ r2,
+ #[inline(always)]
+ |_, chunk| {
+ let result = self.verify64(cur, end, chunk);
+ cur = cur.add(4);
+ result
+ },
+ )
+ }
+}
+
+/// A generic vector mask for the low and high nybbles in a set of patterns.
+/// Each 8-bit lane `j` in a vector corresponds to a bitset where the `i`th bit
+/// is set if and only if the nybble `j` is in the bucket `i` at a particular
+/// position.
+///
+/// This is slightly tweaked depending on whether Slim or Fat Teddy is being
+/// used. For Slim Teddy, the bitsets in the lower half are the same as the
+/// bitsets in the higher half, so that we can search `V::BYTES` bytes at a
+/// time. (Remember, the nybbles in the haystack are used as indices into these
+/// masks, and 256-bit shuffles only operate on 128-bit lanes.)
+///
+/// For Fat Teddy, the bitsets are not repeated, but instead, the high half
+/// bits correspond to an additional 8 buckets. So a bitset `00100010` has
+/// buckets 1 and 5 set if it's in the lower half, but has buckets 9 and 13 set
+/// if it's in the higher half.
+#[derive(Clone, Copy, Debug)]
+struct Mask<V> {
+ lo: V,
+ hi: V,
+}
+
+impl<V: Vector> Mask<V> {
+ /// Return a candidate for Teddy (fat or slim) that is searching for 1-byte
+ /// candidates.
+ ///
+ /// If a candidate is returned, it will be a collection of 8-bit bitsets
+ /// (one bitset per lane), where the ith bit is set in the jth lane if and
+ /// only if the byte occurring at the jth lane in `chunk` is in the bucket
+ /// `i`. If no candidate is found, then the vector returned will have all
+ /// lanes set to zero.
+ ///
+ /// `chunk` should correspond to a `V::BYTES` window of the haystack (where
+ /// the least significant byte corresponds to the start of the window). For
+ /// fat Teddy, the haystack window length should be `V::BYTES / 2`, with
+ /// the window repeated in each half of the vector.
+ ///
+ /// `mask1` should correspond to a low/high mask for the first byte of all
+ /// patterns that are being searched.
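+ ///
+ /// As a rough scalar sketch, for a single haystack byte `b` this computes
+ /// `bits = lo[usize::from(b & 0xF)] & hi[usize::from(b >> 4)]`, where `lo`
+ /// and `hi` are viewed as 16-entry nybble tables. Bit `i` of `bits` is set
+ /// when `b` is a candidate first byte for bucket `i` (possibly a false
+ /// positive, since the two nybbles may come from different patterns in the
+ /// same bucket).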
+ #[inline(always)]
+ unsafe fn members1(chunk: V, masks: [Mask<V>; 1]) -> V {
+ let lomask = V::splat(0xF);
+ let hlo = chunk.and(lomask);
+ let hhi = chunk.shift_8bit_lane_right::<4>().and(lomask);
+ let locand = masks[0].lo.shuffle_bytes(hlo);
+ let hicand = masks[0].hi.shuffle_bytes(hhi);
+ locand.and(hicand)
+ }
+
+ /// Return a candidate for Teddy (fat or slim) that is searching for 2-byte
+ /// candidates.
+ ///
+ /// If candidates are returned, each will be a collection of 8-bit bitsets
+ /// (one bitset per lane), where the ith bit is set in the jth lane if and
+ /// only if the byte occurring at the jth lane in `chunk` is in the bucket
+ /// `i`. Each candidate returned corresponds to the first and second bytes
+ /// of the patterns being searched. If no candidate is found, then all of
+ /// the lanes will be set to zero in at least one of the vectors returned.
+ ///
+ /// `chunk` should correspond to a `V::BYTES` window of the haystack (where
+ /// the least significant byte corresponds to the start of the window). For
+ /// fat Teddy, the haystack window length should be `V::BYTES / 2`, with
+ /// the window repeated in each half of the vector.
+ ///
+ /// The masks should correspond to the masks computed for the first and
+ /// second bytes of all patterns that are being searched.
+ #[inline(always)]
+ unsafe fn members2(chunk: V, masks: [Mask<V>; 2]) -> (V, V) {
+ let lomask = V::splat(0xF);
+ let hlo = chunk.and(lomask);
+ let hhi = chunk.shift_8bit_lane_right::<4>().and(lomask);
+
+ let locand1 = masks[0].lo.shuffle_bytes(hlo);
+ let hicand1 = masks[0].hi.shuffle_bytes(hhi);
+ let cand1 = locand1.and(hicand1);
+
+ let locand2 = masks[1].lo.shuffle_bytes(hlo);
+ let hicand2 = masks[1].hi.shuffle_bytes(hhi);
+ let cand2 = locand2.and(hicand2);
+
+ (cand1, cand2)
+ }
+
+ /// Return a candidate for Teddy (fat or slim) that is searching for 3-byte
+ /// candidates.
+ ///
+ /// If candidates are returned, each will be a collection of 8-bit bitsets
+ /// (one bitset per lane), where the ith bit is set in the jth lane if and
+ /// only if the byte occurring at the jth lane in `chunk` is in the bucket
+ /// `i`. Each candidate returned corresponds to the first, second and third
+ /// bytes of the patterns being searched. If no candidate is found, then
+ /// all of the lanes will be set to zero in at least one of the vectors
+ /// returned.
+ ///
+ /// `chunk` should correspond to a `V::BYTES` window of the haystack (where
+ /// the least significant byte corresponds to the start of the window). For
+ /// fat Teddy, the haystack window length should be `V::BYTES / 2`, with
+ /// the window repeated in each half of the vector.
+ ///
+ /// The masks should correspond to the masks computed for the first, second
+ /// and third bytes of all patterns that are being searched.
+ #[inline(always)]
+ unsafe fn members3(chunk: V, masks: [Mask<V>; 3]) -> (V, V, V) {
+ let lomask = V::splat(0xF);
+ let hlo = chunk.and(lomask);
+ let hhi = chunk.shift_8bit_lane_right::<4>().and(lomask);
+
+ let locand1 = masks[0].lo.shuffle_bytes(hlo);
+ let hicand1 = masks[0].hi.shuffle_bytes(hhi);
+ let cand1 = locand1.and(hicand1);
+
+ let locand2 = masks[1].lo.shuffle_bytes(hlo);
+ let hicand2 = masks[1].hi.shuffle_bytes(hhi);
+ let cand2 = locand2.and(hicand2);
+
+ let locand3 = masks[2].lo.shuffle_bytes(hlo);
+ let hicand3 = masks[2].hi.shuffle_bytes(hhi);
+ let cand3 = locand3.and(hicand3);
+
+ (cand1, cand2, cand3)
+ }
+
+ /// Return a candidate for Teddy (fat or slim) that is searching for 4-byte
+ /// candidates.
+ ///
+ /// If candidates are returned, each will be a collection of 8-bit bitsets
+ /// (one bitset per lane), where the ith bit is set in the jth lane if and
+ /// only if the byte occurring at the jth lane in `chunk` is in the bucket
+ /// `i`. Each candidate returned corresponds to the first, second, third
+ /// and fourth bytes of the patterns being searched. If no candidate is
+ /// found, then all of the lanes will be set to zero in at least one of the
+ /// vectors returned.
+ ///
+ /// `chunk` should correspond to a `V::BYTES` window of the haystack (where
+ /// the least significant byte corresponds to the start of the window). For
+ /// fat Teddy, the haystack window length should be `V::BYTES / 2`, with
+ /// the window repeated in each half of the vector.
+ ///
+ /// The masks should correspond to the masks computed for the first,
+ /// second, third and fourth bytes of all patterns that are being searched.
+ #[inline(always)]
+ unsafe fn members4(chunk: V, masks: [Mask<V>; 4]) -> (V, V, V, V) {
+ let lomask = V::splat(0xF);
+ let hlo = chunk.and(lomask);
+ let hhi = chunk.shift_8bit_lane_right::<4>().and(lomask);
+
+ let locand1 = masks[0].lo.shuffle_bytes(hlo);
+ let hicand1 = masks[0].hi.shuffle_bytes(hhi);
+ let cand1 = locand1.and(hicand1);
+
+ let locand2 = masks[1].lo.shuffle_bytes(hlo);
+ let hicand2 = masks[1].hi.shuffle_bytes(hhi);
+ let cand2 = locand2.and(hicand2);
+
+ let locand3 = masks[2].lo.shuffle_bytes(hlo);
+ let hicand3 = masks[2].hi.shuffle_bytes(hhi);
+ let cand3 = locand3.and(hicand3);
+
+ let locand4 = masks[3].lo.shuffle_bytes(hlo);
+ let hicand4 = masks[3].hi.shuffle_bytes(hhi);
+ let cand4 = locand4.and(hicand4);
+
+ (cand1, cand2, cand3, cand4)
+ }
+}
+
+/// Represents the low and high nybble masks that will be used during
+/// search. Each mask is 32 bytes wide, although only the first 16 bytes are
+/// used for 128-bit vectors.
+///
+/// Each byte in the mask corresponds to a 8-bit bitset, where bit `i` is set
+/// if and only if the corresponding nybble is in the ith bucket. The index of
+/// the byte (0-15, inclusive) corresponds to the nybble.
+///
+/// Each mask is used as the target of a shuffle, where the indices for the
+/// shuffle are taken from the haystack. AND'ing the shuffles for both the
+/// low and high masks together also results in 8-bit bitsets, but where bit
+/// `i` is set if and only if the corresponding *byte* is in the ith bucket.
+#[derive(Clone, Default)]
+struct SlimMaskBuilder {
+ lo: [u8; 32],
+ hi: [u8; 32],
+}
+
+impl SlimMaskBuilder {
+ /// Update this mask by adding the given byte to the given bucket. The
+ /// given bucket must be in the range 0-7.
+ ///
+ /// # Panics
+ ///
+ /// When `bucket >= 8`.
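+ ///
+ /// For example, `add(3, b'a')` (byte 0x61, low nybble 0x1, high nybble
+ /// 0x6) sets bit 3 in `lo[0x01]`, `lo[0x11]`, `hi[0x06]` and `hi[0x16]`.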
+ fn add(&mut self, bucket: usize, byte: u8) {
+ assert!(bucket < 8);
+
+ let bucket = u8::try_from(bucket).unwrap();
+ let byte_lo = usize::from(byte & 0xF);
+ let byte_hi = usize::from((byte >> 4) & 0xF);
+ // When using 256-bit vectors, we need to set this bucket assignment in
+ // the low and high 128-bit portions of the mask. This allows us to
+ // process 32 bytes at a time. Namely, AVX2 shuffles operate on each
+ // of the 128-bit lanes, rather than the full 256-bit vector at once.
+ self.lo[byte_lo] |= 1 << bucket;
+ self.lo[byte_lo + 16] |= 1 << bucket;
+ self.hi[byte_hi] |= 1 << bucket;
+ self.hi[byte_hi + 16] |= 1 << bucket;
+ }
+
+ /// Turn this builder into a vector mask.
+ ///
+ /// # Panics
+ ///
+ /// When `V` represents a vector wider than the 32-byte masks in this builder.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ #[inline(always)]
+ unsafe fn build<V: Vector>(&self) -> Mask<V> {
+ assert!(V::BYTES <= self.lo.len());
+ assert!(V::BYTES <= self.hi.len());
+ Mask {
+ lo: V::load_unaligned(self.lo[..].as_ptr()),
+ hi: V::load_unaligned(self.hi[..].as_ptr()),
+ }
+ }
+
+ /// A convenience function for building `N` vector masks from a slim
+ /// `Teddy` value.
+ ///
+ /// # Panics
+ ///
+ /// When `V` represents a vector wider than the 32-byte masks in this builder.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ #[inline(always)]
+ unsafe fn from_teddy<const BYTES: usize, V: Vector>(
+ teddy: &Teddy<8>,
+ ) -> [Mask<V>; BYTES] {
+ // MSRV(1.63): Use core::array::from_fn to just build the array here
+ // instead of creating a vector and turning it into an array.
+ let mut mask_builders = vec![SlimMaskBuilder::default(); BYTES];
+ for (bucket_index, bucket) in teddy.buckets.iter().enumerate() {
+ for pid in bucket.iter().copied() {
+ let pat = teddy.patterns.get(pid);
+ for (i, builder) in mask_builders.iter_mut().enumerate() {
+ builder.add(bucket_index, pat.bytes()[i]);
+ }
+ }
+ }
+ let array =
+ <[SlimMaskBuilder; BYTES]>::try_from(mask_builders).unwrap();
+ array.map(|builder| builder.build())
+ }
+}
+
+impl Debug for SlimMaskBuilder {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ let (mut parts_lo, mut parts_hi) = (vec![], vec![]);
+ for i in 0..32 {
+ parts_lo.push(format!("{:02}: {:08b}", i, self.lo[i]));
+ parts_hi.push(format!("{:02}: {:08b}", i, self.hi[i]));
+ }
+ f.debug_struct("SlimMaskBuilder")
+ .field("lo", &parts_lo)
+ .field("hi", &parts_hi)
+ .finish()
+ }
+}
+
+/// Represents the low and high nybble masks that will be used during "fat"
+/// Teddy search.
+///
+/// Each mask is 32 bytes wide, and at the time of writing, only 256-bit vectors
+/// support fat Teddy.
+///
+/// A fat Teddy mask is like a slim Teddy mask, except that instead of
+/// repeating the bitsets in the high and low 128-bits in 256-bit vectors, the
+/// high and low 128-bit halves each represent distinct buckets. (Bringing the
+/// total to 16 instead of 8.) This permits spreading the patterns out a bit
+/// more and thus putting less pressure on verification to be fast.
+///
+/// Each byte in the mask corresponds to a 8-bit bitset, where bit `i` is set
+/// if and only if the corresponding nybble is in the ith bucket. The index of
+/// the byte (0-15, inclusive) corresponds to the nybble.
+#[derive(Clone, Copy, Default)]
+struct FatMaskBuilder {
+ lo: [u8; 32],
+ hi: [u8; 32],
+}
+
+impl FatMaskBuilder {
+ /// Update this mask by adding the given byte to the given bucket. The
+ /// given bucket must be in the range 0-15.
+ ///
+ /// # Panics
+ ///
+ /// When `bucket >= 16`.
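+ ///
+ /// For example, `add(11, b'a')` (byte 0x61, low nybble 0x1, high nybble
+ /// 0x6) sets bit `11 % 8 = 3` in `lo[0x11]` and `hi[0x16]`, since bucket
+ /// 11 lives in the high 128-bit half of the mask.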
+ fn add(&mut self, bucket: usize, byte: u8) {
+ assert!(bucket < 16);
+
+ let bucket = u8::try_from(bucket).unwrap();
+ let byte_lo = usize::from(byte & 0xF);
+ let byte_hi = usize::from((byte >> 4) & 0xF);
+ // Unlike slim teddy, fat teddy only works with AVX2. For fat teddy,
+ // the high 128 bits of our mask correspond to buckets 8-15, while the
+ // low 128 bits correspond to buckets 0-7.
+ if bucket < 8 {
+ self.lo[byte_lo] |= 1 << bucket;
+ self.hi[byte_hi] |= 1 << bucket;
+ } else {
+ self.lo[byte_lo + 16] |= 1 << (bucket % 8);
+ self.hi[byte_hi + 16] |= 1 << (bucket % 8);
+ }
+ }
+
+ /// Turn this builder into a vector mask.
+ ///
+ /// # Panics
+ ///
+ /// When `V` represents a vector wider than the 32-byte masks in this builder.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ #[inline(always)]
+ unsafe fn build<V: Vector>(&self) -> Mask<V> {
+ assert!(V::BYTES <= self.lo.len());
+ assert!(V::BYTES <= self.hi.len());
+ Mask {
+ lo: V::load_unaligned(self.lo[..].as_ptr()),
+ hi: V::load_unaligned(self.hi[..].as_ptr()),
+ }
+ }
+
+ /// A convenience function for building `N` vector masks from a fat
+ /// `Teddy` value.
+ ///
+ /// # Panics
+ ///
+ /// When `V` represents a vector wider than the 32-byte masks in this builder.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ #[inline(always)]
+ unsafe fn from_teddy<const BYTES: usize, V: Vector>(
+ teddy: &Teddy<16>,
+ ) -> [Mask<V>; BYTES] {
+ // MSRV(1.63): Use core::array::from_fn to just build the array here
+ // instead of creating a vector and turning it into an array.
+ let mut mask_builders = vec![FatMaskBuilder::default(); BYTES];
+ for (bucket_index, bucket) in teddy.buckets.iter().enumerate() {
+ for pid in bucket.iter().copied() {
+ let pat = teddy.patterns.get(pid);
+ for (i, builder) in mask_builders.iter_mut().enumerate() {
+ builder.add(bucket_index, pat.bytes()[i]);
+ }
+ }
+ }
+ let array =
+ <[FatMaskBuilder; BYTES]>::try_from(mask_builders).unwrap();
+ array.map(|builder| builder.build())
+ }
+}
+
+impl Debug for FatMaskBuilder {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ let (mut parts_lo, mut parts_hi) = (vec![], vec![]);
+ for i in 0..32 {
+ parts_lo.push(format!("{:02}: {:08b}", i, self.lo[i]));
+ parts_hi.push(format!("{:02}: {:08b}", i, self.hi[i]));
+ }
+ f.debug_struct("FatMaskBuilder")
+ .field("lo", &parts_lo)
+ .field("hi", &parts_hi)
+ .finish()
+ }
+}
diff --git a/third_party/rust/aho-corasick/src/packed/teddy/mod.rs b/third_party/rust/aho-corasick/src/packed/teddy/mod.rs
new file mode 100644
index 0000000000..26cfcdc450
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/packed/teddy/mod.rs
@@ -0,0 +1,9 @@
+// Regrettable, but Teddy stuff just isn't used on all targets. And for some
+// targets, like aarch64, only "slim" Teddy is used and so "fat" Teddy gets a
+// bunch of dead-code warnings. Just not worth trying to squash them. Blech.
+#![allow(dead_code)]
+
+pub(crate) use self::builder::{Builder, Searcher};
+
+mod builder;
+mod generic;
diff --git a/third_party/rust/aho-corasick/src/packed/tests.rs b/third_party/rust/aho-corasick/src/packed/tests.rs
new file mode 100644
index 0000000000..2b0d44ee6f
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/packed/tests.rs
@@ -0,0 +1,583 @@
+use std::collections::HashMap;
+
+use alloc::{
+ format,
+ string::{String, ToString},
+ vec,
+ vec::Vec,
+};
+
+use crate::{
+ packed::{Config, MatchKind},
+ util::search::Match,
+};
+
+/// A description of a single test against a multi-pattern searcher.
+///
+/// A single test may not necessarily pass on every configuration of a
+/// searcher. The tests are categorized and grouped appropriately below.
+#[derive(Clone, Debug, Eq, PartialEq)]
+struct SearchTest {
+ /// The name of this test, for debugging.
+ name: &'static str,
+ /// The patterns to search for.
+ patterns: &'static [&'static str],
+ /// The text to search.
+ haystack: &'static str,
+ /// Each match is a triple of (pattern_index, start, end), where
+ /// pattern_index is an index into `patterns` and `start`/`end` are indices
+ /// into `haystack`.
+ matches: &'static [(usize, usize, usize)],
+}
+
+struct SearchTestOwned {
+ offset: usize,
+ name: String,
+ patterns: Vec<String>,
+ haystack: String,
+ matches: Vec<(usize, usize, usize)>,
+}
+
+impl SearchTest {
+ fn variations(&self) -> Vec<SearchTestOwned> {
+ let count = if cfg!(miri) { 1 } else { 261 };
+ let mut tests = vec![];
+ for i in 0..count {
+ tests.push(self.offset_prefix(i));
+ tests.push(self.offset_suffix(i));
+ tests.push(self.offset_both(i));
+ }
+ tests
+ }
+
+ fn offset_both(&self, off: usize) -> SearchTestOwned {
+ SearchTestOwned {
+ offset: off,
+ name: self.name.to_string(),
+ patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
+ haystack: format!(
+ "{}{}{}",
+ "Z".repeat(off),
+ self.haystack,
+ "Z".repeat(off)
+ ),
+ matches: self
+ .matches
+ .iter()
+ .map(|&(id, s, e)| (id, s + off, e + off))
+ .collect(),
+ }
+ }
+
+ fn offset_prefix(&self, off: usize) -> SearchTestOwned {
+ SearchTestOwned {
+ offset: off,
+ name: self.name.to_string(),
+ patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
+ haystack: format!("{}{}", "Z".repeat(off), self.haystack),
+ matches: self
+ .matches
+ .iter()
+ .map(|&(id, s, e)| (id, s + off, e + off))
+ .collect(),
+ }
+ }
+
+ fn offset_suffix(&self, off: usize) -> SearchTestOwned {
+ SearchTestOwned {
+ offset: off,
+ name: self.name.to_string(),
+ patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
+ haystack: format!("{}{}", self.haystack, "Z".repeat(off)),
+ matches: self.matches.to_vec(),
+ }
+ }
+}
+
+/// Short-hand constructor for SearchTest. We use it a lot below.
+macro_rules! t {
+ ($name:ident, $patterns:expr, $haystack:expr, $matches:expr) => {
+ SearchTest {
+ name: stringify!($name),
+ patterns: $patterns,
+ haystack: $haystack,
+ matches: $matches,
+ }
+ };
+}
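+
+// For example, `t!(basic010, &["a"], "a", &[(0, 0, 1)])` below describes a
+// search for the single pattern "a" in the haystack "a", expecting one match
+// of pattern 0 spanning bytes 0..1.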
+
+/// A collection of test groups.
+type TestCollection = &'static [&'static [SearchTest]];
+
+// Define several collections corresponding to the different types of match
+// semantics supported. These collections have some overlap, but each
+// collection should have some tests that no other collection has.
+
+/// Tests for leftmost-first match semantics.
+const PACKED_LEFTMOST_FIRST: TestCollection =
+ &[BASICS, LEFTMOST, LEFTMOST_FIRST, REGRESSION, TEDDY];
+
+/// Tests for leftmost-longest match semantics.
+const PACKED_LEFTMOST_LONGEST: TestCollection =
+ &[BASICS, LEFTMOST, LEFTMOST_LONGEST, REGRESSION, TEDDY];
+
+// Now define the individual tests that make up the collections above.
+
+/// A collection of tests that should always be true regardless of
+/// match semantics. That is, all combinations of leftmost-{first, longest}
+/// should produce the same answer.
+const BASICS: &'static [SearchTest] = &[
+ t!(basic001, &["a"], "", &[]),
+ t!(basic010, &["a"], "a", &[(0, 0, 1)]),
+ t!(basic020, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]),
+ t!(basic030, &["a"], "aaa", &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]),
+ t!(basic040, &["a"], "aba", &[(0, 0, 1), (0, 2, 3)]),
+ t!(basic050, &["a"], "bba", &[(0, 2, 3)]),
+ t!(basic060, &["a"], "bbb", &[]),
+ t!(basic070, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]),
+ t!(basic100, &["aa"], "", &[]),
+ t!(basic110, &["aa"], "aa", &[(0, 0, 2)]),
+ t!(basic120, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]),
+ t!(basic130, &["aa"], "abbab", &[]),
+ t!(basic140, &["aa"], "abbabaa", &[(0, 5, 7)]),
+ t!(basic150, &["aaa"], "aaa", &[(0, 0, 3)]),
+ t!(basic200, &["abc"], "abc", &[(0, 0, 3)]),
+ t!(basic210, &["abc"], "zazabzabcz", &[(0, 6, 9)]),
+ t!(basic220, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]),
+ t!(basic230, &["abcd"], "abcd", &[(0, 0, 4)]),
+ t!(basic240, &["abcd"], "zazabzabcdz", &[(0, 6, 10)]),
+ t!(basic250, &["abcd"], "zazabcdzabcdz", &[(0, 3, 7), (0, 8, 12)]),
+ t!(basic300, &["a", "b"], "", &[]),
+ t!(basic310, &["a", "b"], "z", &[]),
+ t!(basic320, &["a", "b"], "b", &[(1, 0, 1)]),
+ t!(basic330, &["a", "b"], "a", &[(0, 0, 1)]),
+ t!(
+ basic340,
+ &["a", "b"],
+ "abba",
+ &[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),]
+ ),
+ t!(
+ basic350,
+ &["b", "a"],
+ "abba",
+ &[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),]
+ ),
+ t!(basic360, &["abc", "bc"], "xbc", &[(1, 1, 3),]),
+ t!(basic400, &["foo", "bar"], "", &[]),
+ t!(basic410, &["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6),]),
+ t!(basic420, &["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6),]),
+ t!(basic430, &["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6),]),
+ t!(basic440, &["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6),]),
+ t!(basic450, &["foo", "bar"], "bafofoo", &[(0, 4, 7),]),
+ t!(basic460, &["bar", "foo"], "bafofoo", &[(1, 4, 7),]),
+ t!(basic470, &["foo", "bar"], "fobabar", &[(1, 4, 7),]),
+ t!(basic480, &["bar", "foo"], "fobabar", &[(0, 4, 7),]),
+ t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7),]),
+ t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[(1, 1, 10),]),
+ t!(
+ basic720,
+ &["yabcdef", "bcdeyabc", "abcdezghi"],
+ "yabcdezghi",
+ &[(2, 1, 10),]
+ ),
+ t!(basic810, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]),
+ t!(basic820, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]),
+ t!(basic830, &["abc", "bc"], "zazabcz", &[(0, 3, 6),]),
+ t!(
+ basic840,
+ &["ab", "ba"],
+ "abababa",
+ &[(0, 0, 2), (0, 2, 4), (0, 4, 6),]
+ ),
+ t!(basic850, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9),]),
+];
+
+/// Tests for leftmost match semantics. These should pass for both
+/// leftmost-first and leftmost-longest match kinds. Stated differently, among
+/// ambiguous matches, the longest match and the match that appeared first when
+/// constructing the automaton should always be the same.
+const LEFTMOST: &'static [SearchTest] = &[
+ t!(leftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
+ t!(leftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]),
+ t!(leftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
+ t!(leftmost032, &["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]),
+ t!(leftmost300, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]),
+ t!(leftmost310, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]),
+ t!(leftmost320, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]),
+ t!(leftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]),
+ t!(leftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]),
+ t!(leftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]),
+ t!(
+ leftmost360,
+ &["abcdefghi", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(2, 0, 8),]
+ ),
+ t!(
+ leftmost370,
+ &["abcdefghi", "cde", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(3, 0, 8),]
+ ),
+ t!(
+ leftmost380,
+ &["abcdefghi", "hz", "abcdefgh", "a"],
+ "abcdefghz",
+ &[(2, 0, 8),]
+ ),
+ t!(
+ leftmost390,
+ &["b", "abcdefghi", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(3, 0, 8),]
+ ),
+ t!(
+ leftmost400,
+ &["h", "abcdefghi", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(3, 0, 8),]
+ ),
+ t!(
+ leftmost410,
+ &["z", "abcdefghi", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(3, 0, 8), (0, 8, 9),]
+ ),
+];
+
+/// Tests for non-overlapping leftmost-first match semantics. These tests
+/// should generally be specific to leftmost-first, which means they should
+/// generally fail under leftmost-longest semantics.
+const LEFTMOST_FIRST: &'static [SearchTest] = &[
+ t!(leftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
+ t!(leftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
+ t!(leftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
+ t!(leftfirst040, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]),
+ t!(leftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]),
+ t!(leftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
+ t!(leftfirst300, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]),
+ t!(
+ leftfirst310,
+ &["abcd", "b", "bce", "ce"],
+ "abce",
+ &[(1, 1, 2), (3, 2, 4),]
+ ),
+ t!(
+ leftfirst320,
+ &["a", "abcdefghi", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(0, 0, 1), (2, 7, 9),]
+ ),
+ t!(leftfirst330, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]),
+ t!(
+ leftfirst340,
+ &["abcdef", "x", "x", "x", "x", "x", "x", "abcde"],
+ "abcdef",
+ &[(0, 0, 6)]
+ ),
+];
+
+/// Tests for non-overlapping leftmost-longest match semantics. These tests
+/// should generally be specific to leftmost-longest, which means they should
+/// generally fail under leftmost-first semantics.
+const LEFTMOST_LONGEST: &'static [SearchTest] = &[
+ t!(leftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
+ t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]),
+ t!(leftlong040, &["a", "ab"], "a", &[(0, 0, 1)]),
+ t!(leftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]),
+ t!(leftlong060, &["ab", "a"], "a", &[(1, 0, 1)]),
+ t!(leftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]),
+ t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]),
+ t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
+ t!(leftlong300, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]),
+ t!(
+ leftlong310,
+ &["a", "abcdefghi", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(3, 0, 8),]
+ ),
+ t!(leftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]),
+ t!(leftlong330, &["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4),]),
+ t!(leftlong340, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]),
+];
+
+/// Regression tests that are applied to all combinations.
+///
+/// If regression tests are needed for specific match semantics, then add them
+/// to the appropriate group above.
+const REGRESSION: &'static [SearchTest] = &[
+ t!(regression010, &["inf", "ind"], "infind", &[(0, 0, 3), (1, 3, 6),]),
+ t!(regression020, &["ind", "inf"], "infind", &[(1, 0, 3), (0, 3, 6),]),
+ t!(
+ regression030,
+ &["libcore/", "libstd/"],
+ "libcore/char/methods.rs",
+ &[(0, 0, 8),]
+ ),
+ t!(
+ regression040,
+ &["libstd/", "libcore/"],
+ "libcore/char/methods.rs",
+ &[(1, 0, 8),]
+ ),
+ t!(
+ regression050,
+ &["\x00\x00\x01", "\x00\x00\x00"],
+ "\x00\x00\x00",
+ &[(1, 0, 3),]
+ ),
+ t!(
+ regression060,
+ &["\x00\x00\x00", "\x00\x00\x01"],
+ "\x00\x00\x00",
+ &[(0, 0, 3),]
+ ),
+];
+
+const TEDDY: &'static [SearchTest] = &[
+ t!(
+ teddy010,
+ &["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"],
+ "abcdefghijk",
+ &[
+ (0, 0, 1),
+ (1, 1, 2),
+ (2, 2, 3),
+ (3, 3, 4),
+ (4, 4, 5),
+ (5, 5, 6),
+ (6, 6, 7),
+ (7, 7, 8),
+ (8, 8, 9),
+ (9, 9, 10),
+ (10, 10, 11)
+ ]
+ ),
+ t!(
+ teddy020,
+ &["ab", "bc", "cd", "de", "ef", "fg", "gh", "hi", "ij", "jk", "kl"],
+ "abcdefghijk",
+ &[(0, 0, 2), (2, 2, 4), (4, 4, 6), (6, 6, 8), (8, 8, 10),]
+ ),
+ t!(
+ teddy030,
+ &["abc"],
+ "abcdefghijklmnopqrstuvwxyzabcdefghijk",
+ &[(0, 0, 3), (0, 26, 29)]
+ ),
+];
+
+// Now define a test for each combination of things above that we want to run.
+// Since there are a few different combinations for each collection of tests,
+// we define a macro to avoid repetition drudgery. The testconfig macro
+// constructs the automaton from a given match kind, and runs the search
+// tests one-by-one over the given collection. The `with` parameter allows one
+// to configure the config with additional parameters. Each invocation of
+// testconfig below turns a different knob on Config.
+
+macro_rules! testconfig {
+ ($name:ident, $collection:expr, $with:expr) => {
+ #[test]
+ fn $name() {
+ run_search_tests($collection, |test| {
+ let mut config = Config::new();
+ $with(&mut config);
+ let mut builder = config.builder();
+ builder.extend(test.patterns.iter().map(|p| p.as_bytes()));
+ let searcher = match builder.build() {
+ Some(searcher) => searcher,
+ None => {
+ // For x86-64 and aarch64, not building a searcher is
+ // probably a bug, so be loud.
+ if cfg!(any(
+ target_arch = "x86_64",
+ target_arch = "aarch64"
+ )) {
+ panic!("failed to build packed searcher")
+ }
+ return None;
+ }
+ };
+ Some(searcher.find_iter(&test.haystack).collect())
+ });
+ }
+ };
+}
+
+testconfig!(
+ search_default_leftmost_first,
+ PACKED_LEFTMOST_FIRST,
+ |_: &mut Config| {}
+);
+
+testconfig!(
+ search_default_leftmost_longest,
+ PACKED_LEFTMOST_LONGEST,
+ |c: &mut Config| {
+ c.match_kind(MatchKind::LeftmostLongest);
+ }
+);
+
+testconfig!(
+ search_teddy_leftmost_first,
+ PACKED_LEFTMOST_FIRST,
+ |c: &mut Config| {
+ c.only_teddy(true);
+ }
+);
+
+testconfig!(
+ search_teddy_leftmost_longest,
+ PACKED_LEFTMOST_LONGEST,
+ |c: &mut Config| {
+ c.only_teddy(true).match_kind(MatchKind::LeftmostLongest);
+ }
+);
+
+testconfig!(
+ search_teddy_ssse3_leftmost_first,
+ PACKED_LEFTMOST_FIRST,
+ |c: &mut Config| {
+ c.only_teddy(true);
+ #[cfg(target_arch = "x86_64")]
+ if std::is_x86_feature_detected!("ssse3") {
+ c.only_teddy_256bit(Some(false));
+ }
+ }
+);
+
+testconfig!(
+ search_teddy_ssse3_leftmost_longest,
+ PACKED_LEFTMOST_LONGEST,
+ |c: &mut Config| {
+ c.only_teddy(true).match_kind(MatchKind::LeftmostLongest);
+ #[cfg(target_arch = "x86_64")]
+ if std::is_x86_feature_detected!("ssse3") {
+ c.only_teddy_256bit(Some(false));
+ }
+ }
+);
+
+testconfig!(
+ search_teddy_avx2_leftmost_first,
+ PACKED_LEFTMOST_FIRST,
+ |c: &mut Config| {
+ c.only_teddy(true);
+ #[cfg(target_arch = "x86_64")]
+ if std::is_x86_feature_detected!("avx2") {
+ c.only_teddy_256bit(Some(true));
+ }
+ }
+);
+
+testconfig!(
+ search_teddy_avx2_leftmost_longest,
+ PACKED_LEFTMOST_LONGEST,
+ |c: &mut Config| {
+ c.only_teddy(true).match_kind(MatchKind::LeftmostLongest);
+ #[cfg(target_arch = "x86_64")]
+ if std::is_x86_feature_detected!("avx2") {
+ c.only_teddy_256bit(Some(true));
+ }
+ }
+);
+
+testconfig!(
+ search_teddy_fat_leftmost_first,
+ PACKED_LEFTMOST_FIRST,
+ |c: &mut Config| {
+ c.only_teddy(true);
+ #[cfg(target_arch = "x86_64")]
+ if std::is_x86_feature_detected!("avx2") {
+ c.only_teddy_fat(Some(true));
+ }
+ }
+);
+
+testconfig!(
+ search_teddy_fat_leftmost_longest,
+ PACKED_LEFTMOST_LONGEST,
+ |c: &mut Config| {
+ c.only_teddy(true).match_kind(MatchKind::LeftmostLongest);
+ #[cfg(target_arch = "x86_64")]
+ if std::is_x86_feature_detected!("avx2") {
+ c.only_teddy_fat(Some(true));
+ }
+ }
+);
+
+testconfig!(
+ search_rabinkarp_leftmost_first,
+ PACKED_LEFTMOST_FIRST,
+ |c: &mut Config| {
+ c.only_rabin_karp(true);
+ }
+);
+
+testconfig!(
+ search_rabinkarp_leftmost_longest,
+ PACKED_LEFTMOST_LONGEST,
+ |c: &mut Config| {
+ c.only_rabin_karp(true).match_kind(MatchKind::LeftmostLongest);
+ }
+);
+
+#[test]
+fn search_tests_have_unique_names() {
+ let assert = |constname, tests: &[SearchTest]| {
+ let mut seen = HashMap::new(); // map from test name to position
+ for (i, test) in tests.iter().enumerate() {
+ if !seen.contains_key(test.name) {
+ seen.insert(test.name, i);
+ } else {
+ let last = seen[test.name];
+ panic!(
+ "{} tests have duplicate names at positions {} and {}",
+ constname, last, i
+ );
+ }
+ }
+ };
+ assert("BASICS", BASICS);
+ assert("LEFTMOST", LEFTMOST);
+ assert("LEFTMOST_FIRST", LEFTMOST_FIRST);
+ assert("LEFTMOST_LONGEST", LEFTMOST_LONGEST);
+ assert("REGRESSION", REGRESSION);
+ assert("TEDDY", TEDDY);
+}
+
+fn run_search_tests<F: FnMut(&SearchTestOwned) -> Option<Vec<Match>>>(
+ which: TestCollection,
+ mut f: F,
+) {
+ let get_match_triples =
+ |matches: Vec<Match>| -> Vec<(usize, usize, usize)> {
+ matches
+ .into_iter()
+ .map(|m| (m.pattern().as_usize(), m.start(), m.end()))
+ .collect()
+ };
+ for &tests in which {
+ for spec in tests {
+ for test in spec.variations() {
+ let results = match f(&test) {
+ None => continue,
+ Some(results) => results,
+ };
+ assert_eq!(
+ test.matches,
+ get_match_triples(results).as_slice(),
+ "test: {}, patterns: {:?}, haystack(len={:?}): {:?}, \
+ offset: {:?}",
+ test.name,
+ test.patterns,
+ test.haystack.len(),
+ test.haystack,
+ test.offset,
+ );
+ }
+ }
+ }
+}
diff --git a/third_party/rust/aho-corasick/src/packed/vector.rs b/third_party/rust/aho-corasick/src/packed/vector.rs
new file mode 100644
index 0000000000..f19b86ce1e
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/packed/vector.rs
@@ -0,0 +1,1750 @@
+// NOTE: The descriptions for each of the vector methods on the traits below
+// are pretty inscrutable. For this reason, there are tests for every method
+// for every trait impl below. If you're confused about what an op does,
+// consult its test. (They probably should be doc tests, but I couldn't figure
+// out how to write them in a non-annoying way.)
+
+use core::{
+ fmt::Debug,
+ panic::{RefUnwindSafe, UnwindSafe},
+};
+
+/// A trait for describing vector operations used by vectorized searchers.
+///
+/// The trait is highly constrained to low level vector operations needed for
+/// the specific algorithms used in this crate. In general, it was invented
+/// mostly to be generic over x86's __m128i and __m256i types. At the time of
+/// writing, it also supports wasm and aarch64 128-bit vector types.
+///
+/// # Safety
+///
+/// All methods are unsafe since they are intended to be implemented using
+/// vendor intrinsics, which are also unsafe. Callers must ensure that
+/// the appropriate target features are enabled in the calling function,
+/// and that the current CPU supports them. All implementations should
+/// avoid marking the routines with `#[target_feature]` and instead mark
+/// them as `#[inline(always)]` to ensure they get appropriately inlined.
+/// (`inline(always)` cannot be used with target_feature.)
+pub(crate) trait Vector:
+ Copy + Debug + Send + Sync + UnwindSafe + RefUnwindSafe
+{
+ /// The number of bits in the vector.
+ const BITS: usize;
+ /// The number of bytes in the vector. That is, this is the size of the
+ /// vector in memory.
+ const BYTES: usize;
+
+ /// Create a vector with 8-bit lanes with the given byte repeated into each
+ /// lane.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ unsafe fn splat(byte: u8) -> Self;
+
+ /// Read a vector-size number of bytes from the given pointer. The pointer
+ /// does not need to be aligned.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ ///
+ /// Callers must guarantee that at least `BYTES` bytes are readable from
+ /// `data`.
+ unsafe fn load_unaligned(data: *const u8) -> Self;
+
+ /// Returns true if and only if this vector has zero in all of its lanes.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ unsafe fn is_zero(self) -> bool;
+
+ /// Do an 8-bit pairwise equality check. If lane `i` is equal in this
+ /// vector and the one given, then lane `i` in the resulting vector is set
+ /// to `0xFF`. Otherwise, it is set to `0x00`.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ unsafe fn cmpeq(self, vector2: Self) -> Self;
+
+ /// Perform a bitwise 'and' of this vector and the one given and return
+ /// the result.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ unsafe fn and(self, vector2: Self) -> Self;
+
+ /// Perform a bitwise 'or' of this vector and the one given and return
+ /// the result.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ unsafe fn or(self, vector2: Self) -> Self;
+
+ /// Shift each 8-bit lane in this vector to the right by the number of
+ /// bits indicated by the `BITS` parameter.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ unsafe fn shift_8bit_lane_right<const BITS: i32>(self) -> Self;
+
+ /// Shift this vector to the left by one byte and shift the most
+ /// significant byte of `vector2` into the least significant position of
+ /// this vector.
+ ///
+ /// Stated differently, this behaves as if `self` and `vector2` were
+ /// concatenated into a `2 * Self::BITS` temporary buffer and then shifted
+ /// right by `Self::BYTES - 1` bytes.
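+ ///
+ /// For example, with 16-byte vectors written least significant lane
+ /// first, if `self = [x0, x1, ..., x15]` and `vector2 = [y0, y1, ...,
+ /// y15]`, then the result is `[y15, x0, x1, ..., x14]`.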
+ ///
+ /// With respect to the Teddy algorithm, `vector2` is usually a previous
+ /// `Self::BYTES` chunk from the haystack and `self` is the chunk
+ /// immediately following it. This permits combining the last byte
+ /// from the previous chunk (`vector2`) with the first `Self::BYTES - 1`
+ /// bytes from the current chunk. This permits aligning the result of
+ /// various shuffles so that they can be and-ed together and a possible
+ /// candidate discovered.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ unsafe fn shift_in_one_byte(self, vector2: Self) -> Self;
+
+ /// Shift this vector to the left by two bytes and shift the two most
+ /// significant bytes of `vector2` into the least significant position of
+ /// this vector.
+ ///
+ /// Stated differently, this behaves as if `self` and `vector2` were
+ /// concatenated into a `2 * Self::BITS` temporary buffer and then shifted
+ /// right by `Self::BYTES - 2` bytes.
+ ///
+ /// With respect to the Teddy algorithm, `vector2` is usually a previous
+ /// `Self::BYTES` chunk from the haystack and `self` is the chunk
+ /// immediately following it. This permits combining the last two bytes
+ /// from the previous chunk (`vector2`) with the first `Self::BYTES - 2`
+ /// bytes from the current chunk. This permits aligning the result of
+ /// various shuffles so that they can be and-ed together and a possible
+ /// candidate discovered.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ unsafe fn shift_in_two_bytes(self, vector2: Self) -> Self;
+
+ /// Shift this vector to the left by three bytes and shift the three most
+ /// significant bytes of `vector2` into the least significant position of
+ /// this vector.
+ ///
+ /// Stated differently, this behaves as if `self` and `vector2` were
+ /// concatenated into a `2 * Self::BITS` temporary buffer and then shifted
+ /// right by `Self::BYTES - 3` bytes.
+ ///
+ /// With respect to the Teddy algorithm, `vector2` is usually a previous
+ /// `Self::BYTES` chunk from the haystack and `self` is the chunk
+ /// immediately following it. This permits combining the last three bytes
+ /// from the previous chunk (`vector2`) with the first `Self::BYTES - 3`
+ /// bytes from the current chunk. This permits aligning the result of
+ /// various shuffles so that they can be and-ed together and a possible
+ /// candidate discovered.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ unsafe fn shift_in_three_bytes(self, vector2: Self) -> Self;
+
+ /// Shuffles the bytes in this vector according to the indices in each of
+ /// the corresponding lanes in `indices`.
+ ///
+ /// If `i` is the index of corresponding lanes, `A` is this vector, `B` is
+ /// indices and `C` is the resulting vector, then `C[i] = A[B[i]]`.
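+ /// For example, looking at just four lanes, if `A = [10, 20, 30, 40]` and
+ /// `B = [3, 3, 0, 1]`, then `C = [40, 40, 10, 20]`.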
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ unsafe fn shuffle_bytes(self, indices: Self) -> Self;
+
+ /// Call the provided function for each 64-bit lane in this vector. The
+ /// given function is provided the lane index and lane value as a `u64`.
+ ///
+ /// If `f` returns `Some`, then iteration over the lanes is stopped and the
+ /// value is returned. Otherwise, this returns `None`.
+ ///
+ /// # Notes
+ ///
+ /// Conceptually it would be nice if we could have an
+ /// `unpack64(self) -> [u64; BITS / 64]` method, but defining that is
+ /// tricky given Rust's [current support for const generics][support].
+ /// And even if we could, it would be tricky to write generic code over
+ /// it. (Not impossible. We could introduce another layer that requires
+ /// `AsRef<[u64]>` or something.)
+ ///
+ /// [support]: https://github.com/rust-lang/rust/issues/60551
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ unsafe fn for_each_64bit_lane<T>(
+ self,
+ f: impl FnMut(usize, u64) -> Option<T>,
+ ) -> Option<T>;
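+
+ // Illustration only: for a 16-byte vector loaded from the bytes
+ // [0x01, 0x02, ..., 0x10], the two lanes handed to `f` are expected to be
+ // 0x0807060504030201 and 0x100F0E0D0C0B0A09 (little-endian), as in the
+ // unit tests at the bottom of this file.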
+}
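+
+// A minimal sketch (not part of the crate's API, added purely for
+// illustration) of the intended call pattern for `Vector`: load a chunk of
+// haystack, do some SIMD comparisons and then check whether anything
+// matched. The helper name `chunk_has_byte` is made up here and is not used
+// anywhere else.
+#[allow(dead_code)]
+#[inline(always)]
+unsafe fn chunk_has_byte<V: Vector>(chunk: *const u8, needle: u8) -> bool {
+ // SAFETY: per the `Vector` contract, the caller must ensure that the
+ // required target feature is enabled and that at least `V::BYTES` bytes
+ // are readable from `chunk`.
+ let haystack = V::load_unaligned(chunk);
+ let eq = haystack.cmpeq(V::splat(needle));
+ !eq.is_zero()
+}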
+
+/// This trait extends the `Vector` trait with additional operations to support
+/// Fat Teddy.
+///
+/// Fat Teddy uses 16 buckets instead of 8, but reads half a vector's worth
+/// of bytes per iteration instead of a full vector's worth. For example,
+/// when using a 256-bit vector, Slim Teddy reads 32 bytes at a time but Fat
+/// Teddy reads 16 bytes at a time.
+///
+/// Fat Teddy is useful when searching for a large number of literals.
+/// The extra number of buckets spreads the literals out more and reduces
+/// verification time.
+///
+/// Currently we only implement this for AVX2 on x86_64. It would be nice to
+/// implement this for SSE on x86_64 and NEON on aarch64, with the latter two
+/// only reading 8 bytes at a time. It's not clear how well it would work, and
+/// there are some tricky things to figure out in terms of implementation. The
+/// `half_shift_in_{one,two,three}_bytes` methods in particular are probably
+/// the trickiest of the bunch. For AVX2, these are implemented by taking
+/// advantage of the fact that `_mm256_alignr_epi8` operates on each 128-bit
+/// half instead of the full 256-bit vector. (Whereas `_mm_alignr_epi8`
+/// operates on the full 128-bit vector and not on each 64-bit half.) I didn't
+/// do a careful survey of NEON to see if it could easily support these
+/// operations.
+pub(crate) trait FatVector: Vector {
+ type Half: Vector;
+
+ /// Read a half-vector-size number of bytes from the given pointer, and
+ /// broadcast it across both halves of a full vector. The pointer does not
+ /// need to be aligned.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ ///
+ /// Callers must guarantee that at least `Self::Half::BYTES` bytes are
+ /// readable from `data`.
+ unsafe fn load_half_unaligned(data: *const u8) -> Self;
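+
+ // Illustration only, assuming `Self::BYTES == 32`: loading the 16 bytes
+ // [1, 2, ..., 16] with `load_half_unaligned` is expected to yield a vector
+ // whose two 128-bit halves both contain [1, 2, ..., 16], which is what the
+ // AVX2 implementation below does via `_mm256_broadcastsi128_si256`.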
+
+ /// Like `Vector::shift_in_one_byte`, except this is done for each half
+ /// of the vector instead.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ unsafe fn half_shift_in_one_byte(self, vector2: Self) -> Self;
+
+ /// Like `Vector::shift_in_two_bytes`, except this is done for each half
+ /// of the vector instead.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ unsafe fn half_shift_in_two_bytes(self, vector2: Self) -> Self;
+
+ /// Like `Vector::shift_in_three_bytes`, except this is done for each half
+ /// of the vector instead.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ unsafe fn half_shift_in_three_bytes(self, vector2: Self) -> Self;
+
+ /// Swap the 128-bit lanes in this vector.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ unsafe fn swap_halves(self) -> Self;
+
+ /// Unpack and interleave the 8-bit lanes from the low 128 bits of each
+ /// vector and return the result.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ unsafe fn interleave_low_8bit_lanes(self, vector2: Self) -> Self;
+
+ /// Unpack and interleave the 8-bit lanes from the high 128 bits of each
+ /// vector and return the result.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ unsafe fn interleave_high_8bit_lanes(self, vector2: Self) -> Self;
+
+ /// Call the provided function for each 64-bit lane in the lower half
+ /// of this vector and then in the other vector. The given function is
+ /// provided the lane index and lane value as a `u64`. (The high 128-bits
+ /// of each vector are ignored.)
+ ///
+ /// If `f` returns `Some`, then iteration over the lanes is stopped and the
+ /// value is returned. Otherwise, this returns `None`.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that this is okay to call in the current target for
+ /// the current CPU.
+ unsafe fn for_each_low_64bit_lane<T>(
+ self,
+ vector2: Self,
+ f: impl FnMut(usize, u64) -> Option<T>,
+ ) -> Option<T>;
+}
+
+#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+mod x86_64_ssse3 {
+ use core::arch::x86_64::*;
+
+ use crate::util::int::{I32, I64, I8};
+
+ use super::Vector;
+
+ impl Vector for __m128i {
+ const BITS: usize = 128;
+ const BYTES: usize = 16;
+
+ #[inline(always)]
+ unsafe fn splat(byte: u8) -> __m128i {
+ _mm_set1_epi8(i8::from_bits(byte))
+ }
+
+ #[inline(always)]
+ unsafe fn load_unaligned(data: *const u8) -> __m128i {
+ _mm_loadu_si128(data.cast::<__m128i>())
+ }
+
+ #[inline(always)]
+ unsafe fn is_zero(self) -> bool {
+ let cmp = self.cmpeq(Self::splat(0));
+ _mm_movemask_epi8(cmp).to_bits() == 0xFFFF
+ }
+
+ #[inline(always)]
+ unsafe fn cmpeq(self, vector2: Self) -> __m128i {
+ _mm_cmpeq_epi8(self, vector2)
+ }
+
+ #[inline(always)]
+ unsafe fn and(self, vector2: Self) -> __m128i {
+ _mm_and_si128(self, vector2)
+ }
+
+ #[inline(always)]
+ unsafe fn or(self, vector2: Self) -> __m128i {
+ _mm_or_si128(self, vector2)
+ }
+
+ #[inline(always)]
+ unsafe fn shift_8bit_lane_right<const BITS: i32>(self) -> Self {
+ // Apparently there is no _mm_srli_epi8, so we emulate it by
+ // shifting 16-bit integers and masking out the high nybble of each
+ // 8-bit lane (since that nybble will contain bits from the low
+ // nybble of the previous lane).
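+ //
+ // A worked example of the emulation (illustration only): for `BITS = 4`,
+ // a 16-bit lane holding the bytes [0xCD, 0xAB] (the 16-bit value 0xABCD)
+ // becomes 0x0ABC after the shift, i.e. the bytes [0xBC, 0x0A]. The high
+ // nybble of the low byte (0xB) leaked in from the neighboring byte, and
+ // masking with 0xF yields [0x0C, 0x0A], which matches shifting each byte
+ // right by 4 on its own.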
+ let lomask = Self::splat(0xF);
+ _mm_srli_epi16(self, BITS).and(lomask)
+ }
+
+ #[inline(always)]
+ unsafe fn shift_in_one_byte(self, vector2: Self) -> Self {
+ _mm_alignr_epi8(self, vector2, 15)
+ }
+
+ #[inline(always)]
+ unsafe fn shift_in_two_bytes(self, vector2: Self) -> Self {
+ _mm_alignr_epi8(self, vector2, 14)
+ }
+
+ #[inline(always)]
+ unsafe fn shift_in_three_bytes(self, vector2: Self) -> Self {
+ _mm_alignr_epi8(self, vector2, 13)
+ }
+
+ #[inline(always)]
+ unsafe fn shuffle_bytes(self, indices: Self) -> Self {
+ _mm_shuffle_epi8(self, indices)
+ }
+
+ #[inline(always)]
+ unsafe fn for_each_64bit_lane<T>(
+ self,
+ mut f: impl FnMut(usize, u64) -> Option<T>,
+ ) -> Option<T> {
+ let lane = _mm_extract_epi64(self, 0).to_bits();
+ if let Some(t) = f(0, lane) {
+ return Some(t);
+ }
+ let lane = _mm_extract_epi64(self, 1).to_bits();
+ if let Some(t) = f(1, lane) {
+ return Some(t);
+ }
+ None
+ }
+ }
+}
+
+#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+mod x86_64_avx2 {
+ use core::arch::x86_64::*;
+
+ use crate::util::int::{I32, I64, I8};
+
+ use super::{FatVector, Vector};
+
+ impl Vector for __m256i {
+ const BITS: usize = 256;
+ const BYTES: usize = 32;
+
+ #[inline(always)]
+ unsafe fn splat(byte: u8) -> __m256i {
+ _mm256_set1_epi8(i8::from_bits(byte))
+ }
+
+ #[inline(always)]
+ unsafe fn load_unaligned(data: *const u8) -> __m256i {
+ _mm256_loadu_si256(data.cast::<__m256i>())
+ }
+
+ #[inline(always)]
+ unsafe fn is_zero(self) -> bool {
+ let cmp = self.cmpeq(Self::splat(0));
+ _mm256_movemask_epi8(cmp).to_bits() == 0xFFFFFFFF
+ }
+
+ #[inline(always)]
+ unsafe fn cmpeq(self, vector2: Self) -> __m256i {
+ _mm256_cmpeq_epi8(self, vector2)
+ }
+
+ #[inline(always)]
+ unsafe fn and(self, vector2: Self) -> __m256i {
+ _mm256_and_si256(self, vector2)
+ }
+
+ #[inline(always)]
+ unsafe fn or(self, vector2: Self) -> __m256i {
+ _mm256_or_si256(self, vector2)
+ }
+
+ #[inline(always)]
+ unsafe fn shift_8bit_lane_right<const BITS: i32>(self) -> Self {
+ let lomask = Self::splat(0xF);
+ _mm256_srli_epi16(self, BITS).and(lomask)
+ }
+
+ #[inline(always)]
+ unsafe fn shift_in_one_byte(self, vector2: Self) -> Self {
+ // Credit goes to jneem for figuring this out:
+ // https://github.com/jneem/teddy/blob/9ab5e899ad6ef6911aecd3cf1033f1abe6e1f66c/src/x86/teddy_simd.rs#L145-L184
+ //
+ // TL;DR avx2's PALIGNR instruction is actually just two 128-bit
+ // PALIGNR instructions, which is not what we want, so we need to
+ // do some extra shuffling.
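+ //
+ // Sketch of why this works, going by the intrinsic definitions:
+ // `_mm256_permute2x128_si256(vector2, self, 0x21)` selects the high
+ // 128 bits of `vector2` as the low 128-bit lane of `v` and the low
+ // 128 bits of `self` as its high lane. The per-lane `alignr` below then
+ // shifts in bytes from the right place: the low lane of the result gets
+ // the last byte of `vector2`, and the high lane gets the last byte of
+ // `self`'s low lane.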
+ let v = _mm256_permute2x128_si256(vector2, self, 0x21);
+ _mm256_alignr_epi8(self, v, 15)
+ }
+
+ #[inline(always)]
+ unsafe fn shift_in_two_bytes(self, vector2: Self) -> Self {
+ // Credit goes to jneem for figuring this out:
+ // https://github.com/jneem/teddy/blob/9ab5e899ad6ef6911aecd3cf1033f1abe6e1f66c/src/x86/teddy_simd.rs#L145-L184
+ //
+ // TL;DR avx2's PALIGNR instruction is actually just two 128-bit
+ // PALIGNR instructions, which is not what we want, so we need to
+ // do some extra shuffling.
+ let v = _mm256_permute2x128_si256(vector2, self, 0x21);
+ _mm256_alignr_epi8(self, v, 14)
+ }
+
+ #[inline(always)]
+ unsafe fn shift_in_three_bytes(self, vector2: Self) -> Self {
+ // Credit goes to jneem for figuring this out:
+ // https://github.com/jneem/teddy/blob/9ab5e899ad6ef6911aecd3cf1033f1abe6e1f66c/src/x86/teddy_simd.rs#L145-L184
+ //
+ // TL;DR avx2's PALIGNR instruction is actually just two 128-bit
+ // PALIGNR instructions, which is not what we want, so we need to
+ // do some extra shuffling.
+ let v = _mm256_permute2x128_si256(vector2, self, 0x21);
+ _mm256_alignr_epi8(self, v, 13)
+ }
+
+ #[inline(always)]
+ unsafe fn shuffle_bytes(self, indices: Self) -> Self {
+ _mm256_shuffle_epi8(self, indices)
+ }
+
+ #[inline(always)]
+ unsafe fn for_each_64bit_lane<T>(
+ self,
+ mut f: impl FnMut(usize, u64) -> Option<T>,
+ ) -> Option<T> {
+ // NOTE: At one point in the past, I used a transmute here to
+ // get a [u64; 4], but it turned out to lead to worse codegen IIRC.
+ // I've tried it more recently, and it looks like that's no longer
+ // the case. But since there's no difference, we stick with the
+ // slightly more complicated but transmute-free version.
+ let lane = _mm256_extract_epi64(self, 0).to_bits();
+ if let Some(t) = f(0, lane) {
+ return Some(t);
+ }
+ let lane = _mm256_extract_epi64(self, 1).to_bits();
+ if let Some(t) = f(1, lane) {
+ return Some(t);
+ }
+ let lane = _mm256_extract_epi64(self, 2).to_bits();
+ if let Some(t) = f(2, lane) {
+ return Some(t);
+ }
+ let lane = _mm256_extract_epi64(self, 3).to_bits();
+ if let Some(t) = f(3, lane) {
+ return Some(t);
+ }
+ None
+ }
+ }
+
+ impl FatVector for __m256i {
+ type Half = __m128i;
+
+ #[inline(always)]
+ unsafe fn load_half_unaligned(data: *const u8) -> Self {
+ let half = Self::Half::load_unaligned(data);
+ _mm256_broadcastsi128_si256(half)
+ }
+
+ #[inline(always)]
+ unsafe fn half_shift_in_one_byte(self, vector2: Self) -> Self {
+ _mm256_alignr_epi8(self, vector2, 15)
+ }
+
+ #[inline(always)]
+ unsafe fn half_shift_in_two_bytes(self, vector2: Self) -> Self {
+ _mm256_alignr_epi8(self, vector2, 14)
+ }
+
+ #[inline(always)]
+ unsafe fn half_shift_in_three_bytes(self, vector2: Self) -> Self {
+ _mm256_alignr_epi8(self, vector2, 13)
+ }
+
+ #[inline(always)]
+ unsafe fn swap_halves(self) -> Self {
+ _mm256_permute4x64_epi64(self, 0x4E)
+ }
+
+ #[inline(always)]
+ unsafe fn interleave_low_8bit_lanes(self, vector2: Self) -> Self {
+ _mm256_unpacklo_epi8(self, vector2)
+ }
+
+ #[inline(always)]
+ unsafe fn interleave_high_8bit_lanes(self, vector2: Self) -> Self {
+ _mm256_unpackhi_epi8(self, vector2)
+ }
+
+ #[inline(always)]
+ unsafe fn for_each_low_64bit_lane<T>(
+ self,
+ vector2: Self,
+ mut f: impl FnMut(usize, u64) -> Option<T>,
+ ) -> Option<T> {
+ let lane = _mm256_extract_epi64(self, 0).to_bits();
+ if let Some(t) = f(0, lane) {
+ return Some(t);
+ }
+ let lane = _mm256_extract_epi64(self, 1).to_bits();
+ if let Some(t) = f(1, lane) {
+ return Some(t);
+ }
+ let lane = _mm256_extract_epi64(vector2, 0).to_bits();
+ if let Some(t) = f(2, lane) {
+ return Some(t);
+ }
+ let lane = _mm256_extract_epi64(vector2, 1).to_bits();
+ if let Some(t) = f(3, lane) {
+ return Some(t);
+ }
+ None
+ }
+ }
+}
+
+#[cfg(target_arch = "aarch64")]
+mod aarch64_neon {
+ use core::arch::aarch64::*;
+
+ use super::Vector;
+
+ impl Vector for uint8x16_t {
+ const BITS: usize = 128;
+ const BYTES: usize = 16;
+
+ #[inline(always)]
+ unsafe fn splat(byte: u8) -> uint8x16_t {
+ vdupq_n_u8(byte)
+ }
+
+ #[inline(always)]
+ unsafe fn load_unaligned(data: *const u8) -> uint8x16_t {
+ vld1q_u8(data)
+ }
+
+ #[inline(always)]
+ unsafe fn is_zero(self) -> bool {
+ // Could also use vmaxvq_u8.
+ // ... I tried that and couldn't observe any meaningful difference
+ // in benchmarks.
+ let maxes = vreinterpretq_u64_u8(vpmaxq_u8(self, self));
+ vgetq_lane_u64(maxes, 0) == 0
+ }
+
+ #[inline(always)]
+ unsafe fn cmpeq(self, vector2: Self) -> uint8x16_t {
+ vceqq_u8(self, vector2)
+ }
+
+ #[inline(always)]
+ unsafe fn and(self, vector2: Self) -> uint8x16_t {
+ vandq_u8(self, vector2)
+ }
+
+ #[inline(always)]
+ unsafe fn or(self, vector2: Self) -> uint8x16_t {
+ vorrq_u8(self, vector2)
+ }
+
+ #[inline(always)]
+ unsafe fn shift_8bit_lane_right<const BITS: i32>(self) -> Self {
+ debug_assert!(BITS <= 7);
+ vshrq_n_u8(self, BITS)
+ }
+
+ #[inline(always)]
+ unsafe fn shift_in_one_byte(self, vector2: Self) -> Self {
+ vextq_u8(vector2, self, 15)
+ }
+
+ #[inline(always)]
+ unsafe fn shift_in_two_bytes(self, vector2: Self) -> Self {
+ vextq_u8(vector2, self, 14)
+ }
+
+ #[inline(always)]
+ unsafe fn shift_in_three_bytes(self, vector2: Self) -> Self {
+ vextq_u8(vector2, self, 13)
+ }
+
+ #[inline(always)]
+ unsafe fn shuffle_bytes(self, indices: Self) -> Self {
+ vqtbl1q_u8(self, indices)
+ }
+
+ #[inline(always)]
+ unsafe fn for_each_64bit_lane<T>(
+ self,
+ mut f: impl FnMut(usize, u64) -> Option<T>,
+ ) -> Option<T> {
+ let this = vreinterpretq_u64_u8(self);
+ let lane = vgetq_lane_u64(this, 0);
+ if let Some(t) = f(0, lane) {
+ return Some(t);
+ }
+ let lane = vgetq_lane_u64(this, 1);
+ if let Some(t) = f(1, lane) {
+ return Some(t);
+ }
+ None
+ }
+ }
+}
+
+#[cfg(all(test, target_arch = "x86_64", target_feature = "sse2"))]
+mod tests_x86_64_ssse3 {
+ use core::arch::x86_64::*;
+
+ use crate::util::int::{I32, U32};
+
+ use super::*;
+
+ fn is_runnable() -> bool {
+ std::is_x86_feature_detected!("ssse3")
+ }
+
+ #[target_feature(enable = "ssse3")]
+ unsafe fn load(lanes: [u8; 16]) -> __m128i {
+ __m128i::load_unaligned(&lanes as *const u8)
+ }
+
+ #[target_feature(enable = "ssse3")]
+ unsafe fn unload(v: __m128i) -> [u8; 16] {
+ [
+ _mm_extract_epi8(v, 0).to_bits().low_u8(),
+ _mm_extract_epi8(v, 1).to_bits().low_u8(),
+ _mm_extract_epi8(v, 2).to_bits().low_u8(),
+ _mm_extract_epi8(v, 3).to_bits().low_u8(),
+ _mm_extract_epi8(v, 4).to_bits().low_u8(),
+ _mm_extract_epi8(v, 5).to_bits().low_u8(),
+ _mm_extract_epi8(v, 6).to_bits().low_u8(),
+ _mm_extract_epi8(v, 7).to_bits().low_u8(),
+ _mm_extract_epi8(v, 8).to_bits().low_u8(),
+ _mm_extract_epi8(v, 9).to_bits().low_u8(),
+ _mm_extract_epi8(v, 10).to_bits().low_u8(),
+ _mm_extract_epi8(v, 11).to_bits().low_u8(),
+ _mm_extract_epi8(v, 12).to_bits().low_u8(),
+ _mm_extract_epi8(v, 13).to_bits().low_u8(),
+ _mm_extract_epi8(v, 14).to_bits().low_u8(),
+ _mm_extract_epi8(v, 15).to_bits().low_u8(),
+ ]
+ }
+
+ #[test]
+ fn vector_splat() {
+ #[target_feature(enable = "ssse3")]
+ unsafe fn test() {
+ let v = __m128i::splat(0xAF);
+ assert_eq!(
+ unload(v),
+ [
+ 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF,
+ 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF
+ ]
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_is_zero() {
+ #[target_feature(enable = "ssse3")]
+ unsafe fn test() {
+ let v = load([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
+ assert!(!v.is_zero());
+ let v = load([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
+ assert!(v.is_zero());
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_cmpeq() {
+ #[target_feature(enable = "ssse3")]
+ unsafe fn test() {
+ let v1 =
+ load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1]);
+ let v2 =
+ load([16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]);
+ assert_eq!(
+ unload(v1.cmpeq(v2)),
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF]
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_and() {
+ #[target_feature(enable = "ssse3")]
+ unsafe fn test() {
+ let v1 =
+ load([0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
+ let v2 =
+ load([0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
+ assert_eq!(
+ unload(v1.and(v2)),
+ [0, 0, 0, 0, 0, 0b1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_or() {
+ #[target_feature(enable = "ssse3")]
+ unsafe fn test() {
+ let v1 =
+ load([0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
+ let v2 =
+ load([0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
+ assert_eq!(
+ unload(v1.or(v2)),
+ [0, 0, 0, 0, 0, 0b1011, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_shift_8bit_lane_right() {
+ #[target_feature(enable = "ssse3")]
+ unsafe fn test() {
+ let v = load([
+ 0, 0, 0, 0, 0b1011, 0b0101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ]);
+ assert_eq!(
+ unload(v.shift_8bit_lane_right::<2>()),
+ [0, 0, 0, 0, 0b0010, 0b0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_shift_in_one_byte() {
+ #[target_feature(enable = "ssse3")]
+ unsafe fn test() {
+ let v1 =
+ load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
+ let v2 = load([
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ ]);
+ assert_eq!(
+ unload(v1.shift_in_one_byte(v2)),
+ [32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_shift_in_two_bytes() {
+ #[target_feature(enable = "ssse3")]
+ unsafe fn test() {
+ let v1 =
+ load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
+ let v2 = load([
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ ]);
+ assert_eq!(
+ unload(v1.shift_in_two_bytes(v2)),
+ [31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_shift_in_three_bytes() {
+ #[target_feature(enable = "ssse3")]
+ unsafe fn test() {
+ let v1 =
+ load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
+ let v2 = load([
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ ]);
+ assert_eq!(
+ unload(v1.shift_in_three_bytes(v2)),
+ [30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_shuffle_bytes() {
+ #[target_feature(enable = "ssse3")]
+ unsafe fn test() {
+ let v1 =
+ load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
+ let v2 =
+ load([0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12]);
+ assert_eq!(
+ unload(v1.shuffle_bytes(v2)),
+ [1, 1, 1, 1, 5, 5, 5, 5, 9, 9, 9, 9, 13, 13, 13, 13],
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_for_each_64bit_lane() {
+ #[target_feature(enable = "ssse3")]
+ unsafe fn test() {
+ let v = load([
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A,
+ 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10,
+ ]);
+ let mut lanes = [0u64; 2];
+ v.for_each_64bit_lane(|i, lane| {
+ lanes[i] = lane;
+ None::<()>
+ });
+ assert_eq!(lanes, [0x0807060504030201, 0x100F0E0D0C0B0A09],);
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+}
+
+#[cfg(all(test, target_arch = "x86_64", target_feature = "sse2"))]
+mod tests_x86_64_avx2 {
+ use core::arch::x86_64::*;
+
+ use crate::util::int::{I32, U32};
+
+ use super::*;
+
+ fn is_runnable() -> bool {
+ std::is_x86_feature_detected!("avx2")
+ }
+
+ #[target_feature(enable = "avx2")]
+ unsafe fn load(lanes: [u8; 32]) -> __m256i {
+ __m256i::load_unaligned(&lanes as *const u8)
+ }
+
+ #[target_feature(enable = "avx2")]
+ unsafe fn load_half(lanes: [u8; 16]) -> __m256i {
+ __m256i::load_half_unaligned(&lanes as *const u8)
+ }
+
+ #[target_feature(enable = "avx2")]
+ unsafe fn unload(v: __m256i) -> [u8; 32] {
+ [
+ _mm256_extract_epi8(v, 0).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 1).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 2).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 3).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 4).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 5).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 6).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 7).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 8).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 9).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 10).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 11).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 12).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 13).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 14).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 15).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 16).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 17).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 18).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 19).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 20).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 21).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 22).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 23).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 24).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 25).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 26).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 27).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 28).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 29).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 30).to_bits().low_u8(),
+ _mm256_extract_epi8(v, 31).to_bits().low_u8(),
+ ]
+ }
+
+ #[test]
+ fn vector_splat() {
+ #[target_feature(enable = "avx2")]
+ unsafe fn test() {
+ let v = __m256i::splat(0xAF);
+ assert_eq!(
+ unload(v),
+ [
+ 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF,
+ 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF,
+ 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF,
+ 0xAF, 0xAF, 0xAF, 0xAF, 0xAF,
+ ]
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_is_zero() {
+ #[target_feature(enable = "avx2")]
+ unsafe fn test() {
+ let v = load([
+ 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ]);
+ assert!(!v.is_zero());
+ let v = load([
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ]);
+ assert!(v.is_zero());
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_cmpeq() {
+ #[target_feature(enable = "avx2")]
+ unsafe fn test() {
+ let v1 = load([
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 1,
+ ]);
+ let v2 = load([
+ 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
+ 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
+ ]);
+ assert_eq!(
+ unload(v1.cmpeq(v2)),
+ [
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF
+ ]
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_and() {
+ #[target_feature(enable = "avx2")]
+ unsafe fn test() {
+ let v1 = load([
+ 0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ]);
+ let v2 = load([
+ 0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ]);
+ assert_eq!(
+ unload(v1.and(v2)),
+ [
+ 0, 0, 0, 0, 0, 0b1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ]
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_or() {
+ #[target_feature(enable = "avx2")]
+ unsafe fn test() {
+ let v1 = load([
+ 0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ]);
+ let v2 = load([
+ 0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ]);
+ assert_eq!(
+ unload(v1.or(v2)),
+ [
+ 0, 0, 0, 0, 0, 0b1011, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ]
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_shift_8bit_lane_right() {
+ #[target_feature(enable = "avx2")]
+ unsafe fn test() {
+ let v = load([
+ 0, 0, 0, 0, 0b1011, 0b0101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ]);
+ assert_eq!(
+ unload(v.shift_8bit_lane_right::<2>()),
+ [
+ 0, 0, 0, 0, 0b0010, 0b0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ]
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_shift_in_one_byte() {
+ #[target_feature(enable = "avx2")]
+ unsafe fn test() {
+ let v1 = load([
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ ]);
+ let v2 = load([
+ 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
+ 63, 64,
+ ]);
+ assert_eq!(
+ unload(v1.shift_in_one_byte(v2)),
+ [
+ 64, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+ 31,
+ ],
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_shift_in_two_bytes() {
+ #[target_feature(enable = "avx2")]
+ unsafe fn test() {
+ let v1 = load([
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ ]);
+ let v2 = load([
+ 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
+ 63, 64,
+ ]);
+ assert_eq!(
+ unload(v1.shift_in_two_bytes(v2)),
+ [
+ 63, 64, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30,
+ ],
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_shift_in_three_bytes() {
+ #[target_feature(enable = "avx2")]
+ unsafe fn test() {
+ let v1 = load([
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ ]);
+ let v2 = load([
+ 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
+ 63, 64,
+ ]);
+ assert_eq!(
+ unload(v1.shift_in_three_bytes(v2)),
+ [
+ 62, 63, 64, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
+ 29,
+ ],
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_shuffle_bytes() {
+ #[target_feature(enable = "avx2")]
+ unsafe fn test() {
+ let v1 = load([
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ ]);
+ let v2 = load([
+ 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16,
+ 16, 16, 20, 20, 20, 20, 24, 24, 24, 24, 28, 28, 28, 28,
+ ]);
+ assert_eq!(
+ unload(v1.shuffle_bytes(v2)),
+ [
+ 1, 1, 1, 1, 5, 5, 5, 5, 9, 9, 9, 9, 13, 13, 13, 13, 17,
+ 17, 17, 17, 21, 21, 21, 21, 25, 25, 25, 25, 29, 29, 29,
+ 29
+ ],
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_for_each_64bit_lane() {
+ #[target_feature(enable = "avx2")]
+ unsafe fn test() {
+ let v = load([
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A,
+ 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14,
+ 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E,
+ 0x1F, 0x20,
+ ]);
+ let mut lanes = [0u64; 4];
+ v.for_each_64bit_lane(|i, lane| {
+ lanes[i] = lane;
+ None::<()>
+ });
+ assert_eq!(
+ lanes,
+ [
+ 0x0807060504030201,
+ 0x100F0E0D0C0B0A09,
+ 0x1817161514131211,
+ 0x201F1E1D1C1B1A19
+ ]
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn fat_vector_half_shift_in_one_byte() {
+ #[target_feature(enable = "avx2")]
+ unsafe fn test() {
+ let v1 = load_half([
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ ]);
+ let v2 = load_half([
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ ]);
+ assert_eq!(
+ unload(v1.half_shift_in_one_byte(v2)),
+ [
+ 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32,
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+ ],
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn fat_vector_half_shift_in_two_bytes() {
+ #[target_feature(enable = "avx2")]
+ unsafe fn test() {
+ let v1 = load_half([
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ ]);
+ let v2 = load_half([
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ ]);
+ assert_eq!(
+ unload(v1.half_shift_in_two_bytes(v2)),
+ [
+ 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 31,
+ 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ ],
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn fat_vector_half_shift_in_three_bytes() {
+ #[target_feature(enable = "avx2")]
+ unsafe fn test() {
+ let v1 = load_half([
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ ]);
+ let v2 = load_half([
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ ]);
+ assert_eq!(
+ unload(v1.half_shift_in_three_bytes(v2)),
+ [
+ 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 30,
+ 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
+ ],
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn fat_vector_swap_halves() {
+ #[target_feature(enable = "avx2")]
+ unsafe fn test() {
+ let v = load([
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ ]);
+ assert_eq!(
+ unload(v.swap_halves()),
+ [
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+ 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16,
+ ],
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn fat_vector_interleave_low_8bit_lanes() {
+ #[target_feature(enable = "avx2")]
+ unsafe fn test() {
+ let v1 = load([
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ ]);
+ let v2 = load([
+ 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
+ 63, 64,
+ ]);
+ assert_eq!(
+ unload(v1.interleave_low_8bit_lanes(v2)),
+ [
+ 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39, 8, 40,
+ 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55,
+ 24, 56,
+ ],
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn fat_vector_interleave_high_8bit_lanes() {
+ #[target_feature(enable = "avx2")]
+ unsafe fn test() {
+ let v1 = load([
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ ]);
+ let v2 = load([
+ 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
+ 63, 64,
+ ]);
+ assert_eq!(
+ unload(v1.interleave_high_8bit_lanes(v2)),
+ [
+ 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47, 16,
+ 48, 25, 57, 26, 58, 27, 59, 28, 60, 29, 61, 30, 62, 31,
+ 63, 32, 64,
+ ],
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn fat_vector_for_each_low_64bit_lane() {
+ #[target_feature(enable = "avx2")]
+ unsafe fn test() {
+ let v1 = load([
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A,
+ 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14,
+ 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E,
+ 0x1F, 0x20,
+ ]);
+ let v2 = load([
+ 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A,
+ 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34,
+ 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E,
+ 0x3F, 0x40,
+ ]);
+ let mut lanes = [0u64; 4];
+ v1.for_each_low_64bit_lane(v2, |i, lane| {
+ lanes[i] = lane;
+ None::<()>
+ });
+ assert_eq!(
+ lanes,
+ [
+ 0x0807060504030201,
+ 0x100F0E0D0C0B0A09,
+ 0x2827262524232221,
+ 0x302F2E2D2C2B2A29
+ ]
+ );
+ }
+ if !is_runnable() {
+ return;
+ }
+ unsafe { test() }
+ }
+}
+
+#[cfg(all(test, target_arch = "aarch64", target_feature = "neon"))]
+mod tests_aarch64_neon {
+ use core::arch::aarch64::*;
+
+ use super::*;
+
+ #[target_feature(enable = "neon")]
+ unsafe fn load(lanes: [u8; 16]) -> uint8x16_t {
+ uint8x16_t::load_unaligned(&lanes as *const u8)
+ }
+
+ #[target_feature(enable = "neon")]
+ unsafe fn unload(v: uint8x16_t) -> [u8; 16] {
+ [
+ vgetq_lane_u8(v, 0),
+ vgetq_lane_u8(v, 1),
+ vgetq_lane_u8(v, 2),
+ vgetq_lane_u8(v, 3),
+ vgetq_lane_u8(v, 4),
+ vgetq_lane_u8(v, 5),
+ vgetq_lane_u8(v, 6),
+ vgetq_lane_u8(v, 7),
+ vgetq_lane_u8(v, 8),
+ vgetq_lane_u8(v, 9),
+ vgetq_lane_u8(v, 10),
+ vgetq_lane_u8(v, 11),
+ vgetq_lane_u8(v, 12),
+ vgetq_lane_u8(v, 13),
+ vgetq_lane_u8(v, 14),
+ vgetq_lane_u8(v, 15),
+ ]
+ }
+
+ // Example functions. These don't test the Vector traits, but rather,
+ // specific NEON instructions. They are basically little experiments I
+ // wrote to figure out what an instruction does since their descriptions
+ // are so dense. I decided to keep the experiments around as example tests
+ // in case they're useful.
+
+ #[test]
+ fn example_vmaxvq_u8_non_zero() {
+ #[target_feature(enable = "neon")]
+ unsafe fn example() {
+ let v = load([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
+ assert_eq!(vmaxvq_u8(v), 1);
+ }
+ unsafe { example() }
+ }
+
+ #[test]
+ fn example_vmaxvq_u8_zero() {
+ #[target_feature(enable = "neon")]
+ unsafe fn example() {
+ let v = load([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
+ assert_eq!(vmaxvq_u8(v), 0);
+ }
+ unsafe { example() }
+ }
+
+ #[test]
+ fn example_vpmaxq_u8_non_zero() {
+ #[target_feature(enable = "neon")]
+ unsafe fn example() {
+ let v = load([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
+ let r = vpmaxq_u8(v, v);
+ assert_eq!(
+ unload(r),
+ [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
+ );
+ }
+ unsafe { example() }
+ }
+
+ #[test]
+ fn example_vpmaxq_u8_self() {
+ #[target_feature(enable = "neon")]
+ unsafe fn example() {
+ let v =
+ load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
+ let r = vpmaxq_u8(v, v);
+ assert_eq!(
+ unload(r),
+ [2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16]
+ );
+ }
+ unsafe { example() }
+ }
+
+ #[test]
+ fn example_vpmaxq_u8_other() {
+ #[target_feature(enable = "neon")]
+ unsafe fn example() {
+ let v1 =
+ load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
+ let v2 = load([
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ ]);
+ let r = vpmaxq_u8(v1, v2);
+ assert_eq!(
+ unload(r),
+ [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]
+ );
+ }
+ unsafe { example() }
+ }
+
+ // Now we test the actual methods on the Vector trait.
+
+ #[test]
+ fn vector_splat() {
+ #[target_feature(enable = "neon")]
+ unsafe fn test() {
+ let v = uint8x16_t::splat(0xAF);
+ assert_eq!(
+ unload(v),
+ [
+ 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF,
+ 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF
+ ]
+ );
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_is_zero() {
+ #[target_feature(enable = "neon")]
+ unsafe fn test() {
+ let v = load([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
+ assert!(!v.is_zero());
+ let v = load([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
+ assert!(v.is_zero());
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_cmpeq() {
+ #[target_feature(enable = "neon")]
+ unsafe fn test() {
+ let v1 =
+ load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1]);
+ let v2 =
+ load([16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]);
+ assert_eq!(
+ unload(v1.cmpeq(v2)),
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF]
+ );
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_and() {
+ #[target_feature(enable = "neon")]
+ unsafe fn test() {
+ let v1 =
+ load([0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
+ let v2 =
+ load([0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
+ assert_eq!(
+ unload(v1.and(v2)),
+ [0, 0, 0, 0, 0, 0b1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+ );
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_or() {
+ #[target_feature(enable = "neon")]
+ unsafe fn test() {
+ let v1 =
+ load([0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
+ let v2 =
+ load([0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
+ assert_eq!(
+ unload(v1.or(v2)),
+ [0, 0, 0, 0, 0, 0b1011, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+ );
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_shift_8bit_lane_right() {
+ #[target_feature(enable = "neon")]
+ unsafe fn test() {
+ let v = load([
+ 0, 0, 0, 0, 0b1011, 0b0101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ]);
+ assert_eq!(
+ unload(v.shift_8bit_lane_right::<2>()),
+ [0, 0, 0, 0, 0b0010, 0b0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+ );
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_shift_in_one_byte() {
+ #[target_feature(enable = "neon")]
+ unsafe fn test() {
+ let v1 =
+ load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
+ let v2 = load([
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ ]);
+ assert_eq!(
+ unload(v1.shift_in_one_byte(v2)),
+ [32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
+ );
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_shift_in_two_bytes() {
+ #[target_feature(enable = "neon")]
+ unsafe fn test() {
+ let v1 =
+ load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
+ let v2 = load([
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ ]);
+ assert_eq!(
+ unload(v1.shift_in_two_bytes(v2)),
+ [31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+ );
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_shift_in_three_bytes() {
+ #[target_feature(enable = "neon")]
+ unsafe fn test() {
+ let v1 =
+ load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
+ let v2 = load([
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ ]);
+ assert_eq!(
+ unload(v1.shift_in_three_bytes(v2)),
+ [30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
+ );
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_shuffle_bytes() {
+ #[target_feature(enable = "neon")]
+ unsafe fn test() {
+ let v1 =
+ load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
+ let v2 =
+ load([0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12]);
+ assert_eq!(
+ unload(v1.shuffle_bytes(v2)),
+ [1, 1, 1, 1, 5, 5, 5, 5, 9, 9, 9, 9, 13, 13, 13, 13],
+ );
+ }
+ unsafe { test() }
+ }
+
+ #[test]
+ fn vector_for_each_64bit_lane() {
+ #[target_feature(enable = "neon")]
+ unsafe fn test() {
+ let v = load([
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A,
+ 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10,
+ ]);
+ let mut lanes = [0u64; 2];
+ v.for_each_64bit_lane(|i, lane| {
+ lanes[i] = lane;
+ None::<()>
+ });
+ assert_eq!(lanes, [0x0807060504030201, 0x100F0E0D0C0B0A09],);
+ }
+ unsafe { test() }
+ }
+}
diff --git a/third_party/rust/aho-corasick/src/tests.rs b/third_party/rust/aho-corasick/src/tests.rs
new file mode 100644
index 0000000000..a5276f85f6
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/tests.rs
@@ -0,0 +1,1664 @@
+use std::{collections::HashMap, format, string::String, vec::Vec};
+
+use crate::{
+ AhoCorasick, AhoCorasickBuilder, AhoCorasickKind, Anchored, Input, Match,
+ MatchKind, StartKind,
+};
+
+/// A description of a single test against an Aho-Corasick automaton.
+///
+/// A single test may not necessarily pass on every configuration of an
+/// Aho-Corasick automaton. The tests are categorized and grouped appropriately
+/// below.
+#[derive(Clone, Debug, Eq, PartialEq)]
+struct SearchTest {
+ /// The name of this test, for debugging.
+ name: &'static str,
+ /// The patterns to search for.
+ patterns: &'static [&'static str],
+ /// The text to search.
+ haystack: &'static str,
+ /// Each match is a triple of (pattern_index, start, end), where
+ /// pattern_index is an index into `patterns` and `start`/`end` are indices
+ /// into `haystack`.
+ matches: &'static [(usize, usize, usize)],
+}
+
+/// Short-hand constructor for SearchTest. We use it a lot below.
+macro_rules! t {
+ ($name:ident, $patterns:expr, $haystack:expr, $matches:expr) => {
+ SearchTest {
+ name: stringify!($name),
+ patterns: $patterns,
+ haystack: $haystack,
+ matches: $matches,
+ }
+ };
+}
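+
+// For example, `t!(basic010, &["a"], "a", &[(0, 0, 1)])` (see `basic010`
+// below) expands to `SearchTest { name: "basic010", patterns: &["a"],
+// haystack: "a", matches: &[(0, 0, 1)] }`: searching the haystack "a" for
+// the single pattern "a" should report one match of pattern 0 spanning
+// bytes 0..1.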
+
+/// A collection of test groups.
+type TestCollection = &'static [&'static [SearchTest]];
+
+// Define several collections corresponding to the different types of match
+// semantics supported by Aho-Corasick. These collections have some overlap,
+// but each collection should have some tests that no other collection has.
+
+/// Tests for Aho-Corasick's standard non-overlapping match semantics.
+const AC_STANDARD_NON_OVERLAPPING: TestCollection =
+ &[BASICS, NON_OVERLAPPING, STANDARD, REGRESSION];
+
+/// Tests for Aho-Corasick's anchored standard non-overlapping match semantics.
+const AC_STANDARD_ANCHORED_NON_OVERLAPPING: TestCollection =
+ &[ANCHORED_BASICS, ANCHORED_NON_OVERLAPPING, STANDARD_ANCHORED];
+
+/// Tests for Aho-Corasick's standard overlapping match semantics.
+const AC_STANDARD_OVERLAPPING: TestCollection =
+ &[BASICS, OVERLAPPING, REGRESSION];
+
+/*
+Iterators of anchored overlapping searches were removed from the API after
+0.7, but we leave the tests commented out for posterity.
+/// Tests for Aho-Corasick's anchored standard overlapping match semantics.
+const AC_STANDARD_ANCHORED_OVERLAPPING: TestCollection =
+ &[ANCHORED_BASICS, ANCHORED_OVERLAPPING];
+*/
+
+/// Tests for Aho-Corasick's leftmost-first match semantics.
+const AC_LEFTMOST_FIRST: TestCollection =
+ &[BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_FIRST, REGRESSION];
+
+/// Tests for Aho-Corasick's anchored leftmost-first match semantics.
+const AC_LEFTMOST_FIRST_ANCHORED: TestCollection = &[
+ ANCHORED_BASICS,
+ ANCHORED_NON_OVERLAPPING,
+ ANCHORED_LEFTMOST,
+ ANCHORED_LEFTMOST_FIRST,
+];
+
+/// Tests for Aho-Corasick's leftmost-longest match semantics.
+const AC_LEFTMOST_LONGEST: TestCollection =
+ &[BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_LONGEST, REGRESSION];
+
+/// Tests for Aho-Corasick's anchored leftmost-longest match semantics.
+const AC_LEFTMOST_LONGEST_ANCHORED: TestCollection = &[
+ ANCHORED_BASICS,
+ ANCHORED_NON_OVERLAPPING,
+ ANCHORED_LEFTMOST,
+ ANCHORED_LEFTMOST_LONGEST,
+];
+
+// Now define the individual tests that make up the collections above.
+
+/// A collection of tests for the Aho-Corasick algorithm that should always be
+/// true regardless of match semantics. That is, all combinations of
+/// leftmost-{shortest, first, longest} x {overlapping, non-overlapping}
+/// should produce the same answer.
+const BASICS: &'static [SearchTest] = &[
+ t!(basic000, &[], "", &[]),
+ t!(basic001, &[""], "a", &[(0, 0, 0), (0, 1, 1)]),
+ t!(basic002, &["a"], "", &[]),
+ t!(basic010, &["a"], "a", &[(0, 0, 1)]),
+ t!(basic020, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]),
+ t!(basic030, &["a"], "aaa", &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]),
+ t!(basic040, &["a"], "aba", &[(0, 0, 1), (0, 2, 3)]),
+ t!(basic050, &["a"], "bba", &[(0, 2, 3)]),
+ t!(basic060, &["a"], "bbb", &[]),
+ t!(basic070, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]),
+ t!(basic100, &["aa"], "", &[]),
+ t!(basic110, &["aa"], "aa", &[(0, 0, 2)]),
+ t!(basic120, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]),
+ t!(basic130, &["aa"], "abbab", &[]),
+ t!(basic140, &["aa"], "abbabaa", &[(0, 5, 7)]),
+ t!(basic200, &["abc"], "abc", &[(0, 0, 3)]),
+ t!(basic210, &["abc"], "zazabzabcz", &[(0, 6, 9)]),
+ t!(basic220, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]),
+ t!(basic300, &["a", "b"], "", &[]),
+ t!(basic310, &["a", "b"], "z", &[]),
+ t!(basic320, &["a", "b"], "b", &[(1, 0, 1)]),
+ t!(basic330, &["a", "b"], "a", &[(0, 0, 1)]),
+ t!(
+ basic340,
+ &["a", "b"],
+ "abba",
+ &[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),]
+ ),
+ t!(
+ basic350,
+ &["b", "a"],
+ "abba",
+ &[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),]
+ ),
+ t!(basic360, &["abc", "bc"], "xbc", &[(1, 1, 3),]),
+ t!(basic400, &["foo", "bar"], "", &[]),
+ t!(basic410, &["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6),]),
+ t!(basic420, &["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6),]),
+ t!(basic430, &["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6),]),
+ t!(basic440, &["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6),]),
+ t!(basic450, &["foo", "bar"], "bafofoo", &[(0, 4, 7),]),
+ t!(basic460, &["bar", "foo"], "bafofoo", &[(1, 4, 7),]),
+ t!(basic470, &["foo", "bar"], "fobabar", &[(1, 4, 7),]),
+ t!(basic480, &["bar", "foo"], "fobabar", &[(0, 4, 7),]),
+ t!(basic600, &[""], "", &[(0, 0, 0)]),
+ t!(basic610, &[""], "a", &[(0, 0, 0), (0, 1, 1)]),
+ t!(basic620, &[""], "abc", &[(0, 0, 0), (0, 1, 1), (0, 2, 2), (0, 3, 3)]),
+ t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7),]),
+ t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[(1, 1, 10),]),
+ t!(
+ basic720,
+ &["yabcdef", "bcdeyabc", "abcdezghi"],
+ "yabcdezghi",
+ &[(2, 1, 10),]
+ ),
+];
+
+/// A collection of *anchored* tests for the Aho-Corasick algorithm that should
+/// always be true regardless of match semantics. That is, all combinations of
+/// leftmost-{shortest, first, longest} x {overlapping, non-overlapping} should
+/// produce the same answer.
+const ANCHORED_BASICS: &'static [SearchTest] = &[
+ t!(abasic000, &[], "", &[]),
+ t!(abasic001, &[], "a", &[]),
+ t!(abasic002, &[], "abc", &[]),
+ t!(abasic010, &[""], "", &[(0, 0, 0)]),
+ t!(abasic020, &[""], "a", &[(0, 0, 0), (0, 1, 1)]),
+ t!(abasic030, &[""], "abc", &[(0, 0, 0), (0, 1, 1), (0, 2, 2), (0, 3, 3)]),
+ t!(abasic100, &["a"], "a", &[(0, 0, 1)]),
+ t!(abasic110, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]),
+ t!(abasic120, &["a", "b"], "ab", &[(0, 0, 1), (1, 1, 2)]),
+ t!(abasic130, &["a", "b"], "ba", &[(1, 0, 1), (0, 1, 2)]),
+ t!(abasic140, &["foo", "foofoo"], "foo", &[(0, 0, 3)]),
+ t!(abasic150, &["foofoo", "foo"], "foo", &[(1, 0, 3)]),
+ t!(abasic200, &["foo"], "foofoo foo", &[(0, 0, 3), (0, 3, 6)]),
+];
+
+/// Tests for non-overlapping standard match semantics.
+///
+/// These tests generally shouldn't pass for leftmost-{first,longest}, although
+/// some do in order to write clearer tests. For example, standard000 will
+/// pass with leftmost-first semantics, but standard010 will not. We write
+/// both to emphasize how the match semantics work.
+const STANDARD: &'static [SearchTest] = &[
+ t!(standard000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
+ t!(standard010, &["abcd", "ab"], "abcd", &[(1, 0, 2)]),
+ t!(standard020, &["abcd", "ab", "abc"], "abcd", &[(1, 0, 2)]),
+ t!(standard030, &["abcd", "abc", "ab"], "abcd", &[(2, 0, 2)]),
+ t!(standard040, &["a", ""], "a", &[(1, 0, 0), (1, 1, 1)]),
+ t!(
+ standard400,
+ &["abcd", "bcd", "cd", "b"],
+ "abcd",
+ &[(3, 1, 2), (2, 2, 4),]
+ ),
+ t!(standard410, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1),]),
+ t!(standard420, &["", "a"], "aa", &[(0, 0, 0), (0, 1, 1), (0, 2, 2),]),
+ t!(standard430, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1),]),
+ t!(standard440, &["a", "", ""], "a", &[(1, 0, 0), (1, 1, 1),]),
+ t!(standard450, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1),]),
+];
+
+/// Like STANDARD, but for anchored searches.
+const STANDARD_ANCHORED: &'static [SearchTest] = &[
+ t!(astandard000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
+ t!(astandard010, &["abcd", "ab"], "abcd", &[(1, 0, 2)]),
+ t!(astandard020, &["abcd", "ab", "abc"], "abcd", &[(1, 0, 2)]),
+ t!(astandard030, &["abcd", "abc", "ab"], "abcd", &[(2, 0, 2)]),
+ t!(astandard040, &["a", ""], "a", &[(1, 0, 0), (1, 1, 1)]),
+ t!(astandard050, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4)]),
+ t!(astandard410, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
+ t!(astandard420, &["", "a"], "aa", &[(0, 0, 0), (0, 1, 1), (0, 2, 2)]),
+ t!(astandard430, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1)]),
+ t!(astandard440, &["a", "", ""], "a", &[(1, 0, 0), (1, 1, 1)]),
+ t!(astandard450, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
+];
+
+/// Tests for non-overlapping leftmost match semantics. These should pass for
+/// both leftmost-first and leftmost-longest match kinds. Stated differently,
+/// among ambiguous matches, the longest match and the match that appeared
+/// first when constructing the automaton should always be the same.
+const LEFTMOST: &'static [SearchTest] = &[
+ t!(leftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
+ t!(leftmost010, &["a", ""], "a", &[(0, 0, 1)]),
+ t!(leftmost011, &["a", ""], "ab", &[(0, 0, 1), (1, 2, 2)]),
+ t!(leftmost020, &["", ""], "a", &[(0, 0, 0), (0, 1, 1)]),
+ t!(leftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]),
+ t!(leftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
+ t!(leftmost032, &["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]),
+ t!(leftmost300, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]),
+ t!(leftmost310, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]),
+ t!(leftmost320, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]),
+ t!(leftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]),
+ t!(leftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]),
+ t!(leftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]),
+ t!(
+ leftmost360,
+ &["abcdefghi", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(2, 0, 8),]
+ ),
+ t!(
+ leftmost370,
+ &["abcdefghi", "cde", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(3, 0, 8),]
+ ),
+ t!(
+ leftmost380,
+ &["abcdefghi", "hz", "abcdefgh", "a"],
+ "abcdefghz",
+ &[(2, 0, 8),]
+ ),
+ t!(
+ leftmost390,
+ &["b", "abcdefghi", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(3, 0, 8),]
+ ),
+ t!(
+ leftmost400,
+ &["h", "abcdefghi", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(3, 0, 8),]
+ ),
+ t!(
+ leftmost410,
+ &["z", "abcdefghi", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(3, 0, 8), (0, 8, 9),]
+ ),
+];
+
+/// Like LEFTMOST, but for anchored searches.
+const ANCHORED_LEFTMOST: &'static [SearchTest] = &[
+ t!(aleftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
+ // We shouldn't allow an empty match immediately following a match, right?
+ t!(aleftmost010, &["a", ""], "a", &[(0, 0, 1)]),
+ t!(aleftmost020, &["", ""], "a", &[(0, 0, 0), (0, 1, 1)]),
+ t!(aleftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]),
+ t!(aleftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
+ t!(aleftmost032, &["ab", "a"], "xayabbbz", &[]),
+ t!(aleftmost300, &["abcd", "bce", "b"], "abce", &[]),
+ t!(aleftmost301, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4)]),
+ t!(aleftmost310, &["abcd", "ce", "bc"], "abce", &[]),
+ t!(aleftmost320, &["abcd", "bce", "ce", "b"], "abce", &[]),
+ t!(aleftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[]),
+ t!(aleftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]),
+ t!(aleftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]),
+ t!(
+ aleftmost360,
+ &["abcdefghi", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(2, 0, 8),]
+ ),
+ t!(
+ aleftmost370,
+ &["abcdefghi", "cde", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(3, 0, 8),]
+ ),
+ t!(
+ aleftmost380,
+ &["abcdefghi", "hz", "abcdefgh", "a"],
+ "abcdefghz",
+ &[(2, 0, 8),]
+ ),
+ t!(
+ aleftmost390,
+ &["b", "abcdefghi", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(3, 0, 8),]
+ ),
+ t!(
+ aleftmost400,
+ &["h", "abcdefghi", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(3, 0, 8),]
+ ),
+ t!(
+ aleftmost410,
+ &["z", "abcdefghi", "hz", "abcdefgh"],
+ "abcdefghzyz",
+ &[(3, 0, 8), (0, 8, 9)]
+ ),
+];
+
+/// Tests for non-overlapping leftmost-first match semantics. These tests
+/// should generally be specific to leftmost-first, which means they should
+/// generally fail under leftmost-longest semantics.
+const LEFTMOST_FIRST: &'static [SearchTest] = &[
+ t!(leftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
+ t!(leftfirst010, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
+ t!(leftfirst011, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1),]),
+ t!(leftfirst012, &["a", "", ""], "a", &[(0, 0, 1)]),
+ t!(leftfirst013, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
+ t!(leftfirst014, &["a", ""], "a", &[(0, 0, 1)]),
+ t!(leftfirst015, &["a", ""], "ab", &[(0, 0, 1), (1, 2, 2)]),
+ t!(leftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
+ t!(leftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
+ t!(leftfirst040, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]),
+ t!(leftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]),
+ t!(leftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
+ t!(leftfirst300, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]),
+ t!(
+ leftfirst310,
+ &["abcd", "b", "bce", "ce"],
+ "abce",
+ &[(1, 1, 2), (3, 2, 4),]
+ ),
+ t!(
+ leftfirst320,
+ &["a", "abcdefghi", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(0, 0, 1), (2, 7, 9),]
+ ),
+ t!(leftfirst330, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]),
+ t!(leftfirst400, &["amwix", "samwise", "sam"], "Zsamwix", &[(2, 1, 4)]),
+];
+
+/// Like LEFTMOST_FIRST, but for anchored searches.
+const ANCHORED_LEFTMOST_FIRST: &'static [SearchTest] = &[
+ t!(aleftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
+ t!(aleftfirst010, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
+ t!(aleftfirst011, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1)]),
+ t!(aleftfirst012, &["a", "", ""], "a", &[(0, 0, 1)]),
+ t!(aleftfirst013, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
+ t!(aleftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
+ t!(aleftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
+ t!(aleftfirst040, &["a", "ab"], "xayabbbz", &[]),
+ t!(aleftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[]),
+ t!(aleftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[]),
+ t!(aleftfirst300, &["abcd", "b", "bce"], "abce", &[]),
+ t!(aleftfirst310, &["abcd", "b", "bce", "ce"], "abce", &[]),
+ t!(
+ aleftfirst320,
+ &["a", "abcdefghi", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(0, 0, 1)]
+ ),
+ t!(aleftfirst330, &["a", "abab"], "abab", &[(0, 0, 1)]),
+ t!(aleftfirst400, &["wise", "samwise", "sam"], "samwix", &[(2, 0, 3)]),
+];
+
+/// Tests for non-overlapping leftmost-longest match semantics. These tests
+/// should generally be specific to leftmost-longest, which means they should
+/// generally fail under leftmost-first semantics.
+const LEFTMOST_LONGEST: &'static [SearchTest] = &[
+ t!(leftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
+ t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]),
+ t!(leftlong020, &["", "a"], "a", &[(1, 0, 1)]),
+ t!(leftlong021, &["", "a", ""], "a", &[(1, 0, 1)]),
+ t!(leftlong022, &["a", "", ""], "a", &[(0, 0, 1)]),
+ t!(leftlong023, &["", "", "a"], "a", &[(2, 0, 1)]),
+ t!(leftlong024, &["", "a"], "ab", &[(1, 0, 1), (0, 2, 2)]),
+ t!(leftlong030, &["", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
+ t!(leftlong040, &["a", "ab"], "a", &[(0, 0, 1)]),
+ t!(leftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]),
+ t!(leftlong060, &["ab", "a"], "a", &[(1, 0, 1)]),
+ t!(leftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]),
+ t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]),
+ t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
+ t!(leftlong300, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]),
+ t!(
+ leftlong310,
+ &["a", "abcdefghi", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(3, 0, 8),]
+ ),
+ t!(leftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]),
+ t!(leftlong330, &["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4),]),
+ t!(leftlong340, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]),
+];
+
+/// Like LEFTMOST_LONGEST, but for anchored searches.
+const ANCHORED_LEFTMOST_LONGEST: &'static [SearchTest] = &[
+ t!(aleftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
+ t!(aleftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]),
+ t!(aleftlong020, &["", "a"], "a", &[(1, 0, 1)]),
+ t!(aleftlong021, &["", "a", ""], "a", &[(1, 0, 1)]),
+ t!(aleftlong022, &["a", "", ""], "a", &[(0, 0, 1)]),
+ t!(aleftlong023, &["", "", "a"], "a", &[(2, 0, 1)]),
+ t!(aleftlong030, &["", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
+ t!(aleftlong040, &["a", "ab"], "a", &[(0, 0, 1)]),
+ t!(aleftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]),
+ t!(aleftlong060, &["ab", "a"], "a", &[(1, 0, 1)]),
+ t!(aleftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]),
+ t!(aleftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[]),
+ t!(aleftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[]),
+ t!(aleftlong300, &["abcd", "b", "bce"], "abce", &[]),
+ t!(
+ aleftlong310,
+ &["a", "abcdefghi", "hz", "abcdefgh"],
+ "abcdefghz",
+ &[(3, 0, 8),]
+ ),
+ t!(aleftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]),
+ t!(aleftlong330, &["abcd", "b", "ce"], "abce", &[]),
+ t!(aleftlong340, &["a", "ab"], "xayabbbz", &[]),
+];
+
+/// Tests for non-overlapping match semantics.
+///
+/// Generally these tests shouldn't pass when using overlapping semantics.
+/// These should pass for both standard and leftmost match semantics.
+const NON_OVERLAPPING: &'static [SearchTest] = &[
+ t!(nover010, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]),
+ t!(nover020, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]),
+ t!(nover030, &["abc", "bc"], "zazabcz", &[(0, 3, 6),]),
+ t!(
+ nover100,
+ &["ab", "ba"],
+ "abababa",
+ &[(0, 0, 2), (0, 2, 4), (0, 4, 6),]
+ ),
+ t!(nover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9),]),
+ t!(nover300, &["", ""], "", &[(0, 0, 0),]),
+ t!(nover310, &["", ""], "a", &[(0, 0, 0), (0, 1, 1),]),
+];
+
+/// Like NON_OVERLAPPING, but for anchored searches.
+const ANCHORED_NON_OVERLAPPING: &'static [SearchTest] = &[
+ t!(anover010, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]),
+ t!(anover020, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]),
+ t!(anover030, &["abc", "bc"], "zazabcz", &[]),
+ t!(
+ anover100,
+ &["ab", "ba"],
+ "abababa",
+ &[(0, 0, 2), (0, 2, 4), (0, 4, 6)]
+ ),
+ t!(anover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3)]),
+ t!(anover300, &["", ""], "", &[(0, 0, 0)]),
+ t!(anover310, &["", ""], "a", &[(0, 0, 0), (0, 1, 1)]),
+];
+
+/// Tests for overlapping match semantics.
+///
+/// This only supports standard match semantics, since leftmost-{first,longest}
+/// do not support overlapping matches.
+const OVERLAPPING: &'static [SearchTest] = &[
+ t!(
+ over000,
+ &["abcd", "bcd", "cd", "b"],
+ "abcd",
+ &[(3, 1, 2), (0, 0, 4), (1, 1, 4), (2, 2, 4),]
+ ),
+ t!(
+ over010,
+ &["bcd", "cd", "b", "abcd"],
+ "abcd",
+ &[(2, 1, 2), (3, 0, 4), (0, 1, 4), (1, 2, 4),]
+ ),
+ t!(
+ over020,
+ &["abcd", "bcd", "cd"],
+ "abcd",
+ &[(0, 0, 4), (1, 1, 4), (2, 2, 4),]
+ ),
+ t!(
+ over030,
+ &["bcd", "abcd", "cd"],
+ "abcd",
+ &[(1, 0, 4), (0, 1, 4), (2, 2, 4),]
+ ),
+ t!(
+ over040,
+ &["bcd", "cd", "abcd"],
+ "abcd",
+ &[(2, 0, 4), (0, 1, 4), (1, 2, 4),]
+ ),
+ t!(over050, &["abc", "bc"], "zazabcz", &[(0, 3, 6), (1, 4, 6),]),
+ t!(
+ over100,
+ &["ab", "ba"],
+ "abababa",
+ &[(0, 0, 2), (1, 1, 3), (0, 2, 4), (1, 3, 5), (0, 4, 6), (1, 5, 7),]
+ ),
+ t!(
+ over200,
+ &["foo", "foo"],
+ "foobarfoo",
+ &[(0, 0, 3), (1, 0, 3), (0, 6, 9), (1, 6, 9),]
+ ),
+ t!(over300, &["", ""], "", &[(0, 0, 0), (1, 0, 0),]),
+ t!(
+ over310,
+ &["", ""],
+ "a",
+ &[(0, 0, 0), (1, 0, 0), (0, 1, 1), (1, 1, 1),]
+ ),
+ t!(over320, &["", "a"], "a", &[(0, 0, 0), (1, 0, 1), (0, 1, 1),]),
+ t!(
+ over330,
+ &["", "a", ""],
+ "a",
+ &[(0, 0, 0), (2, 0, 0), (1, 0, 1), (0, 1, 1), (2, 1, 1),]
+ ),
+ t!(
+ over340,
+ &["a", "", ""],
+ "a",
+ &[(1, 0, 0), (2, 0, 0), (0, 0, 1), (1, 1, 1), (2, 1, 1),]
+ ),
+ t!(
+ over350,
+ &["", "", "a"],
+ "a",
+ &[(0, 0, 0), (1, 0, 0), (2, 0, 1), (0, 1, 1), (1, 1, 1),]
+ ),
+ t!(
+ over360,
+ &["foo", "foofoo"],
+ "foofoo",
+ &[(0, 0, 3), (1, 0, 6), (0, 3, 6)]
+ ),
+];
+
+/*
+Iterators of anchored overlapping searches were removed from the API after
+0.7, but we leave the tests commented out for posterity.
+/// Like OVERLAPPING, but for anchored searches.
+const ANCHORED_OVERLAPPING: &'static [SearchTest] = &[
+ t!(aover000, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4)]),
+ t!(aover010, &["bcd", "cd", "b", "abcd"], "abcd", &[(3, 0, 4)]),
+ t!(aover020, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4)]),
+ t!(aover030, &["bcd", "abcd", "cd"], "abcd", &[(1, 0, 4)]),
+ t!(aover040, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4)]),
+ t!(aover050, &["abc", "bc"], "zazabcz", &[]),
+ t!(aover100, &["ab", "ba"], "abababa", &[(0, 0, 2)]),
+ t!(aover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (1, 0, 3)]),
+ t!(aover300, &["", ""], "", &[(0, 0, 0), (1, 0, 0),]),
+ t!(aover310, &["", ""], "a", &[(0, 0, 0), (1, 0, 0)]),
+ t!(aover320, &["", "a"], "a", &[(0, 0, 0), (1, 0, 1)]),
+ t!(aover330, &["", "a", ""], "a", &[(0, 0, 0), (2, 0, 0), (1, 0, 1)]),
+ t!(aover340, &["a", "", ""], "a", &[(1, 0, 0), (2, 0, 0), (0, 0, 1)]),
+ t!(aover350, &["", "", "a"], "a", &[(0, 0, 0), (1, 0, 0), (2, 0, 1)]),
+ t!(aover360, &["foo", "foofoo"], "foofoo", &[(0, 0, 3), (1, 0, 6)]),
+];
+*/
+
+/// Tests for ASCII case insensitivity.
+///
+/// These tests should all have the same behavior regardless of match semantics
+/// or whether the search is overlapping.
+const ASCII_CASE_INSENSITIVE: &'static [SearchTest] = &[
+ t!(acasei000, &["a"], "A", &[(0, 0, 1)]),
+ t!(acasei010, &["Samwise"], "SAMWISE", &[(0, 0, 7)]),
+ t!(acasei011, &["Samwise"], "SAMWISE.abcd", &[(0, 0, 7)]),
+ t!(acasei020, &["fOoBaR"], "quux foobar baz", &[(0, 5, 11)]),
+];
+
+/// Like ASCII_CASE_INSENSITIVE, but specifically for non-overlapping tests.
+const ASCII_CASE_INSENSITIVE_NON_OVERLAPPING: &'static [SearchTest] = &[
+ t!(acasei000, &["foo", "FOO"], "fOo", &[(0, 0, 3)]),
+    t!(acasei001, &["FOO", "foo"], "fOo", &[(0, 0, 3)]),
+ t!(acasei010, &["abc", "def"], "abcdef", &[(0, 0, 3), (1, 3, 6)]),
+];
+
+/// Like ASCII_CASE_INSENSITIVE, but specifically for overlapping tests.
+const ASCII_CASE_INSENSITIVE_OVERLAPPING: &'static [SearchTest] = &[
+ t!(acasei000, &["foo", "FOO"], "fOo", &[(0, 0, 3), (1, 0, 3)]),
+ t!(acasei001, &["FOO", "foo"], "fOo", &[(0, 0, 3), (1, 0, 3)]),
+ // This is a regression test from:
+ // https://github.com/BurntSushi/aho-corasick/issues/68
+ // Previously, it was reporting a duplicate (1, 3, 6) match.
+ t!(
+ acasei010,
+ &["abc", "def", "abcdef"],
+ "abcdef",
+ &[(0, 0, 3), (2, 0, 6), (1, 3, 6)]
+ ),
+];
+
+/// Regression tests that are applied to all Aho-Corasick combinations.
+///
+/// If regression tests are needed for specific match semantics, then add them
+/// to the appropriate group above.
+const REGRESSION: &'static [SearchTest] = &[
+ t!(regression010, &["inf", "ind"], "infind", &[(0, 0, 3), (1, 3, 6),]),
+ t!(regression020, &["ind", "inf"], "infind", &[(1, 0, 3), (0, 3, 6),]),
+ t!(
+ regression030,
+ &["libcore/", "libstd/"],
+ "libcore/char/methods.rs",
+ &[(0, 0, 8),]
+ ),
+ t!(
+ regression040,
+ &["libstd/", "libcore/"],
+ "libcore/char/methods.rs",
+ &[(1, 0, 8),]
+ ),
+ t!(
+ regression050,
+ &["\x00\x00\x01", "\x00\x00\x00"],
+ "\x00\x00\x00",
+ &[(1, 0, 3),]
+ ),
+ t!(
+ regression060,
+ &["\x00\x00\x00", "\x00\x00\x01"],
+ "\x00\x00\x00",
+ &[(0, 0, 3),]
+ ),
+];
+
+// Now define a test for each combination of things above that we want to run.
+// Since there are a few different combinations for each collection of tests,
+// we define a couple of macros to avoid repetition drudgery. The testconfig
+// macro constructs the automaton from a given match kind, and runs the search
+// tests one-by-one over the given collection. The `with` parameter allows one
+// to configure the builder with additional parameters. The testcombo macro
+// invokes testconfig in precisely this way: it sets up several tests where
+// each one turns a different knob on AhoCorasickBuilder.
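+//
+// For orientation (a sketch, not part of the vendored code), a single
+// expansion of `testconfig!(default, AC_LEFTMOST_LONGEST, LeftmostLongest,
+// |_| ())` looks roughly like this (the no-op `$with` closure is elided):
+//
+//     #[test]
+//     fn default() {
+//         run_search_tests(AC_LEFTMOST_LONGEST, |test| {
+//             let mut builder = AhoCorasick::builder();
+//             builder
+//                 .match_kind(MatchKind::LeftmostLongest)
+//                 .build(test.patterns)
+//                 .unwrap()
+//                 .find_iter(test.haystack)
+//                 .collect()
+//         });
+//     }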
+
+macro_rules! testconfig {
+ (anchored, $name:ident, $collection:expr, $kind:ident, $with:expr) => {
+ #[test]
+ fn $name() {
+ run_search_tests($collection, |test| {
+ let mut builder = AhoCorasick::builder();
+ $with(&mut builder);
+ let input = Input::new(test.haystack).anchored(Anchored::Yes);
+ builder
+ .match_kind(MatchKind::$kind)
+ .build(test.patterns)
+ .unwrap()
+ .try_find_iter(input)
+ .unwrap()
+ .collect()
+ });
+ }
+ };
+ (overlapping, $name:ident, $collection:expr, $kind:ident, $with:expr) => {
+ #[test]
+ fn $name() {
+ run_search_tests($collection, |test| {
+ let mut builder = AhoCorasick::builder();
+ $with(&mut builder);
+ builder
+ .match_kind(MatchKind::$kind)
+ .build(test.patterns)
+ .unwrap()
+ .find_overlapping_iter(test.haystack)
+ .collect()
+ });
+ }
+ };
+ (stream, $name:ident, $collection:expr, $kind:ident, $with:expr) => {
+ #[test]
+ fn $name() {
+ run_stream_search_tests($collection, |test| {
+ let buf = std::io::BufReader::with_capacity(
+ 1,
+ test.haystack.as_bytes(),
+ );
+ let mut builder = AhoCorasick::builder();
+ $with(&mut builder);
+ builder
+ .match_kind(MatchKind::$kind)
+ .build(test.patterns)
+ .unwrap()
+ .stream_find_iter(buf)
+ .map(|result| result.unwrap())
+ .collect()
+ });
+ }
+ };
+ ($name:ident, $collection:expr, $kind:ident, $with:expr) => {
+ #[test]
+ fn $name() {
+ run_search_tests($collection, |test| {
+ let mut builder = AhoCorasick::builder();
+ $with(&mut builder);
+ builder
+ .match_kind(MatchKind::$kind)
+ .build(test.patterns)
+ .unwrap()
+ .find_iter(test.haystack)
+ .collect()
+ });
+ }
+ };
+}
+
+macro_rules! testcombo {
+ ($name:ident, $collection:expr, $kind:ident) => {
+ mod $name {
+ use super::*;
+
+ testconfig!(default, $collection, $kind, |_| ());
+ testconfig!(
+ nfa_default,
+ $collection,
+ $kind,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::NoncontiguousNFA));
+ }
+ );
+ testconfig!(
+ nfa_noncontig_no_prefilter,
+ $collection,
+ $kind,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::NoncontiguousNFA))
+ .prefilter(false);
+ }
+ );
+ testconfig!(
+ nfa_noncontig_all_sparse,
+ $collection,
+ $kind,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::NoncontiguousNFA))
+ .dense_depth(0);
+ }
+ );
+ testconfig!(
+ nfa_noncontig_all_dense,
+ $collection,
+ $kind,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::NoncontiguousNFA))
+ .dense_depth(usize::MAX);
+ }
+ );
+ testconfig!(
+ nfa_contig_default,
+ $collection,
+ $kind,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::ContiguousNFA));
+ }
+ );
+ testconfig!(
+ nfa_contig_no_prefilter,
+ $collection,
+ $kind,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::ContiguousNFA))
+ .prefilter(false);
+ }
+ );
+ testconfig!(
+ nfa_contig_all_sparse,
+ $collection,
+ $kind,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::ContiguousNFA))
+ .dense_depth(0);
+ }
+ );
+ testconfig!(
+ nfa_contig_all_dense,
+ $collection,
+ $kind,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::ContiguousNFA))
+ .dense_depth(usize::MAX);
+ }
+ );
+ testconfig!(
+ nfa_contig_no_byte_class,
+ $collection,
+ $kind,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::ContiguousNFA))
+ .byte_classes(false);
+ }
+ );
+ testconfig!(
+ dfa_default,
+ $collection,
+ $kind,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::DFA));
+ }
+ );
+ testconfig!(
+ dfa_start_both,
+ $collection,
+ $kind,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::DFA))
+ .start_kind(StartKind::Both);
+ }
+ );
+ testconfig!(
+ dfa_no_prefilter,
+ $collection,
+ $kind,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::DFA)).prefilter(false);
+ }
+ );
+ testconfig!(
+ dfa_start_both_no_prefilter,
+ $collection,
+ $kind,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::DFA))
+ .start_kind(StartKind::Both)
+ .prefilter(false);
+ }
+ );
+ testconfig!(
+ dfa_no_byte_class,
+ $collection,
+ $kind,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::DFA)).byte_classes(false);
+ }
+ );
+ testconfig!(
+ dfa_start_both_no_byte_class,
+ $collection,
+ $kind,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::DFA))
+ .start_kind(StartKind::Both)
+ .byte_classes(false);
+ }
+ );
+ }
+ };
+}
+
+// Write out the various combinations of match semantics given the variety of
+// configurations tested by 'testcombo!'.
+testcombo!(search_leftmost_longest, AC_LEFTMOST_LONGEST, LeftmostLongest);
+testcombo!(search_leftmost_first, AC_LEFTMOST_FIRST, LeftmostFirst);
+testcombo!(
+ search_standard_nonoverlapping,
+ AC_STANDARD_NON_OVERLAPPING,
+ Standard
+);
+
+// Write out the overlapping combo by hand since there is only one of them.
+testconfig!(
+ overlapping,
+ search_standard_overlapping_default,
+ AC_STANDARD_OVERLAPPING,
+ Standard,
+ |_| ()
+);
+testconfig!(
+ overlapping,
+ search_standard_overlapping_nfa_noncontig_default,
+ AC_STANDARD_OVERLAPPING,
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::NoncontiguousNFA));
+ }
+);
+testconfig!(
+ overlapping,
+ search_standard_overlapping_nfa_noncontig_no_prefilter,
+ AC_STANDARD_OVERLAPPING,
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::NoncontiguousNFA)).prefilter(false);
+ }
+);
+testconfig!(
+ overlapping,
+ search_standard_overlapping_nfa_contig_default,
+ AC_STANDARD_OVERLAPPING,
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::ContiguousNFA));
+ }
+);
+testconfig!(
+ overlapping,
+ search_standard_overlapping_nfa_contig_no_prefilter,
+ AC_STANDARD_OVERLAPPING,
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::ContiguousNFA)).prefilter(false);
+ }
+);
+testconfig!(
+ overlapping,
+ search_standard_overlapping_nfa_contig_all_sparse,
+ AC_STANDARD_OVERLAPPING,
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::ContiguousNFA)).dense_depth(0);
+ }
+);
+testconfig!(
+ overlapping,
+ search_standard_overlapping_nfa_contig_all_dense,
+ AC_STANDARD_OVERLAPPING,
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::ContiguousNFA)).dense_depth(usize::MAX);
+ }
+);
+testconfig!(
+ overlapping,
+ search_standard_overlapping_dfa_default,
+ AC_STANDARD_OVERLAPPING,
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::DFA));
+ }
+);
+testconfig!(
+ overlapping,
+ search_standard_overlapping_dfa_start_both,
+ AC_STANDARD_OVERLAPPING,
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::DFA)).start_kind(StartKind::Both);
+ }
+);
+testconfig!(
+ overlapping,
+ search_standard_overlapping_dfa_no_prefilter,
+ AC_STANDARD_OVERLAPPING,
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::DFA)).prefilter(false);
+ }
+);
+testconfig!(
+ overlapping,
+ search_standard_overlapping_dfa_start_both_no_prefilter,
+ AC_STANDARD_OVERLAPPING,
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::DFA))
+ .start_kind(StartKind::Both)
+ .prefilter(false);
+ }
+);
+testconfig!(
+ overlapping,
+ search_standard_overlapping_dfa_no_byte_class,
+ AC_STANDARD_OVERLAPPING,
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::DFA)).byte_classes(false);
+ }
+);
+testconfig!(
+ overlapping,
+ search_standard_overlapping_dfa_start_both_no_byte_class,
+ AC_STANDARD_OVERLAPPING,
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::DFA))
+ .start_kind(StartKind::Both)
+ .byte_classes(false);
+ }
+);
+
+// Also write out tests manually for streams, since we only test the standard
+// match semantics. We also don't bother testing different automaton
+// configurations, since those are well covered by tests above.
+#[cfg(feature = "std")]
+testconfig!(
+ stream,
+ search_standard_stream_default,
+ AC_STANDARD_NON_OVERLAPPING,
+ Standard,
+ |_| ()
+);
+#[cfg(feature = "std")]
+testconfig!(
+ stream,
+ search_standard_stream_nfa_noncontig_default,
+ AC_STANDARD_NON_OVERLAPPING,
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::NoncontiguousNFA));
+ }
+);
+#[cfg(feature = "std")]
+testconfig!(
+ stream,
+ search_standard_stream_nfa_contig_default,
+ AC_STANDARD_NON_OVERLAPPING,
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::ContiguousNFA));
+ }
+);
+#[cfg(feature = "std")]
+testconfig!(
+ stream,
+ search_standard_stream_dfa_default,
+ AC_STANDARD_NON_OVERLAPPING,
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::DFA));
+ }
+);
+
+// Same thing for anchored searches. Write them out manually.
+testconfig!(
+ anchored,
+ search_standard_anchored_default,
+ AC_STANDARD_ANCHORED_NON_OVERLAPPING,
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.start_kind(StartKind::Anchored);
+ }
+);
+testconfig!(
+ anchored,
+ search_standard_anchored_nfa_noncontig_default,
+ AC_STANDARD_ANCHORED_NON_OVERLAPPING,
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.start_kind(StartKind::Anchored)
+ .kind(Some(AhoCorasickKind::NoncontiguousNFA));
+ }
+);
+testconfig!(
+ anchored,
+ search_standard_anchored_nfa_contig_default,
+ AC_STANDARD_ANCHORED_NON_OVERLAPPING,
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.start_kind(StartKind::Anchored)
+ .kind(Some(AhoCorasickKind::ContiguousNFA));
+ }
+);
+testconfig!(
+ anchored,
+ search_standard_anchored_dfa_default,
+ AC_STANDARD_ANCHORED_NON_OVERLAPPING,
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.start_kind(StartKind::Anchored).kind(Some(AhoCorasickKind::DFA));
+ }
+);
+testconfig!(
+ anchored,
+ search_standard_anchored_dfa_start_both,
+ AC_STANDARD_ANCHORED_NON_OVERLAPPING,
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.start_kind(StartKind::Both).kind(Some(AhoCorasickKind::DFA));
+ }
+);
+testconfig!(
+ anchored,
+ search_leftmost_first_anchored_default,
+ AC_LEFTMOST_FIRST_ANCHORED,
+ LeftmostFirst,
+ |b: &mut AhoCorasickBuilder| {
+ b.start_kind(StartKind::Anchored);
+ }
+);
+testconfig!(
+ anchored,
+ search_leftmost_first_anchored_nfa_noncontig_default,
+ AC_LEFTMOST_FIRST_ANCHORED,
+ LeftmostFirst,
+ |b: &mut AhoCorasickBuilder| {
+ b.start_kind(StartKind::Anchored)
+ .kind(Some(AhoCorasickKind::NoncontiguousNFA));
+ }
+);
+testconfig!(
+ anchored,
+ search_leftmost_first_anchored_nfa_contig_default,
+ AC_LEFTMOST_FIRST_ANCHORED,
+ LeftmostFirst,
+ |b: &mut AhoCorasickBuilder| {
+ b.start_kind(StartKind::Anchored)
+ .kind(Some(AhoCorasickKind::ContiguousNFA));
+ }
+);
+testconfig!(
+ anchored,
+ search_leftmost_first_anchored_dfa_default,
+ AC_LEFTMOST_FIRST_ANCHORED,
+ LeftmostFirst,
+ |b: &mut AhoCorasickBuilder| {
+ b.start_kind(StartKind::Anchored).kind(Some(AhoCorasickKind::DFA));
+ }
+);
+testconfig!(
+ anchored,
+ search_leftmost_first_anchored_dfa_start_both,
+ AC_LEFTMOST_FIRST_ANCHORED,
+ LeftmostFirst,
+ |b: &mut AhoCorasickBuilder| {
+ b.start_kind(StartKind::Both).kind(Some(AhoCorasickKind::DFA));
+ }
+);
+testconfig!(
+ anchored,
+ search_leftmost_longest_anchored_default,
+ AC_LEFTMOST_LONGEST_ANCHORED,
+ LeftmostLongest,
+ |b: &mut AhoCorasickBuilder| {
+ b.start_kind(StartKind::Anchored);
+ }
+);
+testconfig!(
+ anchored,
+ search_leftmost_longest_anchored_nfa_noncontig_default,
+ AC_LEFTMOST_LONGEST_ANCHORED,
+ LeftmostLongest,
+ |b: &mut AhoCorasickBuilder| {
+ b.start_kind(StartKind::Anchored)
+ .kind(Some(AhoCorasickKind::NoncontiguousNFA));
+ }
+);
+testconfig!(
+ anchored,
+ search_leftmost_longest_anchored_nfa_contig_default,
+ AC_LEFTMOST_LONGEST_ANCHORED,
+ LeftmostLongest,
+ |b: &mut AhoCorasickBuilder| {
+ b.start_kind(StartKind::Anchored)
+ .kind(Some(AhoCorasickKind::ContiguousNFA));
+ }
+);
+testconfig!(
+ anchored,
+ search_leftmost_longest_anchored_dfa_default,
+ AC_LEFTMOST_LONGEST_ANCHORED,
+ LeftmostLongest,
+ |b: &mut AhoCorasickBuilder| {
+ b.start_kind(StartKind::Anchored).kind(Some(AhoCorasickKind::DFA));
+ }
+);
+testconfig!(
+ anchored,
+ search_leftmost_longest_anchored_dfa_start_both,
+ AC_LEFTMOST_LONGEST_ANCHORED,
+ LeftmostLongest,
+ |b: &mut AhoCorasickBuilder| {
+ b.start_kind(StartKind::Both).kind(Some(AhoCorasickKind::DFA));
+ }
+);
+
+// And also write out the test combinations for ASCII case insensitivity.
+testconfig!(
+ acasei_standard_default,
+ &[ASCII_CASE_INSENSITIVE],
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.prefilter(false).ascii_case_insensitive(true);
+ }
+);
+testconfig!(
+ acasei_standard_nfa_noncontig_default,
+ &[ASCII_CASE_INSENSITIVE],
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::NoncontiguousNFA))
+ .prefilter(false)
+ .ascii_case_insensitive(true);
+ }
+);
+testconfig!(
+ acasei_standard_nfa_contig_default,
+ &[ASCII_CASE_INSENSITIVE],
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::ContiguousNFA))
+ .prefilter(false)
+ .ascii_case_insensitive(true);
+ }
+);
+testconfig!(
+ acasei_standard_dfa_default,
+ &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::DFA)).ascii_case_insensitive(true);
+ }
+);
+testconfig!(
+ overlapping,
+ acasei_standard_overlapping_default,
+ &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING],
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.ascii_case_insensitive(true);
+ }
+);
+testconfig!(
+ overlapping,
+ acasei_standard_overlapping_nfa_noncontig_default,
+ &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING],
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::NoncontiguousNFA))
+ .ascii_case_insensitive(true);
+ }
+);
+testconfig!(
+ overlapping,
+ acasei_standard_overlapping_nfa_contig_default,
+ &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING],
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::ContiguousNFA))
+ .ascii_case_insensitive(true);
+ }
+);
+testconfig!(
+ overlapping,
+ acasei_standard_overlapping_dfa_default,
+ &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING],
+ Standard,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::DFA)).ascii_case_insensitive(true);
+ }
+);
+testconfig!(
+ acasei_leftmost_first_default,
+ &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
+ LeftmostFirst,
+ |b: &mut AhoCorasickBuilder| {
+ b.ascii_case_insensitive(true);
+ }
+);
+testconfig!(
+ acasei_leftmost_first_nfa_noncontig_default,
+ &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
+ LeftmostFirst,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::NoncontiguousNFA))
+ .ascii_case_insensitive(true);
+ }
+);
+testconfig!(
+ acasei_leftmost_first_nfa_contig_default,
+ &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
+ LeftmostFirst,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::ContiguousNFA))
+ .ascii_case_insensitive(true);
+ }
+);
+testconfig!(
+ acasei_leftmost_first_dfa_default,
+ &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
+ LeftmostFirst,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::DFA)).ascii_case_insensitive(true);
+ }
+);
+testconfig!(
+ acasei_leftmost_longest_default,
+ &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
+ LeftmostLongest,
+ |b: &mut AhoCorasickBuilder| {
+ b.ascii_case_insensitive(true);
+ }
+);
+testconfig!(
+ acasei_leftmost_longest_nfa_noncontig_default,
+ &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
+ LeftmostLongest,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::NoncontiguousNFA))
+ .ascii_case_insensitive(true);
+ }
+);
+testconfig!(
+ acasei_leftmost_longest_nfa_contig_default,
+ &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
+ LeftmostLongest,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::ContiguousNFA))
+ .ascii_case_insensitive(true);
+ }
+);
+testconfig!(
+ acasei_leftmost_longest_dfa_default,
+ &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
+ LeftmostLongest,
+ |b: &mut AhoCorasickBuilder| {
+ b.kind(Some(AhoCorasickKind::DFA)).ascii_case_insensitive(true);
+ }
+);
+
+fn run_search_tests<F: FnMut(&SearchTest) -> Vec<Match>>(
+ which: TestCollection,
+ mut f: F,
+) {
+ let get_match_triples =
+ |matches: Vec<Match>| -> Vec<(usize, usize, usize)> {
+ matches
+ .into_iter()
+ .map(|m| (m.pattern().as_usize(), m.start(), m.end()))
+ .collect()
+ };
+ for &tests in which {
+ for test in tests {
+ assert_eq!(
+ test.matches,
+ get_match_triples(f(&test)).as_slice(),
+ "test: {}, patterns: {:?}, haystack: {:?}",
+ test.name,
+ test.patterns,
+ test.haystack
+ );
+ }
+ }
+}
+
+// Like 'run_search_tests', but we skip any tests that contain the empty
+// pattern because stream searching doesn't support it.
+#[cfg(feature = "std")]
+fn run_stream_search_tests<F: FnMut(&SearchTest) -> Vec<Match>>(
+ which: TestCollection,
+ mut f: F,
+) {
+ let get_match_triples =
+ |matches: Vec<Match>| -> Vec<(usize, usize, usize)> {
+ matches
+ .into_iter()
+ .map(|m| (m.pattern().as_usize(), m.start(), m.end()))
+ .collect()
+ };
+ for &tests in which {
+ for test in tests {
+ if test.patterns.iter().any(|p| p.is_empty()) {
+ continue;
+ }
+ assert_eq!(
+ test.matches,
+ get_match_triples(f(&test)).as_slice(),
+ "test: {}, patterns: {:?}, haystack: {:?}",
+ test.name,
+ test.patterns,
+ test.haystack
+ );
+ }
+ }
+}
+
+#[test]
+fn search_tests_have_unique_names() {
+ let assert = |constname, tests: &[SearchTest]| {
+ let mut seen = HashMap::new(); // map from test name to position
+ for (i, test) in tests.iter().enumerate() {
+ if !seen.contains_key(test.name) {
+ seen.insert(test.name, i);
+ } else {
+ let last = seen[test.name];
+ panic!(
+ "{} tests have duplicate names at positions {} and {}",
+ constname, last, i
+ );
+ }
+ }
+ };
+ assert("BASICS", BASICS);
+ assert("STANDARD", STANDARD);
+ assert("LEFTMOST", LEFTMOST);
+ assert("LEFTMOST_FIRST", LEFTMOST_FIRST);
+ assert("LEFTMOST_LONGEST", LEFTMOST_LONGEST);
+ assert("NON_OVERLAPPING", NON_OVERLAPPING);
+ assert("OVERLAPPING", OVERLAPPING);
+ assert("REGRESSION", REGRESSION);
+}
+
+#[cfg(feature = "std")]
+#[test]
+#[should_panic]
+fn stream_not_allowed_leftmost_first() {
+ let fsm = AhoCorasick::builder()
+ .match_kind(MatchKind::LeftmostFirst)
+ .build(None::<String>)
+ .unwrap();
+ assert_eq!(fsm.stream_find_iter(&b""[..]).count(), 0);
+}
+
+#[cfg(feature = "std")]
+#[test]
+#[should_panic]
+fn stream_not_allowed_leftmost_longest() {
+ let fsm = AhoCorasick::builder()
+ .match_kind(MatchKind::LeftmostLongest)
+ .build(None::<String>)
+ .unwrap();
+ assert_eq!(fsm.stream_find_iter(&b""[..]).count(), 0);
+}
+
+#[test]
+#[should_panic]
+fn overlapping_not_allowed_leftmost_first() {
+ let fsm = AhoCorasick::builder()
+ .match_kind(MatchKind::LeftmostFirst)
+ .build(None::<String>)
+ .unwrap();
+ assert_eq!(fsm.find_overlapping_iter("").count(), 0);
+}
+
+#[test]
+#[should_panic]
+fn overlapping_not_allowed_leftmost_longest() {
+ let fsm = AhoCorasick::builder()
+ .match_kind(MatchKind::LeftmostLongest)
+ .build(None::<String>)
+ .unwrap();
+ assert_eq!(fsm.find_overlapping_iter("").count(), 0);
+}
+
+// This tests that if we build an AC matcher with an "unanchored" start kind,
+// then we can't run an anchored search even if the underlying searcher
+// supports it.
+//
+// The key bit here is that both of the NFAs in this crate unconditionally
+// support both unanchored and anchored searches, but the DFA does not because
+// of the added cost of doing so. To avoid the top-level AC matcher sometimes
+// supporting anchored and sometimes not (depending on which searcher it
+// chooses to use internally), we ensure that the given 'StartKind' is always
+// respected.
+#[test]
+fn anchored_not_allowed_even_if_technically_available() {
+ let ac = AhoCorasick::builder()
+ .kind(Some(AhoCorasickKind::NoncontiguousNFA))
+ .start_kind(StartKind::Unanchored)
+ .build(&["foo"])
+ .unwrap();
+ assert!(ac.try_find(Input::new("foo").anchored(Anchored::Yes)).is_err());
+
+ let ac = AhoCorasick::builder()
+ .kind(Some(AhoCorasickKind::ContiguousNFA))
+ .start_kind(StartKind::Unanchored)
+ .build(&["foo"])
+ .unwrap();
+ assert!(ac.try_find(Input::new("foo").anchored(Anchored::Yes)).is_err());
+
+ // For completeness, check that the DFA returns an error too.
+ let ac = AhoCorasick::builder()
+ .kind(Some(AhoCorasickKind::DFA))
+ .start_kind(StartKind::Unanchored)
+ .build(&["foo"])
+ .unwrap();
+ assert!(ac.try_find(Input::new("foo").anchored(Anchored::Yes)).is_err());
+}
+
+// This is like the test above, but with unanchored and anchored flipped. That
+// is, we ask for an AC searcher with anchored support and we check that
+// unanchored searches return an error even if the underlying searcher would
+// technically support it.
+#[test]
+fn unanchored_not_allowed_even_if_technically_available() {
+ let ac = AhoCorasick::builder()
+ .kind(Some(AhoCorasickKind::NoncontiguousNFA))
+ .start_kind(StartKind::Anchored)
+ .build(&["foo"])
+ .unwrap();
+ assert!(ac.try_find(Input::new("foo").anchored(Anchored::No)).is_err());
+
+ let ac = AhoCorasick::builder()
+ .kind(Some(AhoCorasickKind::ContiguousNFA))
+ .start_kind(StartKind::Anchored)
+ .build(&["foo"])
+ .unwrap();
+ assert!(ac.try_find(Input::new("foo").anchored(Anchored::No)).is_err());
+
+ // For completeness, check that the DFA returns an error too.
+ let ac = AhoCorasick::builder()
+ .kind(Some(AhoCorasickKind::DFA))
+ .start_kind(StartKind::Anchored)
+ .build(&["foo"])
+ .unwrap();
+ assert!(ac.try_find(Input::new("foo").anchored(Anchored::No)).is_err());
+}
+
+// This tests that a prefilter does not cause a search to report a match
+// outside the bounds provided by the caller.
+//
+// This is a regression test for a bug I introduced during the rewrite of most
+// of the crate after 0.7. It was never released. The tricky part here is
+// ensuring we get a prefilter that can report matches on its own (such as the
+// packed searcher). Otherwise, prefilters that report false positives might
+// have searched past the bounds provided by the caller, but confirming the
+// match would subsequently fail.
+#[test]
+fn prefilter_stays_in_bounds() {
+ let ac = AhoCorasick::builder()
+ .match_kind(MatchKind::LeftmostFirst)
+ .build(&["sam", "frodo", "pippin", "merry", "gandalf", "sauron"])
+ .unwrap();
+ let haystack = "foo gandalf";
+ assert_eq!(None, ac.find(Input::new(haystack).range(0..10)));
+}
+
+// See: https://github.com/BurntSushi/aho-corasick/issues/44
+//
+// In short, this test ensures that enabling ASCII case insensitivity does not
+// visit an exponential number of states when filling in failure transitions.
+#[test]
+fn regression_ascii_case_insensitive_no_exponential() {
+ let ac = AhoCorasick::builder()
+ .ascii_case_insensitive(true)
+ .build(&["Tsubaki House-Triple Shot Vol01校花三姐妹"])
+ .unwrap();
+ assert!(ac.find("").is_none());
+}
+
+// See: https://github.com/BurntSushi/aho-corasick/issues/53
+//
+// This test ensures that the rare byte prefilter works in a particular corner
+// case. In particular, the shift offset detected for '/' in the patterns below
+// was incorrect, leading to a false negative.
+#[test]
+fn regression_rare_byte_prefilter() {
+ use crate::AhoCorasick;
+
+ let ac = AhoCorasick::new(&["ab/j/", "x/"]).unwrap();
+ assert!(ac.is_match("ab/j/"));
+}
+
+#[test]
+fn regression_case_insensitive_prefilter() {
+ for c in b'a'..b'z' {
+ for c2 in b'a'..b'z' {
+ let c = c as char;
+ let c2 = c2 as char;
+ let needle = format!("{}{}", c, c2).to_lowercase();
+ let haystack = needle.to_uppercase();
+ let ac = AhoCorasick::builder()
+ .ascii_case_insensitive(true)
+ .prefilter(true)
+ .build(&[&needle])
+ .unwrap();
+ assert_eq!(
+ 1,
+ ac.find_iter(&haystack).count(),
+ "failed to find {:?} in {:?}\n\nautomaton:\n{:?}",
+ needle,
+ haystack,
+ ac,
+ );
+ }
+ }
+}
+
+// See: https://github.com/BurntSushi/aho-corasick/issues/64
+//
+// This occurs when the rare byte prefilter is active.
+#[cfg(feature = "std")]
+#[test]
+fn regression_stream_rare_byte_prefilter() {
+ use std::io::Read;
+
+ // NOTE: The test only fails if this ends with j.
+ const MAGIC: [u8; 5] = *b"1234j";
+
+    // NOTE: The test fails for values in 8188..=8191. These values put the
+    // string to search across two calls to read because the buffer size is
+    // 64KB by default.
+ const BEGIN: usize = 65_535;
+
+    /// This is just a structure that implements the `Read` trait. The reader
+ /// implementation will simulate a file filled with 0, except for the MAGIC
+ /// string at offset BEGIN.
+ #[derive(Default)]
+ struct R {
+ read: usize,
+ }
+
+ impl Read for R {
+ fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
+ if self.read > 100000 {
+ return Ok(0);
+ }
+ let mut from = 0;
+ if self.read < BEGIN {
+ from = buf.len().min(BEGIN - self.read);
+ for x in 0..from {
+ buf[x] = 0;
+ }
+ self.read += from;
+ }
+ if self.read >= BEGIN && self.read <= BEGIN + MAGIC.len() {
+ let to = buf.len().min(BEGIN + MAGIC.len() - self.read + from);
+ if to > from {
+ buf[from..to].copy_from_slice(
+ &MAGIC
+ [self.read - BEGIN..self.read - BEGIN + to - from],
+ );
+ self.read += to - from;
+ from = to;
+ }
+ }
+ for x in from..buf.len() {
+ buf[x] = 0;
+ self.read += 1;
+ }
+ Ok(buf.len())
+ }
+ }
+
+ fn run() -> std::io::Result<()> {
+ let aut = AhoCorasick::builder()
+ // Enable byte classes to make debugging the automaton easier. It
+ // should have no effect on the test result.
+ .byte_classes(false)
+ .build(&[&MAGIC])
+ .unwrap();
+
+ // While reading from a vector, it works:
+ let mut buf = alloc::vec![];
+ R::default().read_to_end(&mut buf)?;
+ let from_whole = aut.find_iter(&buf).next().unwrap().start();
+
+ // But using stream_find_iter fails!
+ let mut file = std::io::BufReader::new(R::default());
+ let begin = aut
+ .stream_find_iter(&mut file)
+ .next()
+ .expect("NOT FOUND!!!!")? // Panic here
+ .start();
+ assert_eq!(from_whole, begin);
+ Ok(())
+ }
+
+ run().unwrap()
+}
diff --git a/third_party/rust/aho-corasick/src/transducer.rs b/third_party/rust/aho-corasick/src/transducer.rs
new file mode 100644
index 0000000000..39bb240f44
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/transducer.rs
@@ -0,0 +1,270 @@
+/*!
+Provides implementations of `fst::Automaton` for Aho-Corasick automata.
+
+This works by providing two wrapper types, [`Anchored`] and [`Unanchored`].
+The former executes an anchored search on an FST while the latter executes
+an unanchored search. Building these wrappers is fallible and will fail if
+the underlying Aho-Corasick automaton does not support the type of search it
+represents.
+*/
+
+use crate::{
+ automaton::{Automaton, StateID},
+ Anchored as AcAnchored, Input, MatchError,
+};
+
+/// Represents an unanchored Aho-Corasick search of a finite state transducer.
+///
+/// Wrapping an Aho-Corasick automaton in `Unanchored` will fail if the
+/// underlying automaton does not support unanchored searches.
+///
+/// # Example
+///
+/// This shows how to build an FST of keys and then run an unanchored search on
+/// those keys using an Aho-Corasick automaton.
+///
+/// ```
+/// use aho_corasick::{nfa::contiguous::NFA, transducer::Unanchored};
+/// use fst::{Automaton, IntoStreamer, Set, Streamer};
+///
+/// let set = Set::from_iter(&["abcd", "bc", "bcd", "xyz"]).unwrap();
+/// let nfa = NFA::new(&["bcd", "x"]).unwrap();
+/// // NFAs always support both unanchored and anchored searches.
+/// let searcher = Unanchored::new(&nfa).unwrap();
+///
+/// let mut stream = set.search(searcher).into_stream();
+/// let mut results = vec![];
+/// while let Some(key) = stream.next() {
+/// results.push(std::str::from_utf8(key).unwrap().to_string());
+/// }
+/// assert_eq!(vec!["abcd", "bcd", "xyz"], results);
+/// ```
+#[derive(Clone, Debug)]
+pub struct Unanchored<A>(A);
+
+impl<A: Automaton> Unanchored<A> {
+ /// Create a new `Unanchored` implementation of the `fst::Automaton` trait.
+ ///
+ /// If the given Aho-Corasick automaton does not support unanchored
+ /// searches, then this returns an error.
+ pub fn new(aut: A) -> Result<Unanchored<A>, MatchError> {
+ let input = Input::new("").anchored(AcAnchored::No);
+ let _ = aut.start_state(&input)?;
+ Ok(Unanchored(aut))
+ }
+
+ /// Returns a borrow to the underlying automaton.
+ pub fn as_ref(&self) -> &A {
+ &self.0
+ }
+
+ /// Unwrap this value and return the inner automaton.
+ pub fn into_inner(self) -> A {
+ self.0
+ }
+}
+
+impl<A: Automaton> fst::Automaton for Unanchored<A> {
+ type State = StateID;
+
+ #[inline]
+ fn start(&self) -> StateID {
+ let input = Input::new("").anchored(AcAnchored::No);
+ self.0.start_state(&input).expect("support for unanchored searches")
+ }
+
+ #[inline]
+ fn is_match(&self, state: &StateID) -> bool {
+ self.0.is_match(*state)
+ }
+
+ #[inline]
+ fn accept(&self, state: &StateID, byte: u8) -> StateID {
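+        // Once a match state has been reached, every longer key also
+        // matches, so we deliberately stay in the current (matching) state
+        // rather than advancing the automaton any further.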
+ if fst::Automaton::is_match(self, state) {
+ return *state;
+ }
+ self.0.next_state(AcAnchored::No, *state, byte)
+ }
+
+ #[inline]
+ fn can_match(&self, state: &StateID) -> bool {
+ !self.0.is_dead(*state)
+ }
+}
+
+/// Represents an anchored Aho-Corasick search of a finite state transducer.
+///
+/// Wrapping an Aho-Corasick automaton in `Anchored` will fail if the
+/// underlying automaton does not support anchored searches.
+///
+/// # Example
+///
+/// This shows how to build an FST of keys and then run an anchored search on
+/// those keys using an Aho-Corasick automaton.
+///
+/// ```
+/// use aho_corasick::{nfa::contiguous::NFA, transducer::Anchored};
+/// use fst::{Automaton, IntoStreamer, Set, Streamer};
+///
+/// let set = Set::from_iter(&["abcd", "bc", "bcd", "xyz"]).unwrap();
+/// let nfa = NFA::new(&["bcd", "x"]).unwrap();
+/// // NFAs always support both unanchored and anchored searches.
+/// let searcher = Anchored::new(&nfa).unwrap();
+///
+/// let mut stream = set.search(searcher).into_stream();
+/// let mut results = vec![];
+/// while let Some(key) = stream.next() {
+/// results.push(std::str::from_utf8(key).unwrap().to_string());
+/// }
+/// assert_eq!(vec!["bcd", "xyz"], results);
+/// ```
+///
+/// This is like the example above, except we use an Aho-Corasick DFA, which
+/// requires explicitly configuring it to support anchored searches. (NFAs
+/// unconditionally support both unanchored and anchored searches.)
+///
+/// ```
+/// use aho_corasick::{dfa::DFA, transducer::Anchored, StartKind};
+/// use fst::{Automaton, IntoStreamer, Set, Streamer};
+///
+/// let set = Set::from_iter(&["abcd", "bc", "bcd", "xyz"]).unwrap();
+/// let dfa = DFA::builder()
+/// .start_kind(StartKind::Anchored)
+/// .build(&["bcd", "x"])
+/// .unwrap();
+/// // We've explicitly configured our DFA to support anchored searches.
+/// let searcher = Anchored::new(&dfa).unwrap();
+///
+/// let mut stream = set.search(searcher).into_stream();
+/// let mut results = vec![];
+/// while let Some(key) = stream.next() {
+/// results.push(std::str::from_utf8(key).unwrap().to_string());
+/// }
+/// assert_eq!(vec!["bcd", "xyz"], results);
+/// ```
+#[derive(Clone, Debug)]
+pub struct Anchored<A>(A);
+
+impl<A: Automaton> Anchored<A> {
+ /// Create a new `Anchored` implementation of the `fst::Automaton` trait.
+ ///
+ /// If the given Aho-Corasick automaton does not support anchored searches,
+ /// then this returns an error.
+ pub fn new(aut: A) -> Result<Anchored<A>, MatchError> {
+ let input = Input::new("").anchored(AcAnchored::Yes);
+ let _ = aut.start_state(&input)?;
+ Ok(Anchored(aut))
+ }
+
+ /// Returns a borrow to the underlying automaton.
+ pub fn as_ref(&self) -> &A {
+ &self.0
+ }
+
+ /// Unwrap this value and return the inner automaton.
+ pub fn into_inner(self) -> A {
+ self.0
+ }
+}
+
+impl<A: Automaton> fst::Automaton for Anchored<A> {
+ type State = StateID;
+
+ #[inline]
+ fn start(&self) -> StateID {
+ let input = Input::new("").anchored(AcAnchored::Yes);
+        self.0.start_state(&input).expect("support for anchored searches")
+ }
+
+ #[inline]
+ fn is_match(&self, state: &StateID) -> bool {
+ self.0.is_match(*state)
+ }
+
+ #[inline]
+ fn accept(&self, state: &StateID, byte: u8) -> StateID {
+ if fst::Automaton::is_match(self, state) {
+ return *state;
+ }
+ self.0.next_state(AcAnchored::Yes, *state, byte)
+ }
+
+ #[inline]
+ fn can_match(&self, state: &StateID) -> bool {
+ !self.0.is_dead(*state)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use alloc::{string::String, vec, vec::Vec};
+
+ use fst::{Automaton, IntoStreamer, Set, Streamer};
+
+ use crate::{
+ dfa::DFA,
+ nfa::{contiguous, noncontiguous},
+ StartKind,
+ };
+
+ use super::*;
+
+ fn search<A: Automaton, D: AsRef<[u8]>>(
+ set: &Set<D>,
+ aut: A,
+ ) -> Vec<String> {
+ let mut stream = set.search(aut).into_stream();
+ let mut results = vec![];
+ while let Some(key) = stream.next() {
+ results.push(String::from(core::str::from_utf8(key).unwrap()));
+ }
+ results
+ }
+
+ #[test]
+ fn unanchored() {
+ let set =
+ Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"])
+ .unwrap();
+ let patterns = vec!["baz", "bax"];
+ let expected = vec!["baz", "xbax"];
+
+ let aut = Unanchored(noncontiguous::NFA::new(&patterns).unwrap());
+ let got = search(&set, &aut);
+ assert_eq!(got, expected);
+
+ let aut = Unanchored(contiguous::NFA::new(&patterns).unwrap());
+ let got = search(&set, &aut);
+ assert_eq!(got, expected);
+
+ let aut = Unanchored(DFA::new(&patterns).unwrap());
+ let got = search(&set, &aut);
+ assert_eq!(got, expected);
+ }
+
+ #[test]
+ fn anchored() {
+ let set =
+ Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"])
+ .unwrap();
+ let patterns = vec!["baz", "bax"];
+ let expected = vec!["baz"];
+
+ let aut = Anchored(noncontiguous::NFA::new(&patterns).unwrap());
+ let got = search(&set, &aut);
+ assert_eq!(got, expected);
+
+ let aut = Anchored(contiguous::NFA::new(&patterns).unwrap());
+ let got = search(&set, &aut);
+ assert_eq!(got, expected);
+
+ let aut = Anchored(
+ DFA::builder()
+ .start_kind(StartKind::Anchored)
+ .build(&patterns)
+ .unwrap(),
+ );
+ let got = search(&set, &aut);
+ assert_eq!(got, expected);
+ }
+}
diff --git a/third_party/rust/aho-corasick/src/util/alphabet.rs b/third_party/rust/aho-corasick/src/util/alphabet.rs
new file mode 100644
index 0000000000..69724fa3ab
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/util/alphabet.rs
@@ -0,0 +1,409 @@
+use crate::util::int::Usize;
+
+/// A representation of byte oriented equivalence classes.
+///
+/// This is used in finite state machines to reduce the size of the transition
+/// table. This can have a particularly large impact not only on the total size
+/// of an FSM, but also on FSM build times because it reduces the number of
+/// transitions that need to be visited/set.
+#[derive(Clone, Copy)]
+pub(crate) struct ByteClasses([u8; 256]);
+
+impl ByteClasses {
+ /// Creates a new set of equivalence classes where all bytes are mapped to
+ /// the same class.
+ pub(crate) fn empty() -> ByteClasses {
+ ByteClasses([0; 256])
+ }
+
+ /// Creates a new set of equivalence classes where each byte belongs to
+ /// its own equivalence class.
+ pub(crate) fn singletons() -> ByteClasses {
+ let mut classes = ByteClasses::empty();
+ for b in 0..=255 {
+ classes.set(b, b);
+ }
+ classes
+ }
+
+ /// Set the equivalence class for the given byte.
+ #[inline]
+ pub(crate) fn set(&mut self, byte: u8, class: u8) {
+ self.0[usize::from(byte)] = class;
+ }
+
+ /// Get the equivalence class for the given byte.
+ #[inline]
+ pub(crate) fn get(&self, byte: u8) -> u8 {
+ self.0[usize::from(byte)]
+ }
+
+ /// Return the total number of elements in the alphabet represented by
+ /// these equivalence classes. Equivalently, this returns the total number
+ /// of equivalence classes.
+ #[inline]
+ pub(crate) fn alphabet_len(&self) -> usize {
+ // Add one since the number of equivalence classes is one bigger than
+ // the last one.
+ usize::from(self.0[255]) + 1
+ }
+
+ /// Returns the stride, as a base-2 exponent, required for these
+ /// equivalence classes.
+ ///
+ /// The stride is always the smallest power of 2 that is greater than or
+ /// equal to the alphabet length. This is done so that converting between
+ /// state IDs and indices can be done with shifts alone, which is much
+ /// faster than integer division. The "stride2" is the exponent. i.e.,
+ /// `2^stride2 = stride`.
+ pub(crate) fn stride2(&self) -> usize {
+ let zeros = self.alphabet_len().next_power_of_two().trailing_zeros();
+ usize::try_from(zeros).unwrap()
+ }
+
+ /// Returns the stride for these equivalence classes, which corresponds
+ /// to the smallest power of 2 greater than or equal to the number of
+ /// equivalence classes.
+ pub(crate) fn stride(&self) -> usize {
+ 1 << self.stride2()
+ }
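+
+    // Worked example (illustrative, not part of the vendored code): with 7
+    // equivalence classes, `alphabet_len() == 7`, the next power of two is 8,
+    // so `stride2() == 3` and `stride() == 8`. A transition table laid out
+    // with this stride can then locate `(state, class)` as
+    // `(state_index << stride2) | class`, using shifts instead of a
+    // multiplication by the alphabet length.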
+
+ /// Returns true if and only if every byte in this class maps to its own
+    /// equivalence class. Equivalently, there are 256 equivalence classes
+    /// and each class contains exactly one byte.
+ #[inline]
+ pub(crate) fn is_singleton(&self) -> bool {
+ self.alphabet_len() == 256
+ }
+
+ /// Returns an iterator over all equivalence classes in this set.
+ pub(crate) fn iter(&self) -> ByteClassIter {
+ ByteClassIter { it: 0..self.alphabet_len() }
+ }
+
+ /// Returns an iterator of the bytes in the given equivalence class.
+ pub(crate) fn elements(&self, class: u8) -> ByteClassElements {
+ ByteClassElements { classes: self, class, bytes: 0..=255 }
+ }
+
+ /// Returns an iterator of byte ranges in the given equivalence class.
+ ///
+ /// That is, a sequence of contiguous ranges are returned. Typically, every
+ /// class maps to a single contiguous range.
+ fn element_ranges(&self, class: u8) -> ByteClassElementRanges {
+ ByteClassElementRanges { elements: self.elements(class), range: None }
+ }
+}
+
+impl core::fmt::Debug for ByteClasses {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ if self.is_singleton() {
+ write!(f, "ByteClasses(<one-class-per-byte>)")
+ } else {
+ write!(f, "ByteClasses(")?;
+ for (i, class) in self.iter().enumerate() {
+ if i > 0 {
+ write!(f, ", ")?;
+ }
+ write!(f, "{:?} => [", class)?;
+ for (start, end) in self.element_ranges(class) {
+ if start == end {
+ write!(f, "{:?}", start)?;
+ } else {
+ write!(f, "{:?}-{:?}", start, end)?;
+ }
+ }
+ write!(f, "]")?;
+ }
+ write!(f, ")")
+ }
+ }
+}
+
+/// An iterator over each equivalence class.
+#[derive(Debug)]
+pub(crate) struct ByteClassIter {
+ it: core::ops::Range<usize>,
+}
+
+impl Iterator for ByteClassIter {
+ type Item = u8;
+
+ fn next(&mut self) -> Option<u8> {
+ self.it.next().map(|class| class.as_u8())
+ }
+}
+
+/// An iterator over all elements in a specific equivalence class.
+#[derive(Debug)]
+pub(crate) struct ByteClassElements<'a> {
+ classes: &'a ByteClasses,
+ class: u8,
+ bytes: core::ops::RangeInclusive<u8>,
+}
+
+impl<'a> Iterator for ByteClassElements<'a> {
+ type Item = u8;
+
+ fn next(&mut self) -> Option<u8> {
+ while let Some(byte) = self.bytes.next() {
+ if self.class == self.classes.get(byte) {
+ return Some(byte);
+ }
+ }
+ None
+ }
+}
+
+/// An iterator over all elements in an equivalence class expressed as a
+/// sequence of contiguous ranges.
+#[derive(Debug)]
+pub(crate) struct ByteClassElementRanges<'a> {
+ elements: ByteClassElements<'a>,
+ range: Option<(u8, u8)>,
+}
+
+impl<'a> Iterator for ByteClassElementRanges<'a> {
+ type Item = (u8, u8);
+
+ fn next(&mut self) -> Option<(u8, u8)> {
+ loop {
+ let element = match self.elements.next() {
+ None => return self.range.take(),
+ Some(element) => element,
+ };
+ match self.range.take() {
+ None => {
+ self.range = Some((element, element));
+ }
+ Some((start, end)) => {
+ if usize::from(end) + 1 != usize::from(element) {
+ self.range = Some((element, element));
+ return Some((start, end));
+ }
+ self.range = Some((start, element));
+ }
+ }
+ }
+ }
+}
+
+/// A partitioning of bytes into equivalence classes.
+///
+/// A byte class set keeps track of an *approximation* of equivalence classes
+/// of bytes during NFA construction. That is, every byte in an equivalence
+/// class cannot discriminate between a match and a non-match.
+///
+/// Note that this may not compute the minimal set of equivalence classes.
+/// Basically, any byte in a pattern given to the noncontiguous NFA builder
+/// will automatically be treated as its own equivalence class. All other
+/// bytes---any byte not in any pattern---will be treated as their own
+/// equivalence classes. In theory, all bytes not in any pattern should
+/// be part of a single equivalence class, but in practice, we only treat
+/// contiguous ranges of bytes as an equivalence class. So the number of
+/// classes computed may be bigger than necessary. This usually doesn't make
+/// much of a difference, and keeps the implementation simple.
+#[derive(Clone, Debug)]
+pub(crate) struct ByteClassSet(ByteSet);
+
+impl Default for ByteClassSet {
+ fn default() -> ByteClassSet {
+ ByteClassSet::empty()
+ }
+}
+
+impl ByteClassSet {
+ /// Create a new set of byte classes where all bytes are part of the same
+ /// equivalence class.
+ pub(crate) fn empty() -> Self {
+ ByteClassSet(ByteSet::empty())
+ }
+
+    /// Indicate that the given range of bytes (inclusive) can discriminate a
+    /// match between it and all other bytes outside of the range.
+ pub(crate) fn set_range(&mut self, start: u8, end: u8) {
+ debug_assert!(start <= end);
+ if start > 0 {
+ self.0.add(start - 1);
+ }
+ self.0.add(end);
+ }
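+
+    // Worked example (illustrative, not part of the vendored code): calling
+    // `set_range(b'a', b'z')` marks `b'a' - 1` and `b'z'` as class
+    // boundaries. `byte_classes()` below then starts a new class immediately
+    // after each marked byte, producing three classes:
+    // `\x00-\x60`, `a-z` and `\x7B-\xFF`.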
+
+ /// Convert this boolean set to a map that maps all byte values to their
+ /// corresponding equivalence class. The last mapping indicates the largest
+ /// equivalence class identifier (which is never bigger than 255).
+ pub(crate) fn byte_classes(&self) -> ByteClasses {
+ let mut classes = ByteClasses::empty();
+ let mut class = 0u8;
+ let mut b = 0u8;
+ loop {
+ classes.set(b, class);
+ if b == 255 {
+ break;
+ }
+ if self.0.contains(b) {
+ class = class.checked_add(1).unwrap();
+ }
+ b = b.checked_add(1).unwrap();
+ }
+ classes
+ }
+}
+
+/// A simple set of bytes that is reasonably cheap to copy and allocation free.
+#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
+pub(crate) struct ByteSet {
+ bits: BitSet,
+}
+
+/// The representation of a byte set. Split out so that we can define a
+/// convenient Debug impl for it while keeping "ByteSet" in the output.
+#[derive(Clone, Copy, Default, Eq, PartialEq)]
+struct BitSet([u128; 2]);
+
+impl ByteSet {
+ /// Create an empty set of bytes.
+ pub(crate) fn empty() -> ByteSet {
+ ByteSet { bits: BitSet([0; 2]) }
+ }
+
+ /// Add a byte to this set.
+ ///
+ /// If the given byte already belongs to this set, then this is a no-op.
+ pub(crate) fn add(&mut self, byte: u8) {
+ let bucket = byte / 128;
+ let bit = byte % 128;
+ self.bits.0[usize::from(bucket)] |= 1 << bit;
+ }
+
+ /// Return true if and only if the given byte is in this set.
+ pub(crate) fn contains(&self, byte: u8) -> bool {
+ let bucket = byte / 128;
+ let bit = byte % 128;
+ self.bits.0[usize::from(bucket)] & (1 << bit) > 0
+ }
+}
+
+impl core::fmt::Debug for BitSet {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ let mut fmtd = f.debug_set();
+ for b in 0u8..=255 {
+ if (ByteSet { bits: *self }).contains(b) {
+ fmtd.entry(&b);
+ }
+ }
+ fmtd.finish()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use alloc::{vec, vec::Vec};
+
+ use super::*;
+
+ #[test]
+ fn byte_classes() {
+ let mut set = ByteClassSet::empty();
+ set.set_range(b'a', b'z');
+
+ let classes = set.byte_classes();
+ assert_eq!(classes.get(0), 0);
+ assert_eq!(classes.get(1), 0);
+ assert_eq!(classes.get(2), 0);
+ assert_eq!(classes.get(b'a' - 1), 0);
+ assert_eq!(classes.get(b'a'), 1);
+ assert_eq!(classes.get(b'm'), 1);
+ assert_eq!(classes.get(b'z'), 1);
+ assert_eq!(classes.get(b'z' + 1), 2);
+ assert_eq!(classes.get(254), 2);
+ assert_eq!(classes.get(255), 2);
+
+ let mut set = ByteClassSet::empty();
+ set.set_range(0, 2);
+ set.set_range(4, 6);
+ let classes = set.byte_classes();
+ assert_eq!(classes.get(0), 0);
+ assert_eq!(classes.get(1), 0);
+ assert_eq!(classes.get(2), 0);
+ assert_eq!(classes.get(3), 1);
+ assert_eq!(classes.get(4), 2);
+ assert_eq!(classes.get(5), 2);
+ assert_eq!(classes.get(6), 2);
+ assert_eq!(classes.get(7), 3);
+ assert_eq!(classes.get(255), 3);
+ }
+
+ #[test]
+ fn full_byte_classes() {
+ let mut set = ByteClassSet::empty();
+ for b in 0u8..=255 {
+ set.set_range(b, b);
+ }
+ assert_eq!(set.byte_classes().alphabet_len(), 256);
+ }
+
+ #[test]
+ fn elements_typical() {
+ let mut set = ByteClassSet::empty();
+ set.set_range(b'b', b'd');
+ set.set_range(b'g', b'm');
+ set.set_range(b'z', b'z');
+ let classes = set.byte_classes();
+ // class 0: \x00-a
+ // class 1: b-d
+ // class 2: e-f
+ // class 3: g-m
+ // class 4: n-y
+ // class 5: z-z
+ // class 6: \x7B-\xFF
+ assert_eq!(classes.alphabet_len(), 7);
+
+ let elements = classes.elements(0).collect::<Vec<_>>();
+ assert_eq!(elements.len(), 98);
+ assert_eq!(elements[0], b'\x00');
+ assert_eq!(elements[97], b'a');
+
+ let elements = classes.elements(1).collect::<Vec<_>>();
+ assert_eq!(elements, vec![b'b', b'c', b'd'],);
+
+ let elements = classes.elements(2).collect::<Vec<_>>();
+ assert_eq!(elements, vec![b'e', b'f'],);
+
+ let elements = classes.elements(3).collect::<Vec<_>>();
+ assert_eq!(elements, vec![b'g', b'h', b'i', b'j', b'k', b'l', b'm',],);
+
+ let elements = classes.elements(4).collect::<Vec<_>>();
+ assert_eq!(elements.len(), 12);
+ assert_eq!(elements[0], b'n');
+ assert_eq!(elements[11], b'y');
+
+ let elements = classes.elements(5).collect::<Vec<_>>();
+ assert_eq!(elements, vec![b'z']);
+
+ let elements = classes.elements(6).collect::<Vec<_>>();
+ assert_eq!(elements.len(), 133);
+ assert_eq!(elements[0], b'\x7B');
+ assert_eq!(elements[132], b'\xFF');
+ }
+
+ #[test]
+ fn elements_singletons() {
+ let classes = ByteClasses::singletons();
+ assert_eq!(classes.alphabet_len(), 256);
+
+ let elements = classes.elements(b'a').collect::<Vec<_>>();
+ assert_eq!(elements, vec![b'a']);
+ }
+
+ #[test]
+ fn elements_empty() {
+ let classes = ByteClasses::empty();
+ assert_eq!(classes.alphabet_len(), 1);
+
+ let elements = classes.elements(0).collect::<Vec<_>>();
+ assert_eq!(elements.len(), 256);
+ assert_eq!(elements[0], b'\x00');
+ assert_eq!(elements[255], b'\xFF');
+ }
+}
diff --git a/third_party/rust/aho-corasick/src/util/buffer.rs b/third_party/rust/aho-corasick/src/util/buffer.rs
new file mode 100644
index 0000000000..e9e982af58
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/util/buffer.rs
@@ -0,0 +1,124 @@
+use alloc::{vec, vec::Vec};
+
+/// The default buffer capacity that we use for the stream buffer.
+const DEFAULT_BUFFER_CAPACITY: usize = 64 * (1 << 10); // 64 KB
+
+/// A fairly simple roll buffer for supporting stream searches.
+///
+/// This buffer acts as a temporary place to store a fixed amount of data when
+/// reading from a stream. Its central purpose is to allow "rolling" some
+/// suffix of the data to the beginning of the buffer before refilling it with
+/// more data from the stream. For example, let's say we are trying to match
+/// "foobar" on a stream. When we report the match, we'd like to not only
+/// report the correct offsets at which the match occurs, but also the matching
+/// bytes themselves. So let's say our stream is a file with the following
+/// contents: `test test foobar test test`. Now assume that we happen to read
+/// the aforementioned file in two chunks: `test test foo` and `bar test test`.
+/// Naively, it would not be possible to report a single contiguous `foobar`
+/// match, but this roll buffer allows us to do that. Namely, after the second
+/// read, the contents of the buffer should be `st foobar test test`, where the
+/// search should ultimately resume immediately after `foo`. (The prefix `st `
+/// is included because the roll buffer saves N bytes at the end of the buffer,
+/// where N is the maximum possible length of a match.)
+///
+/// A lot of the logic for dealing with this is unfortunately split out between
+/// this roll buffer and the `StreamChunkIter`.
+///
+/// Note also that this buffer is not actually required to just report matches.
+/// Because a `Match` is just some offsets. But it *is* required for supporting
+/// things like `try_stream_replace_all` because that needs some mechanism for
+/// knowing which bytes in the stream correspond to a match and which don't. So
+/// when a match occurs across two `read` calls, *something* needs to retain
+/// the bytes from the previous `read` call because you don't know before the
+/// second read call whether a match exists or not.
+#[derive(Debug)]
+pub(crate) struct Buffer {
+ /// The raw buffer contents. This has a fixed size and never increases.
+ buf: Vec<u8>,
+ /// The minimum size of the buffer, which is equivalent to the maximum
+ /// possible length of a match. This corresponds to the amount that we
+    /// roll the buffer by before refilling it.
+ min: usize,
+ /// The end of the contents of this buffer.
+ end: usize,
+}
+
+impl Buffer {
+ /// Create a new buffer for stream searching. The minimum buffer length
+ /// given should be the size of the maximum possible match length.
+ pub(crate) fn new(min_buffer_len: usize) -> Buffer {
+ let min = core::cmp::max(1, min_buffer_len);
+ // The minimum buffer amount is also the amount that we roll our
+ // buffer in order to support incremental searching. To this end,
+ // our actual capacity needs to be at least 1 byte bigger than our
+ // minimum amount, otherwise we won't have any overlap. In actuality,
+ // we want our buffer to be a bit bigger than that for performance
+ // reasons, so we set a lower bound of `8 * min`.
+ //
+ // TODO: It would be good to find a way to test the streaming
+ // implementation with the minimal buffer size. For now, we just
+ // uncomment the next line and comment out the subsequent one.
+ // let capacity = 1 + min;
+ let capacity = core::cmp::max(min * 8, DEFAULT_BUFFER_CAPACITY);
+ Buffer { buf: vec![0; capacity], min, end: 0 }
+ }
+
+ /// Return the contents of this buffer.
+ #[inline]
+ pub(crate) fn buffer(&self) -> &[u8] {
+ &self.buf[..self.end]
+ }
+
+ /// Return the minimum size of the buffer. The only way a buffer may be
+ /// smaller than this is if the stream itself contains less than the
+ /// minimum buffer amount.
+ #[inline]
+ pub(crate) fn min_buffer_len(&self) -> usize {
+ self.min
+ }
+
+ /// Return all free capacity in this buffer.
+ fn free_buffer(&mut self) -> &mut [u8] {
+ &mut self.buf[self.end..]
+ }
+
+ /// Refill the contents of this buffer by reading as much as possible into
+ /// this buffer's free capacity. If no more bytes could be read, then this
+ /// returns false. Otherwise, this reads until it has filled the buffer
+ /// past the minimum amount.
+ pub(crate) fn fill<R: std::io::Read>(
+ &mut self,
+ mut rdr: R,
+ ) -> std::io::Result<bool> {
+ let mut readany = false;
+ loop {
+ let readlen = rdr.read(self.free_buffer())?;
+ if readlen == 0 {
+ return Ok(readany);
+ }
+ readany = true;
+ self.end += readlen;
+ if self.buffer().len() >= self.min {
+ return Ok(true);
+ }
+ }
+ }
+
+ /// Roll the contents of the buffer so that the suffix of this buffer is
+ /// moved to the front and all other contents are dropped. The size of the
+ /// suffix corresponds precisely to the minimum buffer length.
+ ///
+ /// This should only be called when the entire contents of this buffer have
+ /// been searched.
+ pub(crate) fn roll(&mut self) {
+ let roll_start = self
+ .end
+ .checked_sub(self.min)
+ .expect("buffer capacity should be bigger than minimum amount");
+ let roll_end = roll_start + self.min;
+
+ assert!(roll_end <= self.end);
+ self.buf.copy_within(roll_start..roll_end, 0);
+ self.end = self.min;
+ }
+}
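+
+// A minimal sketch of the fill/roll cycle described above, assuming a reader
+// whose contents fit in the default buffer capacity and a maximum match
+// length of 6 (the length of "foobar").
+#[cfg(test)]
+mod roll_example {
+    use super::Buffer;
+
+    #[test]
+    fn fill_then_roll() {
+        let mut buf = Buffer::new(6);
+        let mut rdr: &[u8] = b"test test foobar test test";
+        // The first fill reads the entire (short) stream into the buffer.
+        assert!(buf.fill(&mut rdr).unwrap());
+        assert_eq!(buf.buffer(), &b"test test foobar test test"[..]);
+        // Once the contents have been searched, roll() keeps only the last
+        // min_buffer_len() bytes, so a match spanning the boundary of the
+        // next read remains visible.
+        buf.roll();
+        assert_eq!(buf.buffer(), &b"t test"[..]);
+        assert_eq!(buf.min_buffer_len(), 6);
+    }
+}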
diff --git a/third_party/rust/aho-corasick/src/util/byte_frequencies.rs b/third_party/rust/aho-corasick/src/util/byte_frequencies.rs
new file mode 100644
index 0000000000..c313b629db
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/util/byte_frequencies.rs
@@ -0,0 +1,258 @@
+pub const BYTE_FREQUENCIES: [u8; 256] = [
+ 55, // '\x00'
+ 52, // '\x01'
+ 51, // '\x02'
+ 50, // '\x03'
+ 49, // '\x04'
+ 48, // '\x05'
+ 47, // '\x06'
+ 46, // '\x07'
+ 45, // '\x08'
+ 103, // '\t'
+ 242, // '\n'
+ 66, // '\x0b'
+ 67, // '\x0c'
+ 229, // '\r'
+ 44, // '\x0e'
+ 43, // '\x0f'
+ 42, // '\x10'
+ 41, // '\x11'
+ 40, // '\x12'
+ 39, // '\x13'
+ 38, // '\x14'
+ 37, // '\x15'
+ 36, // '\x16'
+ 35, // '\x17'
+ 34, // '\x18'
+ 33, // '\x19'
+ 56, // '\x1a'
+ 32, // '\x1b'
+ 31, // '\x1c'
+ 30, // '\x1d'
+ 29, // '\x1e'
+ 28, // '\x1f'
+ 255, // ' '
+ 148, // '!'
+ 164, // '"'
+ 149, // '#'
+ 136, // '$'
+ 160, // '%'
+ 155, // '&'
+ 173, // "'"
+ 221, // '('
+ 222, // ')'
+ 134, // '*'
+ 122, // '+'
+ 232, // ','
+ 202, // '-'
+ 215, // '.'
+ 224, // '/'
+ 208, // '0'
+ 220, // '1'
+ 204, // '2'
+ 187, // '3'
+ 183, // '4'
+ 179, // '5'
+ 177, // '6'
+ 168, // '7'
+ 178, // '8'
+ 200, // '9'
+ 226, // ':'
+ 195, // ';'
+ 154, // '<'
+ 184, // '='
+ 174, // '>'
+ 126, // '?'
+ 120, // '@'
+ 191, // 'A'
+ 157, // 'B'
+ 194, // 'C'
+ 170, // 'D'
+ 189, // 'E'
+ 162, // 'F'
+ 161, // 'G'
+ 150, // 'H'
+ 193, // 'I'
+ 142, // 'J'
+ 137, // 'K'
+ 171, // 'L'
+ 176, // 'M'
+ 185, // 'N'
+ 167, // 'O'
+ 186, // 'P'
+ 112, // 'Q'
+ 175, // 'R'
+ 192, // 'S'
+ 188, // 'T'
+ 156, // 'U'
+ 140, // 'V'
+ 143, // 'W'
+ 123, // 'X'
+ 133, // 'Y'
+ 128, // 'Z'
+ 147, // '['
+ 138, // '\\'
+ 146, // ']'
+ 114, // '^'
+ 223, // '_'
+ 151, // '`'
+ 249, // 'a'
+ 216, // 'b'
+ 238, // 'c'
+ 236, // 'd'
+ 253, // 'e'
+ 227, // 'f'
+ 218, // 'g'
+ 230, // 'h'
+ 247, // 'i'
+ 135, // 'j'
+ 180, // 'k'
+ 241, // 'l'
+ 233, // 'm'
+ 246, // 'n'
+ 244, // 'o'
+ 231, // 'p'
+ 139, // 'q'
+ 245, // 'r'
+ 243, // 's'
+ 251, // 't'
+ 235, // 'u'
+ 201, // 'v'
+ 196, // 'w'
+ 240, // 'x'
+ 214, // 'y'
+ 152, // 'z'
+ 182, // '{'
+ 205, // '|'
+ 181, // '}'
+ 127, // '~'
+ 27, // '\x7f'
+ 212, // '\x80'
+ 211, // '\x81'
+ 210, // '\x82'
+ 213, // '\x83'
+ 228, // '\x84'
+ 197, // '\x85'
+ 169, // '\x86'
+ 159, // '\x87'
+ 131, // '\x88'
+ 172, // '\x89'
+ 105, // '\x8a'
+ 80, // '\x8b'
+ 98, // '\x8c'
+ 96, // '\x8d'
+ 97, // '\x8e'
+ 81, // '\x8f'
+ 207, // '\x90'
+ 145, // '\x91'
+ 116, // '\x92'
+ 115, // '\x93'
+ 144, // '\x94'
+ 130, // '\x95'
+ 153, // '\x96'
+ 121, // '\x97'
+ 107, // '\x98'
+ 132, // '\x99'
+ 109, // '\x9a'
+ 110, // '\x9b'
+ 124, // '\x9c'
+ 111, // '\x9d'
+ 82, // '\x9e'
+ 108, // '\x9f'
+ 118, // '\xa0'
+ 141, // '¡'
+ 113, // '¢'
+ 129, // '£'
+ 119, // '¤'
+ 125, // '¥'
+ 165, // '¦'
+ 117, // '§'
+ 92, // '¨'
+ 106, // '©'
+ 83, // 'ª'
+ 72, // '«'
+ 99, // '¬'
+ 93, // '\xad'
+ 65, // '®'
+ 79, // '¯'
+ 166, // '°'
+ 237, // '±'
+ 163, // '²'
+ 199, // '³'
+ 190, // '´'
+ 225, // 'µ'
+ 209, // '¶'
+ 203, // '·'
+ 198, // '¸'
+ 217, // '¹'
+ 219, // 'º'
+ 206, // '»'
+ 234, // '¼'
+ 248, // '½'
+ 158, // '¾'
+ 239, // '¿'
+ 255, // 'À'
+ 255, // 'Á'
+ 255, // 'Â'
+ 255, // 'Ã'
+ 255, // 'Ä'
+ 255, // 'Å'
+ 255, // 'Æ'
+ 255, // 'Ç'
+ 255, // 'È'
+ 255, // 'É'
+ 255, // 'Ê'
+ 255, // 'Ë'
+ 255, // 'Ì'
+ 255, // 'Í'
+ 255, // 'Î'
+ 255, // 'Ï'
+ 255, // 'Ð'
+ 255, // 'Ñ'
+ 255, // 'Ò'
+ 255, // 'Ó'
+ 255, // 'Ô'
+ 255, // 'Õ'
+ 255, // 'Ö'
+ 255, // '×'
+ 255, // 'Ø'
+ 255, // 'Ù'
+ 255, // 'Ú'
+ 255, // 'Û'
+ 255, // 'Ü'
+ 255, // 'Ý'
+ 255, // 'Þ'
+ 255, // 'ß'
+ 255, // 'à'
+ 255, // 'á'
+ 255, // 'â'
+ 255, // 'ã'
+ 255, // 'ä'
+ 255, // 'å'
+ 255, // 'æ'
+ 255, // 'ç'
+ 255, // 'è'
+ 255, // 'é'
+ 255, // 'ê'
+ 255, // 'ë'
+ 255, // 'ì'
+ 255, // 'í'
+ 255, // 'î'
+ 255, // 'ï'
+ 255, // 'ð'
+ 255, // 'ñ'
+ 255, // 'ò'
+ 255, // 'ó'
+ 255, // 'ô'
+ 255, // 'õ'
+ 255, // 'ö'
+ 255, // '÷'
+ 255, // 'ø'
+ 255, // 'ù'
+ 255, // 'ú'
+ 255, // 'û'
+ 255, // 'ü'
+ 255, // 'ý'
+ 255, // 'þ'
+ 255, // 'ÿ'
+];
diff --git a/third_party/rust/aho-corasick/src/util/debug.rs b/third_party/rust/aho-corasick/src/util/debug.rs
new file mode 100644
index 0000000000..22b5f2231f
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/util/debug.rs
@@ -0,0 +1,26 @@
+/// A type that wraps a single byte with a convenient fmt::Debug impl that
+/// escapes the byte.
+pub(crate) struct DebugByte(pub(crate) u8);
+
+impl core::fmt::Debug for DebugByte {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ // Special case ASCII space. It's too hard to read otherwise, so
+ // put quotes around it. I sometimes wonder whether just '\x20' would
+ // be better...
+ if self.0 == b' ' {
+ return write!(f, "' '");
+ }
+ // 10 bytes is enough to cover any output from ascii::escape_default.
+ let mut bytes = [0u8; 10];
+ let mut len = 0;
+ for (i, mut b) in core::ascii::escape_default(self.0).enumerate() {
+ // capitalize \xab to \xAB
+ if i >= 2 && b'a' <= b && b <= b'f' {
+ b -= 32;
+ }
+ bytes[len] = b;
+ len += 1;
+ }
+ write!(f, "{}", core::str::from_utf8(&bytes[..len]).unwrap())
+ }
+}
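+
+// A small sketch of the output this impl produces, assuming only the standard
+// `format!` machinery: printable bytes are shown as-is, the space byte is
+// quoted, and escaped hex digits are upper-cased.
+#[cfg(test)]
+mod debug_byte_example {
+    use super::DebugByte;
+    use alloc::format;
+
+    #[test]
+    fn formats_bytes_readably() {
+        assert_eq!(format!("{:?}", DebugByte(b'a')), "a");
+        assert_eq!(format!("{:?}", DebugByte(b' ')), "' '");
+        assert_eq!(format!("{:?}", DebugByte(0xAB)), r"\xAB");
+    }
+}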
diff --git a/third_party/rust/aho-corasick/src/util/error.rs b/third_party/rust/aho-corasick/src/util/error.rs
new file mode 100644
index 0000000000..326d04657b
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/util/error.rs
@@ -0,0 +1,259 @@
+use crate::util::{
+ primitives::{PatternID, SmallIndex},
+ search::MatchKind,
+};
+
+/// An error that occurred during the construction of an Aho-Corasick
+/// automaton.
+///
+/// Build errors occur when some kind of limit has been exceeded, either in the
+/// number of states, the number of patterns or the length of a pattern. These
+/// limits aren't part of the public API, but they should generally be large
+/// enough to handle most use cases.
+///
+/// When the `std` feature is enabled, this implements the `std::error::Error`
+/// trait.
+#[derive(Clone, Debug)]
+pub struct BuildError {
+ kind: ErrorKind,
+}
+
+/// The kind of error that occurred.
+#[derive(Clone, Debug)]
+enum ErrorKind {
+ /// An error that occurs when allocating a new state would result in an
+ /// identifier that exceeds the capacity of a `StateID`.
+ StateIDOverflow {
+ /// The maximum possible id.
+ max: u64,
+ /// The maximum ID requested.
+ requested_max: u64,
+ },
+ /// An error that occurs when adding a pattern to an Aho-Corasick
+ /// automaton would result in an identifier that exceeds the capacity of a
+ /// `PatternID`.
+ PatternIDOverflow {
+ /// The maximum possible id.
+ max: u64,
+ /// The maximum ID requested.
+ requested_max: u64,
+ },
+ /// Occurs when a pattern string is given to the Aho-Corasick constructor
+ /// that is too long.
+ PatternTooLong {
+ /// The ID of the pattern that was too long.
+ pattern: PatternID,
+ /// The length that was too long.
+ len: usize,
+ },
+}
+
+impl BuildError {
+ pub(crate) fn state_id_overflow(
+ max: u64,
+ requested_max: u64,
+ ) -> BuildError {
+ BuildError { kind: ErrorKind::StateIDOverflow { max, requested_max } }
+ }
+
+ pub(crate) fn pattern_id_overflow(
+ max: u64,
+ requested_max: u64,
+ ) -> BuildError {
+ BuildError {
+ kind: ErrorKind::PatternIDOverflow { max, requested_max },
+ }
+ }
+
+ pub(crate) fn pattern_too_long(
+ pattern: PatternID,
+ len: usize,
+ ) -> BuildError {
+ BuildError { kind: ErrorKind::PatternTooLong { pattern, len } }
+ }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for BuildError {}
+
+impl core::fmt::Display for BuildError {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ match self.kind {
+ ErrorKind::StateIDOverflow { max, requested_max } => {
+ write!(
+ f,
+ "state identifier overflow: failed to create state ID \
+ from {}, which exceeds the max of {}",
+ requested_max, max,
+ )
+ }
+ ErrorKind::PatternIDOverflow { max, requested_max } => {
+ write!(
+ f,
+ "pattern identifier overflow: failed to create pattern ID \
+ from {}, which exceeds the max of {}",
+ requested_max, max,
+ )
+ }
+ ErrorKind::PatternTooLong { pattern, len } => {
+ write!(
+ f,
+ "pattern {} with length {} exceeds \
+ the maximum pattern length of {}",
+ pattern.as_usize(),
+ len,
+ SmallIndex::MAX.as_usize(),
+ )
+ }
+ }
+ }
+}
+
+/// An error that occurred during an Aho-Corasick search.
+///
+/// An error that occurs during a search is limited to some kind of
+/// misconfiguration that resulted in an illegal call. Stated differently,
+/// whether an error occurs is not dependent on the specific bytes in the
+/// haystack.
+///
+/// Examples of misconfiguration:
+///
+/// * Executing a stream or overlapping search on a searcher that was built with
+/// something other than [`MatchKind::Standard`](crate::MatchKind::Standard)
+/// semantics.
+/// * Requested an anchored or an unanchored search on a searcher that doesn't
+/// support unanchored or anchored searches, respectively.
+///
+/// When the `std` feature is enabled, this implements the `std::error::Error`
+/// trait.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct MatchError(alloc::boxed::Box<MatchErrorKind>);
+
+impl MatchError {
+ /// Create a new error value with the given kind.
+ ///
+ /// This is a more verbose version of the kind-specific constructors, e.g.,
+ /// `MatchError::unsupported_stream`.
+ pub fn new(kind: MatchErrorKind) -> MatchError {
+ MatchError(alloc::boxed::Box::new(kind))
+ }
+
+ /// Returns a reference to the underlying error kind.
+ pub fn kind(&self) -> &MatchErrorKind {
+ &self.0
+ }
+
+ /// Create a new "invalid anchored search" error. This occurs when the
+ /// caller requests an anchored search but where anchored searches aren't
+ /// supported.
+ ///
+ /// This is the same as calling `MatchError::new` with a
+ /// [`MatchErrorKind::InvalidInputAnchored`] kind.
+ pub fn invalid_input_anchored() -> MatchError {
+ MatchError::new(MatchErrorKind::InvalidInputAnchored)
+ }
+
+ /// Create a new "invalid unanchored search" error. This occurs when the
+ /// caller requests an unanchored search but where unanchored searches
+ /// aren't supported.
+ ///
+ /// This is the same as calling `MatchError::new` with a
+ /// [`MatchErrorKind::InvalidInputUnanchored`] kind.
+ pub fn invalid_input_unanchored() -> MatchError {
+ MatchError::new(MatchErrorKind::InvalidInputUnanchored)
+ }
+
+ /// Create a new "unsupported stream search" error. This occurs when the
+ /// caller requests a stream search while using an Aho-Corasick automaton
+ /// with a match kind other than [`MatchKind::Standard`].
+ ///
+ /// The match kind given should be the match kind of the automaton. It
+ /// should never be `MatchKind::Standard`.
+ pub fn unsupported_stream(got: MatchKind) -> MatchError {
+ MatchError::new(MatchErrorKind::UnsupportedStream { got })
+ }
+
+ /// Create a new "unsupported overlapping search" error. This occurs when
+ /// the caller requests an overlapping search while using an Aho-Corasick
+ /// automaton with a match kind other than [`MatchKind::Standard`].
+ ///
+ /// The match kind given should be the match kind of the automaton. It
+ /// should never be `MatchKind::Standard`.
+ pub fn unsupported_overlapping(got: MatchKind) -> MatchError {
+ MatchError::new(MatchErrorKind::UnsupportedOverlapping { got })
+ }
+
+ /// Create a new "unsupported empty pattern" error. This occurs when the
+ /// caller requests a search on an automaton that contains an empty pattern
+ /// string, but the operation in question doesn't support that.
+ pub fn unsupported_empty() -> MatchError {
+ MatchError::new(MatchErrorKind::UnsupportedEmpty)
+ }
+}
+
+/// The underlying kind of a [`MatchError`].
+///
+/// This is a **non-exhaustive** enum. That means new variants may be added in
+/// a semver-compatible release.
+#[non_exhaustive]
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum MatchErrorKind {
+ /// An error indicating that an anchored search was requested, but from a
+ /// searcher that was built without anchored support.
+ InvalidInputAnchored,
+ /// An error indicating that an unanchored search was requested, but from a
+ /// searcher that was built without unanchored support.
+ InvalidInputUnanchored,
+ /// An error indicating that a stream search was attempted on an
+ /// Aho-Corasick automaton with an unsupported `MatchKind`.
+ UnsupportedStream {
+ /// The match semantics for the automaton that was used.
+ got: MatchKind,
+ },
+ /// An error indicating that an overlapping search was attempted on an
+ /// Aho-Corasick automaton with an unsupported `MatchKind`.
+ UnsupportedOverlapping {
+ /// The match semantics for the automaton that was used.
+ got: MatchKind,
+ },
+ /// An error indicating that the operation requested doesn't support
+ /// automatons that contain an empty pattern string.
+ UnsupportedEmpty,
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for MatchError {}
+
+impl core::fmt::Display for MatchError {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ match *self.kind() {
+ MatchErrorKind::InvalidInputAnchored => {
+ write!(f, "anchored searches are not supported or enabled")
+ }
+ MatchErrorKind::InvalidInputUnanchored => {
+ write!(f, "unanchored searches are not supported or enabled")
+ }
+ MatchErrorKind::UnsupportedStream { got } => {
+ write!(
+ f,
+ "match kind {:?} does not support stream searching",
+ got,
+ )
+ }
+ MatchErrorKind::UnsupportedOverlapping { got } => {
+ write!(
+ f,
+ "match kind {:?} does not support overlapping searches",
+ got,
+ )
+ }
+ MatchErrorKind::UnsupportedEmpty => {
+ write!(
+ f,
+ "matching with an empty pattern string is not \
+ supported for this operation",
+ )
+ }
+ }
+ }
+}
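+
+// A brief sketch of how a caller might react to a search error: the kind is
+// matched on for programmatic handling, while the Display output is only for
+// humans. The match kind used here is an arbitrary example.
+#[cfg(test)]
+mod match_error_example {
+    use super::{MatchError, MatchErrorKind};
+    use crate::util::search::MatchKind;
+    use alloc::format;
+
+    #[test]
+    fn inspecting_a_match_error() {
+        let err = MatchError::unsupported_stream(MatchKind::LeftmostFirst);
+        assert!(matches!(
+            err.kind(),
+            MatchErrorKind::UnsupportedStream { got: MatchKind::LeftmostFirst }
+        ));
+        assert!(format!("{}", err).contains("stream"));
+    }
+}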
diff --git a/third_party/rust/aho-corasick/src/util/int.rs b/third_party/rust/aho-corasick/src/util/int.rs
new file mode 100644
index 0000000000..28ede7a47f
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/util/int.rs
@@ -0,0 +1,284 @@
+/*!
+This module provides several integer oriented traits for converting between
+both fixed size integers and integers whose size varies based on the target
+(like `usize`).
+
+The main design principle for this module is to centralize all uses of `as`.
+The thinking here is that `as` makes it very easy to perform accidental lossy
+conversions, and if we centralize all its uses here under more descriptive
+higher level operations, its use and correctness becomes easier to audit.
+
+This was copied mostly wholesale from `regex-automata`.
+
+NOTE: for simplicity, we don't take target pointer width into account here for
+`usize` conversions. Since we currently only panic in debug mode, skipping the
+check when it can be proven it isn't needed at compile time doesn't really
+matter. Now, if we wind up wanting to do as many checks as possible in release
+mode, then we would want to skip those when we know the conversions are always
+non-lossy.
+*/
+
+pub(crate) trait U8 {
+ fn as_usize(self) -> usize;
+}
+
+impl U8 for u8 {
+ fn as_usize(self) -> usize {
+ usize::from(self)
+ }
+}
+
+pub(crate) trait U16 {
+ fn as_usize(self) -> usize;
+ fn low_u8(self) -> u8;
+ fn high_u8(self) -> u8;
+}
+
+impl U16 for u16 {
+ fn as_usize(self) -> usize {
+ usize::from(self)
+ }
+
+ fn low_u8(self) -> u8 {
+ self as u8
+ }
+
+ fn high_u8(self) -> u8 {
+ (self >> 8) as u8
+ }
+}
+
+pub(crate) trait U32 {
+ fn as_usize(self) -> usize;
+ fn low_u8(self) -> u8;
+ fn low_u16(self) -> u16;
+ fn high_u16(self) -> u16;
+}
+
+impl U32 for u32 {
+ #[inline]
+ fn as_usize(self) -> usize {
+ #[cfg(debug_assertions)]
+ {
+ usize::try_from(self).expect("u32 overflowed usize")
+ }
+ #[cfg(not(debug_assertions))]
+ {
+ self as usize
+ }
+ }
+
+ fn low_u8(self) -> u8 {
+ self as u8
+ }
+
+ fn low_u16(self) -> u16 {
+ self as u16
+ }
+
+ fn high_u16(self) -> u16 {
+ (self >> 16) as u16
+ }
+}
+
+pub(crate) trait U64 {
+ fn as_usize(self) -> usize;
+ fn low_u8(self) -> u8;
+ fn low_u16(self) -> u16;
+ fn low_u32(self) -> u32;
+ fn high_u32(self) -> u32;
+}
+
+impl U64 for u64 {
+ fn as_usize(self) -> usize {
+ #[cfg(debug_assertions)]
+ {
+ usize::try_from(self).expect("u64 overflowed usize")
+ }
+ #[cfg(not(debug_assertions))]
+ {
+ self as usize
+ }
+ }
+
+ fn low_u8(self) -> u8 {
+ self as u8
+ }
+
+ fn low_u16(self) -> u16 {
+ self as u16
+ }
+
+ fn low_u32(self) -> u32 {
+ self as u32
+ }
+
+ fn high_u32(self) -> u32 {
+ (self >> 32) as u32
+ }
+}
+
+pub(crate) trait I8 {
+ fn as_usize(self) -> usize;
+ fn to_bits(self) -> u8;
+ fn from_bits(n: u8) -> i8;
+}
+
+impl I8 for i8 {
+ fn as_usize(self) -> usize {
+ #[cfg(debug_assertions)]
+ {
+ usize::try_from(self).expect("i8 overflowed usize")
+ }
+ #[cfg(not(debug_assertions))]
+ {
+ self as usize
+ }
+ }
+
+ fn to_bits(self) -> u8 {
+ self as u8
+ }
+
+ fn from_bits(n: u8) -> i8 {
+ n as i8
+ }
+}
+
+pub(crate) trait I32 {
+ fn as_usize(self) -> usize;
+ fn to_bits(self) -> u32;
+ fn from_bits(n: u32) -> i32;
+}
+
+impl I32 for i32 {
+ fn as_usize(self) -> usize {
+ #[cfg(debug_assertions)]
+ {
+ usize::try_from(self).expect("i32 overflowed usize")
+ }
+ #[cfg(not(debug_assertions))]
+ {
+ self as usize
+ }
+ }
+
+ fn to_bits(self) -> u32 {
+ self as u32
+ }
+
+ fn from_bits(n: u32) -> i32 {
+ n as i32
+ }
+}
+
+pub(crate) trait I64 {
+ fn as_usize(self) -> usize;
+ fn to_bits(self) -> u64;
+ fn from_bits(n: u64) -> i64;
+}
+
+impl I64 for i64 {
+ fn as_usize(self) -> usize {
+ #[cfg(debug_assertions)]
+ {
+ usize::try_from(self).expect("i64 overflowed usize")
+ }
+ #[cfg(not(debug_assertions))]
+ {
+ self as usize
+ }
+ }
+
+ fn to_bits(self) -> u64 {
+ self as u64
+ }
+
+ fn from_bits(n: u64) -> i64 {
+ n as i64
+ }
+}
+
+pub(crate) trait Usize {
+ fn as_u8(self) -> u8;
+ fn as_u16(self) -> u16;
+ fn as_u32(self) -> u32;
+ fn as_u64(self) -> u64;
+}
+
+impl Usize for usize {
+ fn as_u8(self) -> u8 {
+ #[cfg(debug_assertions)]
+ {
+ u8::try_from(self).expect("usize overflowed u8")
+ }
+ #[cfg(not(debug_assertions))]
+ {
+ self as u8
+ }
+ }
+
+ fn as_u16(self) -> u16 {
+ #[cfg(debug_assertions)]
+ {
+ u16::try_from(self).expect("usize overflowed u16")
+ }
+ #[cfg(not(debug_assertions))]
+ {
+ self as u16
+ }
+ }
+
+ fn as_u32(self) -> u32 {
+ #[cfg(debug_assertions)]
+ {
+ u32::try_from(self).expect("usize overflowed u32")
+ }
+ #[cfg(not(debug_assertions))]
+ {
+ self as u32
+ }
+ }
+
+ fn as_u64(self) -> u64 {
+ #[cfg(debug_assertions)]
+ {
+ u64::try_from(self).expect("usize overflowed u64")
+ }
+ #[cfg(not(debug_assertions))]
+ {
+ self as u64
+ }
+ }
+}
+
+// Pointers aren't integers, but we convert pointers to integers to perform
+// offset arithmetic in some places. (And no, we don't convert the integers
+// back to pointers.) So add 'as_usize' conversions here too for completeness.
+//
+// These 'as' casts are actually okay because they're always non-lossy. But the
+// idea here is to just try and remove as much 'as' as possible, particularly
+// in this crate where we are being really paranoid about offsets and making
+// sure we don't panic on inputs that might be untrusted. This way, the 'as'
+// casts become easier to audit if they're all in one place, even when some of
+// them are actually okay 100% of the time.
+
+pub(crate) trait Pointer {
+ fn as_usize(self) -> usize;
+}
+
+impl<T> Pointer for *const T {
+ fn as_usize(self) -> usize {
+ self as usize
+ }
+}
+
+pub(crate) trait PointerMut {
+ fn as_usize(self) -> usize;
+}
+
+impl<T> PointerMut for *mut T {
+ fn as_usize(self) -> usize {
+ self as usize
+ }
+}
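+
+// A short sketch of how these traits stand in for bare `as` casts. The values
+// here all fit in the target types, so the conversions succeed in both debug
+// and release builds; a lossy narrowing would panic in debug builds instead
+// of silently truncating.
+#[cfg(test)]
+mod conversion_example {
+    use super::{Usize, U32};
+
+    #[test]
+    fn conversions_are_explicit() {
+        let n: u32 = 0xABCD;
+        // Widening (or same-width) conversions are always lossless.
+        assert_eq!(n.as_usize(), 0xABCD);
+        assert_eq!(n.low_u16(), 0xABCD);
+        assert_eq!(n.high_u16(), 0);
+        // Narrowing from usize is funneled through one audited method.
+        let len: usize = 300;
+        assert_eq!(len.as_u32(), 300);
+    }
+}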
diff --git a/third_party/rust/aho-corasick/src/util/mod.rs b/third_party/rust/aho-corasick/src/util/mod.rs
new file mode 100644
index 0000000000..f7a1ddd07b
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/util/mod.rs
@@ -0,0 +1,12 @@
+pub(crate) mod alphabet;
+#[cfg(feature = "std")]
+pub(crate) mod buffer;
+pub(crate) mod byte_frequencies;
+pub(crate) mod debug;
+pub(crate) mod error;
+pub(crate) mod int;
+pub(crate) mod prefilter;
+pub(crate) mod primitives;
+pub(crate) mod remapper;
+pub(crate) mod search;
+pub(crate) mod special;
diff --git a/third_party/rust/aho-corasick/src/util/prefilter.rs b/third_party/rust/aho-corasick/src/util/prefilter.rs
new file mode 100644
index 0000000000..f5ddc75b7c
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/util/prefilter.rs
@@ -0,0 +1,924 @@
+use core::{
+ cmp,
+ fmt::Debug,
+ panic::{RefUnwindSafe, UnwindSafe},
+ u8,
+};
+
+use alloc::{sync::Arc, vec, vec::Vec};
+
+use crate::{
+ packed,
+ util::{
+ alphabet::ByteSet,
+ search::{Match, MatchKind, Span},
+ },
+};
+
+/// A prefilter for accelerating a search.
+///
+/// This crate uses prefilters in the core search implementations to accelerate
+/// common cases. They typically only apply to cases where there are a small
+/// number of patterns (less than 100 or so), but when they do, throughput can
+/// be boosted considerably, perhaps by an order of magnitude. When a prefilter
+/// is active, it is used whenever a search enters an automaton's start state.
+///
+/// Currently, prefilters cannot be constructed by
+/// callers. A `Prefilter` can only be accessed via the
+/// [`Automaton::prefilter`](crate::automaton::Automaton::prefilter)
+/// method and used to execute a search. In other words, a prefilter can be
+/// used to optimize your own search implementation if necessary, but cannot do
+/// much else. If you have a use case for more APIs, please submit an issue.
+#[derive(Clone, Debug)]
+pub struct Prefilter {
+ finder: Arc<dyn PrefilterI>,
+ memory_usage: usize,
+}
+
+impl Prefilter {
+ /// Execute a search in the haystack within the span given. If a match or
+ /// a possible match is returned, then it is guaranteed to occur within
+ /// the bounds of the span.
+ ///
+ /// If the span provided is invalid for the given haystack, then behavior
+ /// is unspecified.
+ #[inline]
+ pub fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
+ self.finder.find_in(haystack, span)
+ }
+
+ #[inline]
+ pub(crate) fn memory_usage(&self) -> usize {
+ self.memory_usage
+ }
+}
+
+/// A candidate is the result of running a prefilter on a haystack at a
+/// particular position.
+///
+/// The result is either no match, a confirmed match or a possible match.
+///
+/// When no match is returned, the prefilter is guaranteeing that no possible
+/// match can be found in the haystack, and the caller may trust this. That is,
+/// all correct prefilters must never report false negatives.
+///
+/// In some cases, a prefilter can confirm a match very quickly, in which case,
+/// the caller may use this to stop what it's doing and report the match. In
+/// this case, prefilter implementations must never report a false positive.
+/// In other cases, the prefilter can only report a potential match, in which
+/// case the callers must attempt to confirm the match. In this case, prefilter
+/// implementations are permitted to return false positives.
+#[derive(Clone, Debug)]
+pub enum Candidate {
+ /// No match was found. Since false negatives are not possible, this means
+ /// the search can quit as it is guaranteed not to find another match.
+ None,
+ /// A confirmed match was found. Callers do not need to confirm it.
+ Match(Match),
+ /// The start of a possible match was found. Callers must confirm it before
+ /// reporting it as a match.
+ PossibleStartOfMatch(usize),
+}
+
+impl Candidate {
+ /// Convert this candidate into an option. This is useful when callers
+ /// do not distinguish between true positives and false positives (i.e.,
+ /// the caller must always confirm the match).
+ pub fn into_option(self) -> Option<usize> {
+ match self {
+ Candidate::None => None,
+ Candidate::Match(ref m) => Some(m.start()),
+ Candidate::PossibleStartOfMatch(start) => Some(start),
+ }
+ }
+}
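+
+// A minimal sketch of how a search loop consumes candidates: `None` ends the
+// search, a confirmed `Match` can be reported directly, and a possible start
+// still has to be verified by the automaton. Which prefilter the builder
+// picks here depends on its heuristics, so the example only relies on the
+// no-false-negatives guarantee.
+#[cfg(test)]
+mod candidate_example {
+    use super::{Builder, Candidate};
+    use crate::util::search::{MatchKind, Span};
+
+    #[test]
+    fn candidates_drive_the_search() {
+        let mut builder = Builder::new(MatchKind::LeftmostFirst);
+        builder.add(b"foo");
+        builder.add(b"bar");
+        // The heuristics may decline to build a prefilter at all.
+        let pre = match builder.build() {
+            None => return,
+            Some(pre) => pre,
+        };
+        let haystack = b"zzz bar zzz";
+        let span = Span { start: 0, end: haystack.len() };
+        match pre.find_in(haystack, span) {
+            // "bar" is present, so reporting no candidate would be a false
+            // negative, which prefilters must never do.
+            Candidate::None => unreachable!("false negative"),
+            Candidate::Match(m) => assert_eq!(m.start(), 4),
+            Candidate::PossibleStartOfMatch(start) => assert!(start <= 4),
+        }
+    }
+}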
+
+/// A prefilter describes the behavior of fast literal scanners for quickly
+/// skipping past bytes in the haystack that we know cannot possibly
+/// participate in a match.
+trait PrefilterI:
+ Send + Sync + RefUnwindSafe + UnwindSafe + Debug + 'static
+{
+ /// Returns the next possible match candidate. This may yield false
+ /// positives, so callers must confirm a match starting at the position
+ /// returned. This, however, must never produce false negatives. That is,
+ /// this must, at minimum, return the starting position of the next match
+ /// in the given haystack after or at the given position.
+ fn find_in(&self, haystack: &[u8], span: Span) -> Candidate;
+}
+
+impl<P: PrefilterI + ?Sized> PrefilterI for Arc<P> {
+ #[inline(always)]
+ fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
+ (**self).find_in(haystack, span)
+ }
+}
+
+/// A builder for constructing the best possible prefilter. When constructed,
+/// this builder will heuristically select the best prefilter it can build,
+/// if any, and discard the rest.
+#[derive(Debug)]
+pub(crate) struct Builder {
+ count: usize,
+ ascii_case_insensitive: bool,
+ start_bytes: StartBytesBuilder,
+ rare_bytes: RareBytesBuilder,
+ memmem: MemmemBuilder,
+ packed: Option<packed::Builder>,
+ // If we run across a condition that suggests we shouldn't use a prefilter
+ // at all (like an empty pattern), then disable prefilters entirely.
+ enabled: bool,
+}
+
+impl Builder {
+ /// Create a new builder for constructing the best possible prefilter.
+ pub(crate) fn new(kind: MatchKind) -> Builder {
+ let pbuilder = kind
+ .as_packed()
+ .map(|kind| packed::Config::new().match_kind(kind).builder());
+ Builder {
+ count: 0,
+ ascii_case_insensitive: false,
+ start_bytes: StartBytesBuilder::new(),
+ rare_bytes: RareBytesBuilder::new(),
+ memmem: MemmemBuilder::default(),
+ packed: pbuilder,
+ enabled: true,
+ }
+ }
+
+ /// Enable ASCII case insensitivity. When set, byte strings added to this
+ /// builder will be interpreted without respect to ASCII case.
+ pub(crate) fn ascii_case_insensitive(mut self, yes: bool) -> Builder {
+ self.ascii_case_insensitive = yes;
+ self.start_bytes = self.start_bytes.ascii_case_insensitive(yes);
+ self.rare_bytes = self.rare_bytes.ascii_case_insensitive(yes);
+ self
+ }
+
+ /// Return a prefilter suitable for quickly finding potential matches.
+ ///
+ /// All patterns added to an Aho-Corasick automaton should be added to this
+ /// builder before attempting to construct the prefilter.
+ pub(crate) fn build(&self) -> Option<Prefilter> {
+ if !self.enabled {
+ debug!("prefilter not enabled, skipping");
+ return None;
+ }
+ // If we only have one pattern, then deferring to memmem is always
+ // the best choice. This is kind of a weird case, because, well, why
+ // use Aho-Corasick if you only have one pattern? But maybe you don't
+ // know exactly how many patterns you'll get up front, and you need to
+ // support the option of multiple patterns. So instead of relying on
+ // the caller to branch and use memmem explicitly, we just do it for
+ // them.
+ if !self.ascii_case_insensitive {
+ if let Some(pre) = self.memmem.build() {
+ debug!("using memmem prefilter");
+ return Some(pre);
+ }
+ }
+ let (packed, patlen, minlen) = if self.ascii_case_insensitive {
+ (None, usize::MAX, 0)
+ } else {
+ let patlen = self.packed.as_ref().map_or(usize::MAX, |p| p.len());
+ let minlen = self.packed.as_ref().map_or(0, |p| p.minimum_len());
+ let packed =
+ self.packed.as_ref().and_then(|b| b.build()).map(|s| {
+ let memory_usage = s.memory_usage();
+ debug!(
+ "built packed prefilter (len: {}, \
+ minimum pattern len: {}, memory usage: {}) \
+ for consideration",
+ patlen, minlen, memory_usage,
+ );
+ Prefilter { finder: Arc::new(Packed(s)), memory_usage }
+ });
+ (packed, patlen, minlen)
+ };
+ match (self.start_bytes.build(), self.rare_bytes.build()) {
+ // If we could build both start and rare prefilters, then there are
+ // a few cases in which we'd want to use the start-byte prefilter
+ // over the rare-byte prefilter, since the former has lower
+ // overhead.
+ (prestart @ Some(_), prerare @ Some(_)) => {
+ debug!(
+ "both start (len={}, rank={}) and \
+ rare (len={}, rank={}) byte prefilters \
+ are available",
+ self.start_bytes.count,
+ self.start_bytes.rank_sum,
+ self.rare_bytes.count,
+ self.rare_bytes.rank_sum,
+ );
+ if patlen <= 16
+ && minlen >= 2
+ && self.start_bytes.count >= 3
+ && self.rare_bytes.count >= 3
+ {
+ debug!(
+ "start and rare byte prefilters available, but \
+ they're probably slower than packed so using \
+ packed"
+ );
+ return packed;
+ }
+ // If the start-byte prefilter can scan for a smaller number
+ // of bytes than the rare-byte prefilter, then it's probably
+ // faster.
+ let has_fewer_bytes =
+ self.start_bytes.count < self.rare_bytes.count;
+ // Otherwise, if the combined frequency rank of the detected
+ // bytes in the start-byte prefilter is "close" to the combined
+ // frequency rank of the rare-byte prefilter, then we pick
+ // the start-byte prefilter even if the rare-byte prefilter
+ // heuristically searches for rare bytes. This is because the
+ // rare-byte prefilter has higher constant costs, so we tend to
+ // prefer the start-byte prefilter when we can.
+ let has_rarer_bytes =
+ self.start_bytes.rank_sum <= self.rare_bytes.rank_sum + 50;
+ if has_fewer_bytes {
+ debug!(
+ "using start byte prefilter because it has fewer
+ bytes to search for than the rare byte prefilter",
+ );
+ prestart
+ } else if has_rarer_bytes {
+ debug!(
+ "using start byte prefilter because its byte \
+ frequency rank was determined to be \
+ \"good enough\" relative to the rare byte prefilter \
+ byte frequency rank",
+ );
+ prestart
+ } else {
+ debug!("using rare byte prefilter");
+ prerare
+ }
+ }
+ (prestart @ Some(_), None) => {
+ if patlen <= 16 && minlen >= 2 && self.start_bytes.count >= 3 {
+ debug!(
+ "start byte prefilter available, but \
+ it's probably slower than packed so using \
+ packed"
+ );
+ return packed;
+ }
+ debug!(
+ "have start byte prefilter but not rare byte prefilter, \
+ so using start byte prefilter",
+ );
+ prestart
+ }
+ (None, prerare @ Some(_)) => {
+ if patlen <= 16 && minlen >= 2 && self.rare_bytes.count >= 3 {
+ debug!(
+ "rare byte prefilter available, but \
+ it's probably slower than packed so using \
+ packed"
+ );
+ return packed;
+ }
+ debug!(
+ "have rare byte prefilter but not start byte prefilter, \
+ so using rare byte prefilter",
+ );
+ prerare
+ }
+ (None, None) if self.ascii_case_insensitive => {
+ debug!(
+ "no start or rare byte prefilter and ASCII case \
+ insensitivity was enabled, so skipping prefilter",
+ );
+ None
+ }
+ (None, None) => {
+ if packed.is_some() {
+ debug!("falling back to packed prefilter");
+ } else {
+ debug!("no prefilter available");
+ }
+ packed
+ }
+ }
+ }
+
+ /// Add a literal string to this prefilter builder.
+ pub(crate) fn add(&mut self, bytes: &[u8]) {
+ if bytes.is_empty() {
+ self.enabled = false;
+ }
+ if !self.enabled {
+ return;
+ }
+ self.count += 1;
+ self.start_bytes.add(bytes);
+ self.rare_bytes.add(bytes);
+ self.memmem.add(bytes);
+ if let Some(ref mut pbuilder) = self.packed {
+ pbuilder.add(bytes);
+ }
+ }
+}
+
+/// A type that wraps a packed searcher and implements the `Prefilter`
+/// interface.
+#[derive(Clone, Debug)]
+struct Packed(packed::Searcher);
+
+impl PrefilterI for Packed {
+ fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
+ self.0
+ .find_in(&haystack, span)
+ .map_or(Candidate::None, Candidate::Match)
+ }
+}
+
+/// A builder for constructing a prefilter that uses memmem.
+#[derive(Debug, Default)]
+struct MemmemBuilder {
+ /// The number of patterns that have been added.
+ count: usize,
+ /// The singular pattern to search for. This is only set when count==1.
+ one: Option<Vec<u8>>,
+}
+
+impl MemmemBuilder {
+ fn build(&self) -> Option<Prefilter> {
+ #[cfg(all(feature = "std", feature = "perf-literal"))]
+ fn imp(builder: &MemmemBuilder) -> Option<Prefilter> {
+ let pattern = builder.one.as_ref()?;
+ assert_eq!(1, builder.count);
+ let finder = Arc::new(Memmem(
+ memchr::memmem::Finder::new(pattern).into_owned(),
+ ));
+ let memory_usage = pattern.len();
+ Some(Prefilter { finder, memory_usage })
+ }
+
+ #[cfg(not(all(feature = "std", feature = "perf-literal")))]
+ fn imp(_: &MemmemBuilder) -> Option<Prefilter> {
+ None
+ }
+
+ imp(self)
+ }
+
+ fn add(&mut self, bytes: &[u8]) {
+ self.count += 1;
+ if self.count == 1 {
+ self.one = Some(bytes.to_vec());
+ } else {
+ self.one = None;
+ }
+ }
+}
+
+/// A type that wraps a SIMD accelerated single substring search from the
+/// `memchr` crate for use as a prefilter.
+///
+/// Currently, this prefilter is only active for Aho-Corasick searchers with
+/// a single pattern. In theory, this could be extended to support searchers
+/// that have a common prefix of more than one byte (for one byte, we would use
+/// memchr), but it's not clear if it's worth it or not.
+///
+/// Also, unfortunately, this currently also requires the 'std' feature to
+/// be enabled. That's because memchr doesn't have a no-std-but-with-alloc
+/// mode, and so APIs like Finder::into_owned aren't available when 'std' is
+/// disabled. But there should be an 'alloc' feature that brings in APIs like
+/// Finder::into_owned but doesn't use std-only features like runtime CPU
+/// feature detection.
+#[cfg(all(feature = "std", feature = "perf-literal"))]
+#[derive(Clone, Debug)]
+struct Memmem(memchr::memmem::Finder<'static>);
+
+#[cfg(all(feature = "std", feature = "perf-literal"))]
+impl PrefilterI for Memmem {
+ fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
+ use crate::util::primitives::PatternID;
+
+ self.0.find(&haystack[span]).map_or(Candidate::None, |i| {
+ let start = span.start + i;
+ let end = start + self.0.needle().len();
+ // N.B. We can declare a match and use a fixed pattern ID here
+ // because a Memmem prefilter is only ever created for searchers
+ // with exactly one pattern. Thus, every match is always a match
+ // and it is always for the first and only pattern.
+ Candidate::Match(Match::new(PatternID::ZERO, start..end))
+ })
+ }
+}
+
+/// A builder for constructing a rare byte prefilter.
+///
+/// A rare byte prefilter attempts to pick out a small set of rare bytes that
+/// occur in the patterns, and then quickly scan for occurrences of those rare
+/// bytes.
+#[derive(Clone, Debug)]
+struct RareBytesBuilder {
+ /// Whether this prefilter should account for ASCII case insensitivity or
+ /// not.
+ ascii_case_insensitive: bool,
+ /// A set of rare bytes, indexed by byte value.
+ rare_set: ByteSet,
+ /// A set of byte offsets associated with bytes in a pattern. An entry
+ /// corresponds to a particular byte (its index) and is only non-zero if
+ /// the byte occurred at an offset greater than 0 in at least one pattern.
+ ///
+ /// If a byte's offset is not representable in 8 bits, then the rare bytes
+ /// prefilter becomes inert.
+ byte_offsets: RareByteOffsets,
+ /// Whether this is available as a prefilter or not. This can be set to
+ /// false during construction if a condition is seen that invalidates the
+ /// use of the rare-byte prefilter.
+ available: bool,
+ /// The number of bytes set to an active value in `byte_offsets`.
+ count: usize,
+ /// The sum of frequency ranks for the rare bytes detected. This is
+ /// intended to give a heuristic notion of how rare the bytes are.
+ rank_sum: u16,
+}
+
+/// A set of byte offsets, keyed by byte.
+#[derive(Clone, Copy)]
+struct RareByteOffsets {
+ /// Each entry corresponds to the maximum offset of the corresponding
+ /// byte across all patterns seen.
+ set: [RareByteOffset; 256],
+}
+
+impl RareByteOffsets {
+ /// Create a new empty set of rare byte offsets.
+ pub(crate) fn empty() -> RareByteOffsets {
+ RareByteOffsets { set: [RareByteOffset::default(); 256] }
+ }
+
+ /// Add the given offset for the given byte to this set. The offset
+ /// recorded for the byte is the maximum of the given offset and any
+ /// offset previously recorded for that byte.
+ pub(crate) fn set(&mut self, byte: u8, off: RareByteOffset) {
+ self.set[byte as usize].max =
+ cmp::max(self.set[byte as usize].max, off.max);
+ }
+}
+
+impl core::fmt::Debug for RareByteOffsets {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ let mut offsets = vec![];
+ for off in self.set.iter() {
+ if off.max > 0 {
+ offsets.push(off);
+ }
+ }
+ f.debug_struct("RareByteOffsets").field("set", &offsets).finish()
+ }
+}
+
+/// Offsets associated with an occurrence of a "rare" byte in any of the
+/// patterns used to construct a single Aho-Corasick automaton.
+#[derive(Clone, Copy, Debug)]
+struct RareByteOffset {
+ /// The maximum offset at which a particular byte occurs from the start
+ /// of any pattern. This is used as a shift amount. That is, when an
+ /// occurrence of this byte is found, the candidate position reported by
+ /// the prefilter is `position_of_byte - max`, such that the automaton
+ /// will begin its search at a position that is guaranteed to observe a
+ /// match.
+ ///
+ /// To avoid accidentally quadratic behavior, a prefilter is considered
+ /// ineffective when it is asked to start scanning from a position that it
+ /// has already scanned past.
+ ///
+ /// Using a `u8` here means that if we ever see a pattern that's longer
+ /// than 255 bytes, then the entire rare byte prefilter is disabled.
+ max: u8,
+}
+
+impl Default for RareByteOffset {
+ fn default() -> RareByteOffset {
+ RareByteOffset { max: 0 }
+ }
+}
+
+impl RareByteOffset {
+ /// Create a new rare byte offset. If the given offset is too big, then
+ /// None is returned. In that case, callers should render the rare bytes
+ /// prefilter inert.
+ fn new(max: usize) -> Option<RareByteOffset> {
+ if max > u8::MAX as usize {
+ None
+ } else {
+ Some(RareByteOffset { max: max as u8 })
+ }
+ }
+}
+
+impl RareBytesBuilder {
+ /// Create a new builder for constructing a rare byte prefilter.
+ fn new() -> RareBytesBuilder {
+ RareBytesBuilder {
+ ascii_case_insensitive: false,
+ rare_set: ByteSet::empty(),
+ byte_offsets: RareByteOffsets::empty(),
+ available: true,
+ count: 0,
+ rank_sum: 0,
+ }
+ }
+
+ /// Enable ASCII case insensitivity. When set, byte strings added to this
+ /// builder will be interpreted without respect to ASCII case.
+ fn ascii_case_insensitive(mut self, yes: bool) -> RareBytesBuilder {
+ self.ascii_case_insensitive = yes;
+ self
+ }
+
+ /// Build the rare bytes prefilter.
+ ///
+ /// If there are more than 3 distinct rare bytes found, or if heuristics
+ /// otherwise determine that this prefilter should not be used, then `None`
+ /// is returned.
+ fn build(&self) -> Option<Prefilter> {
+ #[cfg(feature = "perf-literal")]
+ fn imp(builder: &RareBytesBuilder) -> Option<Prefilter> {
+ if !builder.available || builder.count > 3 {
+ return None;
+ }
+ let (mut bytes, mut len) = ([0; 3], 0);
+ for b in 0..=255 {
+ if builder.rare_set.contains(b) {
+ bytes[len] = b as u8;
+ len += 1;
+ }
+ }
+ let finder: Arc<dyn PrefilterI> = match len {
+ 0 => return None,
+ 1 => Arc::new(RareBytesOne {
+ byte1: bytes[0],
+ offset: builder.byte_offsets.set[bytes[0] as usize],
+ }),
+ 2 => Arc::new(RareBytesTwo {
+ offsets: builder.byte_offsets,
+ byte1: bytes[0],
+ byte2: bytes[1],
+ }),
+ 3 => Arc::new(RareBytesThree {
+ offsets: builder.byte_offsets,
+ byte1: bytes[0],
+ byte2: bytes[1],
+ byte3: bytes[2],
+ }),
+ _ => unreachable!(),
+ };
+ Some(Prefilter { finder, memory_usage: 0 })
+ }
+
+ #[cfg(not(feature = "perf-literal"))]
+ fn imp(_: &RareBytesBuilder) -> Option<Prefilter> {
+ None
+ }
+
+ imp(self)
+ }
+
+ /// Add a byte string to this builder.
+ ///
+ /// All patterns added to an Aho-Corasick automaton should be added to this
+ /// builder before attempting to construct the prefilter.
+ fn add(&mut self, bytes: &[u8]) {
+ // If we've already given up, then do nothing.
+ if !self.available {
+ return;
+ }
+ // If we've already blown our budget, then don't waste time looking
+ // for more rare bytes.
+ if self.count > 3 {
+ self.available = false;
+ return;
+ }
+ // If the pattern is too long, then our offset table is bunk, so
+ // give up.
+ if bytes.len() >= 256 {
+ self.available = false;
+ return;
+ }
+ let mut rarest = match bytes.get(0) {
+ None => return,
+ Some(&b) => (b, freq_rank(b)),
+ };
+ // The idea here is to look for the rarest byte in each pattern, and
+ // add that to our set. As a special exception, if we see a byte that
+ // we've already added, then we immediately stop and choose that byte,
+ // even if there's another rare byte in the pattern. This helps us
+ // apply the rare byte optimization in more cases by attempting to pick
+ // bytes that are in common between patterns. So for example, if we
+ // were searching for `Sherlock` and `lockjaw`, then this would pick
+ // `k` for both patterns, resulting in the use of `memchr` instead of
+ // `memchr2` for `k` and `j`.
+ let mut found = false;
+ for (pos, &b) in bytes.iter().enumerate() {
+ self.set_offset(pos, b);
+ if found {
+ continue;
+ }
+ if self.rare_set.contains(b) {
+ found = true;
+ continue;
+ }
+ let rank = freq_rank(b);
+ if rank < rarest.1 {
+ rarest = (b, rank);
+ }
+ }
+ if !found {
+ self.add_rare_byte(rarest.0);
+ }
+ }
+
+ fn set_offset(&mut self, pos: usize, byte: u8) {
+ // This unwrap is OK because pos is never bigger than our max.
+ let offset = RareByteOffset::new(pos).unwrap();
+ self.byte_offsets.set(byte, offset);
+ if self.ascii_case_insensitive {
+ self.byte_offsets.set(opposite_ascii_case(byte), offset);
+ }
+ }
+
+ fn add_rare_byte(&mut self, byte: u8) {
+ self.add_one_rare_byte(byte);
+ if self.ascii_case_insensitive {
+ self.add_one_rare_byte(opposite_ascii_case(byte));
+ }
+ }
+
+ fn add_one_rare_byte(&mut self, byte: u8) {
+ if !self.rare_set.contains(byte) {
+ self.rare_set.add(byte);
+ self.count += 1;
+ self.rank_sum += freq_rank(byte) as u16;
+ }
+ }
+}
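+
+// A sketch of the offset bookkeeping described above, using the
+// "Sherlock"/"lockjaw" example: the offset recorded for a byte is the maximum
+// position at which it occurs in any pattern, so a candidate found at `pos`
+// can safely be rewound to `pos - max`.
+#[cfg(test)]
+mod rare_byte_offset_example {
+    use super::{RareByteOffset, RareByteOffsets};
+
+    #[test]
+    fn keeps_maximum_offset_per_byte() {
+        let mut offsets = RareByteOffsets::empty();
+        // 'k' occurs at offset 7 in "Sherlock" and at offset 3 in "lockjaw".
+        offsets.set(b'k', RareByteOffset::new(7).unwrap());
+        offsets.set(b'k', RareByteOffset::new(3).unwrap());
+        assert_eq!(offsets.set[usize::from(b'k')].max, 7);
+        // Offsets that don't fit in a u8 can't be represented, and render the
+        // rare byte prefilter inert.
+        assert!(RareByteOffset::new(256).is_none());
+    }
+}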
+
+/// A prefilter for scanning for a single "rare" byte.
+#[cfg(feature = "perf-literal")]
+#[derive(Clone, Debug)]
+struct RareBytesOne {
+ byte1: u8,
+ offset: RareByteOffset,
+}
+
+#[cfg(feature = "perf-literal")]
+impl PrefilterI for RareBytesOne {
+ fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
+ memchr::memchr(self.byte1, &haystack[span])
+ .map(|i| {
+ let pos = span.start + i;
+ cmp::max(
+ span.start,
+ pos.saturating_sub(usize::from(self.offset.max)),
+ )
+ })
+ .map_or(Candidate::None, Candidate::PossibleStartOfMatch)
+ }
+}
+
+/// A prefilter for scanning for two "rare" bytes.
+#[cfg(feature = "perf-literal")]
+#[derive(Clone, Debug)]
+struct RareBytesTwo {
+ offsets: RareByteOffsets,
+ byte1: u8,
+ byte2: u8,
+}
+
+#[cfg(feature = "perf-literal")]
+impl PrefilterI for RareBytesTwo {
+ fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
+ memchr::memchr2(self.byte1, self.byte2, &haystack[span])
+ .map(|i| {
+ let pos = span.start + i;
+ let offset = self.offsets.set[usize::from(haystack[pos])].max;
+ cmp::max(span.start, pos.saturating_sub(usize::from(offset)))
+ })
+ .map_or(Candidate::None, Candidate::PossibleStartOfMatch)
+ }
+}
+
+/// A prefilter for scanning for three "rare" bytes.
+#[cfg(feature = "perf-literal")]
+#[derive(Clone, Debug)]
+struct RareBytesThree {
+ offsets: RareByteOffsets,
+ byte1: u8,
+ byte2: u8,
+ byte3: u8,
+}
+
+#[cfg(feature = "perf-literal")]
+impl PrefilterI for RareBytesThree {
+ fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
+ memchr::memchr3(self.byte1, self.byte2, self.byte3, &haystack[span])
+ .map(|i| {
+ let pos = span.start + i;
+ let offset = self.offsets.set[usize::from(haystack[pos])].max;
+ cmp::max(span.start, pos.saturating_sub(usize::from(offset)))
+ })
+ .map_or(Candidate::None, Candidate::PossibleStartOfMatch)
+ }
+}
+
+/// A builder for constructing a starting byte prefilter.
+///
+/// A starting byte prefilter is a simplistic prefilter that looks for possible
+/// matches by reporting all positions corresponding to a particular byte. This
+/// generally only takes effect when there are at most 3 distinct possible
+/// starting bytes. e.g., the patterns `foo`, `bar`, and `baz` have two
+/// distinct starting bytes (`f` and `b`), and this prefilter returns all
+/// occurrences of either `f` or `b`.
+///
+/// In some cases, a heuristic frequency analysis may determine that it would
+/// be better not to use this prefilter even when there are 3 or fewer distinct
+/// starting bytes.
+#[derive(Clone, Debug)]
+struct StartBytesBuilder {
+ /// Whether this prefilter should account for ASCII case insensitivity or
+ /// not.
+ ascii_case_insensitive: bool,
+ /// The set of starting bytes observed.
+ byteset: Vec<bool>,
+ /// The number of bytes set to true in `byteset`.
+ count: usize,
+ /// The sum of frequency ranks for the rare bytes detected. This is
+ /// intended to give a heuristic notion of how rare the bytes are.
+ rank_sum: u16,
+}
+
+impl StartBytesBuilder {
+ /// Create a new builder for constructing a start byte prefilter.
+ fn new() -> StartBytesBuilder {
+ StartBytesBuilder {
+ ascii_case_insensitive: false,
+ byteset: vec![false; 256],
+ count: 0,
+ rank_sum: 0,
+ }
+ }
+
+ /// Enable ASCII case insensitivity. When set, byte strings added to this
+ /// builder will be interpreted without respect to ASCII case.
+ fn ascii_case_insensitive(mut self, yes: bool) -> StartBytesBuilder {
+ self.ascii_case_insensitive = yes;
+ self
+ }
+
+ /// Build the starting bytes prefilter.
+ ///
+ /// If there are more than 3 distinct starting bytes, or if heuristics
+ /// otherwise determine that this prefilter should not be used, then `None`
+ /// is returned.
+ fn build(&self) -> Option<Prefilter> {
+ #[cfg(feature = "perf-literal")]
+ fn imp(builder: &StartBytesBuilder) -> Option<Prefilter> {
+ if builder.count > 3 {
+ return None;
+ }
+ let (mut bytes, mut len) = ([0; 3], 0);
+ for b in 0..256 {
+ if !builder.byteset[b] {
+ continue;
+ }
+ // We don't handle non-ASCII bytes for now. Getting non-ASCII
+ // bytes right is trickier, since we generally don't want to put
+ // a leading UTF-8 code unit into a prefilter: such bytes tend to
+ // occur quite frequently. Instead, it would be better to use a
+ // continuation byte, but this requires more sophisticated analysis
+ // of the automaton and a richer prefilter API.
+ if b > 0x7F {
+ return None;
+ }
+ bytes[len] = b as u8;
+ len += 1;
+ }
+ let finder: Arc<dyn PrefilterI> = match len {
+ 0 => return None,
+ 1 => Arc::new(StartBytesOne { byte1: bytes[0] }),
+ 2 => Arc::new(StartBytesTwo {
+ byte1: bytes[0],
+ byte2: bytes[1],
+ }),
+ 3 => Arc::new(StartBytesThree {
+ byte1: bytes[0],
+ byte2: bytes[1],
+ byte3: bytes[2],
+ }),
+ _ => unreachable!(),
+ };
+ Some(Prefilter { finder, memory_usage: 0 })
+ }
+
+ #[cfg(not(feature = "perf-literal"))]
+ fn imp(_: &StartBytesBuilder) -> Option<Prefilter> {
+ None
+ }
+
+ imp(self)
+ }
+
+ /// Add a byte string to this builder.
+ ///
+ /// All patterns added to an Aho-Corasick automaton should be added to this
+ /// builder before attempting to construct the prefilter.
+ fn add(&mut self, bytes: &[u8]) {
+ if self.count > 3 {
+ return;
+ }
+ if let Some(&byte) = bytes.get(0) {
+ self.add_one_byte(byte);
+ if self.ascii_case_insensitive {
+ self.add_one_byte(opposite_ascii_case(byte));
+ }
+ }
+ }
+
+ fn add_one_byte(&mut self, byte: u8) {
+ if !self.byteset[byte as usize] {
+ self.byteset[byte as usize] = true;
+ self.count += 1;
+ self.rank_sum += freq_rank(byte) as u16;
+ }
+ }
+}
+
+/// A prefilter for scanning for a single starting byte.
+#[cfg(feature = "perf-literal")]
+#[derive(Clone, Debug)]
+struct StartBytesOne {
+ byte1: u8,
+}
+
+#[cfg(feature = "perf-literal")]
+impl PrefilterI for StartBytesOne {
+ fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
+ memchr::memchr(self.byte1, &haystack[span])
+ .map(|i| span.start + i)
+ .map_or(Candidate::None, Candidate::PossibleStartOfMatch)
+ }
+}
+
+/// A prefilter for scanning for two starting bytes.
+#[cfg(feature = "perf-literal")]
+#[derive(Clone, Debug)]
+struct StartBytesTwo {
+ byte1: u8,
+ byte2: u8,
+}
+
+#[cfg(feature = "perf-literal")]
+impl PrefilterI for StartBytesTwo {
+ fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
+ memchr::memchr2(self.byte1, self.byte2, &haystack[span])
+ .map(|i| span.start + i)
+ .map_or(Candidate::None, Candidate::PossibleStartOfMatch)
+ }
+}
+
+/// A prefilter for scanning for three starting bytes.
+#[cfg(feature = "perf-literal")]
+#[derive(Clone, Debug)]
+struct StartBytesThree {
+ byte1: u8,
+ byte2: u8,
+ byte3: u8,
+}
+
+#[cfg(feature = "perf-literal")]
+impl PrefilterI for StartBytesThree {
+ fn find_in(&self, haystack: &[u8], span: Span) -> Candidate {
+ memchr::memchr3(self.byte1, self.byte2, self.byte3, &haystack[span])
+ .map(|i| span.start + i)
+ .map_or(Candidate::None, Candidate::PossibleStartOfMatch)
+ }
+}
+
+/// If the given byte is an ASCII letter, then return it in the opposite case.
+/// e.g., Given `b'A'`, this returns `b'a'`, and given `b'a'`, this returns
+/// `b'A'`. If a non-ASCII letter is given, then the given byte is returned.
+pub(crate) fn opposite_ascii_case(b: u8) -> u8 {
+ if b'A' <= b && b <= b'Z' {
+ b.to_ascii_lowercase()
+ } else if b'a' <= b && b <= b'z' {
+ b.to_ascii_uppercase()
+ } else {
+ b
+ }
+}
+
+/// Return the frequency rank of the given byte. The higher the rank, the more
+/// common the byte (heuristically speaking).
+fn freq_rank(b: u8) -> u8 {
+ use crate::util::byte_frequencies::BYTE_FREQUENCIES;
+ BYTE_FREQUENCIES[b as usize]
+}
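+
+// A quick sketch of the two helpers above. The only assumption is that the
+// frequency table ranks an ASCII space as far more common than an obscure
+// control byte.
+#[cfg(test)]
+mod case_and_rank_example {
+    use super::{freq_rank, opposite_ascii_case};
+
+    #[test]
+    fn helpers_behave_as_documented() {
+        assert_eq!(opposite_ascii_case(b'A'), b'a');
+        assert_eq!(opposite_ascii_case(b'a'), b'A');
+        // Non-letters are passed through untouched.
+        assert_eq!(opposite_ascii_case(b'7'), b'7');
+        assert!(freq_rank(b' ') > freq_rank(b'\x1f'));
+    }
+}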
diff --git a/third_party/rust/aho-corasick/src/util/primitives.rs b/third_party/rust/aho-corasick/src/util/primitives.rs
new file mode 100644
index 0000000000..784d397171
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/util/primitives.rs
@@ -0,0 +1,759 @@
+/*!
+Lower level primitive types that are useful in a variety of circumstances.
+
+# Overview
+
+This list represents the principal types in this module and briefly describes
+when you might want to use them.
+
+* [`PatternID`] - A type that represents the identifier of a regex pattern.
+This is probably the most widely used type in this module (which is why it's
+also re-exported in the crate root).
+* [`StateID`] - A type that represents the identifier of a finite automaton
+state. This is used for both NFAs and DFAs, with the notable exception of
+the hybrid NFA/DFA. (The hybrid NFA/DFA uses a special purpose "lazy" state
+identifier.)
+* [`SmallIndex`] - The internal representation of both a `PatternID` and a
+`StateID`. Its purpose is to serve as a type that can index memory without
+being as big as a `usize` on 64-bit targets. The main idea behind this type
+is that there are many things in regex engines that will, in practice, never
+overflow a 32-bit integer. (For example, the number of patterns in a regex
+or the number of states in an NFA.) Thus, a `SmallIndex` can be used to index
+memory without peppering `as` casts everywhere. Moreover, it forces callers
+to handle errors in the case where, somehow, the value would otherwise overflow
+either a 32-bit integer or a `usize` (e.g., on 16-bit targets).
+*/
+
+// The macro we use to define some types below adds methods that we don't
+// use on some of the types. There isn't much, so we just squash the warning.
+#![allow(dead_code)]
+
+use alloc::vec::Vec;
+
+use crate::util::int::{Usize, U16, U32, U64};
+
+/// A type that represents a "small" index.
+///
+/// The main idea of this type is to provide something that can index memory,
+/// but uses less memory than `usize` on 64-bit systems. Specifically, its
+/// representation is always a `u32` and has `repr(transparent)` enabled. (So
+/// it is safe to transmute between a `u32` and a `SmallIndex`.)
+///
+/// A small index is typically useful in cases where there is no practical way
+/// that the index will overflow a 32-bit integer. A good example of this is
+/// an NFA state. If you could somehow build an NFA with `2^30` states, its
+/// memory usage would be exorbitant and its runtime execution would be so
+/// slow as to be completely worthless. Therefore, this crate generally deems
+/// it acceptable to return an error if it would otherwise build an NFA that
+/// requires a slice longer than what a 32-bit integer can index. In exchange,
+/// we can use 32-bit indices instead of 64-bit indices in various places.
+///
+/// This type ensures this by providing a constructor that will return an error
+/// if its argument cannot fit into the type. This makes it much easier to
+/// handle these sorts of boundary cases that are otherwise extremely subtle.
+///
+/// On all targets, this type guarantees that its value will fit in a `u32`,
+/// `i32`, `usize` and an `isize`. This means that on 16-bit targets, for
+/// example, this type's maximum value will never overflow an `isize`,
+/// which means it will never overflow an `i16` even though its internal
+/// representation is still a `u32`.
+///
+/// The purpose for making the type fit into even signed integer types like
+/// `isize` is to guarantee that the difference between any two small indices
+/// is itself also a small index. This is useful in certain contexts, e.g.,
+/// for delta encoding.
+///
+/// # Other types
+///
+/// The following types wrap `SmallIndex` to provide a more focused use case:
+///
+/// * [`PatternID`] is for representing the identifiers of patterns.
+/// * [`StateID`] is for representing the identifiers of states in finite
+/// automata. It is used for both NFAs and DFAs.
+///
+/// # Representation
+///
+/// This type is always represented internally by a `u32` and is marked as
+/// `repr(transparent)`. Thus, this type always has the same representation as
+/// a `u32`. It is thus safe to transmute between a `u32` and a `SmallIndex`.
+///
+/// # Indexing
+///
+/// For convenience, callers may use a `SmallIndex` to index slices.
+///
+/// # Safety
+///
+/// While a `SmallIndex` is meant to guarantee that its value fits into `usize`
+/// without using as much space as a `usize` on all targets, callers must
+/// not rely on this property for safety. Callers may choose to rely on this
+/// property for correctness however. For example, creating a `SmallIndex` with
+/// an invalid value can be done in entirely safe code. This may in turn result
+/// in panics or silent logical errors.
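+///
+/// # Example
+///
+/// A minimal sketch of the fallible construction this type encourages, kept
+/// as a non-running snippet since the type is crate-internal:
+///
+/// ```ignore
+/// let idx = SmallIndex::new(5).unwrap();
+/// assert_eq!(idx.as_usize(), 5);
+/// // Values beyond SmallIndex::MAX are rejected instead of being silently
+/// // truncated.
+/// assert!(SmallIndex::new(usize::MAX).is_err());
+/// ```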
+#[derive(
+ Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord,
+)]
+#[repr(transparent)]
+pub(crate) struct SmallIndex(u32);
+
+impl SmallIndex {
+ /// The maximum index value.
+ #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
+ pub const MAX: SmallIndex =
+ // FIXME: Use as_usize() once const functions in traits are stable.
+ SmallIndex::new_unchecked(core::i32::MAX as usize - 1);
+
+ /// The maximum index value.
+ #[cfg(target_pointer_width = "16")]
+ pub const MAX: SmallIndex =
+        SmallIndex::new_unchecked(core::isize::MAX as usize - 1);
+
+ /// The total number of values that can be represented as a small index.
+ pub const LIMIT: usize = SmallIndex::MAX.as_usize() + 1;
+
+ /// The zero index value.
+ pub const ZERO: SmallIndex = SmallIndex::new_unchecked(0);
+
+ /// The number of bytes that a single small index uses in memory.
+ pub const SIZE: usize = core::mem::size_of::<SmallIndex>();
+
+ /// Create a new small index.
+ ///
+ /// If the given index exceeds [`SmallIndex::MAX`], then this returns
+ /// an error.
+ #[inline]
+ pub fn new(index: usize) -> Result<SmallIndex, SmallIndexError> {
+ SmallIndex::try_from(index)
+ }
+
+ /// Create a new small index without checking whether the given value
+ /// exceeds [`SmallIndex::MAX`].
+ ///
+ /// Using this routine with an invalid index value will result in
+ /// unspecified behavior, but *not* undefined behavior. In particular, an
+ /// invalid index value is likely to cause panics or possibly even silent
+ /// logical errors.
+ ///
+ /// Callers must never rely on a `SmallIndex` to be within a certain range
+ /// for memory safety.
+ #[inline]
+ pub const fn new_unchecked(index: usize) -> SmallIndex {
+ // FIXME: Use as_u32() once const functions in traits are stable.
+ SmallIndex::from_u32_unchecked(index as u32)
+ }
+
+ /// Create a new small index from a `u32` without checking whether the
+ /// given value exceeds [`SmallIndex::MAX`].
+ ///
+ /// Using this routine with an invalid index value will result in
+ /// unspecified behavior, but *not* undefined behavior. In particular, an
+ /// invalid index value is likely to cause panics or possibly even silent
+ /// logical errors.
+ ///
+ /// Callers must never rely on a `SmallIndex` to be within a certain range
+ /// for memory safety.
+ #[inline]
+ pub const fn from_u32_unchecked(index: u32) -> SmallIndex {
+ SmallIndex(index)
+ }
+
+ /// Like [`SmallIndex::new`], but panics if the given index is not valid.
+ #[inline]
+ pub fn must(index: usize) -> SmallIndex {
+ SmallIndex::new(index).expect("invalid small index")
+ }
+
+ /// Return this small index as a `usize`. This is guaranteed to never
+ /// overflow `usize`.
+ #[inline]
+ pub const fn as_usize(&self) -> usize {
+ // FIXME: Use as_usize() once const functions in traits are stable.
+ self.0 as usize
+ }
+
+ /// Return this small index as a `u64`. This is guaranteed to never
+ /// overflow.
+ #[inline]
+ pub const fn as_u64(&self) -> u64 {
+ // FIXME: Use u64::from() once const functions in traits are stable.
+ self.0 as u64
+ }
+
+ /// Return the internal `u32` of this small index. This is guaranteed to
+ /// never overflow `u32`.
+ #[inline]
+ pub const fn as_u32(&self) -> u32 {
+ self.0
+ }
+
+ /// Return the internal `u32` of this small index represented as an `i32`.
+ /// This is guaranteed to never overflow an `i32`.
+ #[inline]
+ pub const fn as_i32(&self) -> i32 {
+ // This is OK because we guarantee that our max value is <= i32::MAX.
+ self.0 as i32
+ }
+
+ /// Returns one more than this small index as a usize.
+ ///
+ /// Since a small index has constraints on its maximum value, adding `1` to
+    /// it will always fit in a `usize`, `isize`, `u32` and an `i32`.
+ #[inline]
+ pub fn one_more(&self) -> usize {
+ self.as_usize() + 1
+ }
+
+ /// Decode this small index from the bytes given using the native endian
+ /// byte order for the current target.
+ ///
+ /// If the decoded integer is not representable as a small index for the
+ /// current target, then this returns an error.
+ #[inline]
+ pub fn from_ne_bytes(
+ bytes: [u8; 4],
+ ) -> Result<SmallIndex, SmallIndexError> {
+ let id = u32::from_ne_bytes(bytes);
+ if id > SmallIndex::MAX.as_u32() {
+ return Err(SmallIndexError { attempted: u64::from(id) });
+ }
+ Ok(SmallIndex::new_unchecked(id.as_usize()))
+ }
+
+ /// Decode this small index from the bytes given using the native endian
+ /// byte order for the current target.
+ ///
+    /// This is analogous to [`SmallIndex::new_unchecked`] in that it does not
+ /// check whether the decoded integer is representable as a small index.
+ #[inline]
+ pub fn from_ne_bytes_unchecked(bytes: [u8; 4]) -> SmallIndex {
+ SmallIndex::new_unchecked(u32::from_ne_bytes(bytes).as_usize())
+ }
+
+ /// Return the underlying small index integer as raw bytes in native endian
+ /// format.
+ #[inline]
+ pub fn to_ne_bytes(&self) -> [u8; 4] {
+ self.0.to_ne_bytes()
+ }
+}
+
+impl<T> core::ops::Index<SmallIndex> for [T] {
+ type Output = T;
+
+ #[inline]
+ fn index(&self, index: SmallIndex) -> &T {
+ &self[index.as_usize()]
+ }
+}
+
+impl<T> core::ops::IndexMut<SmallIndex> for [T] {
+ #[inline]
+ fn index_mut(&mut self, index: SmallIndex) -> &mut T {
+ &mut self[index.as_usize()]
+ }
+}
+
+impl<T> core::ops::Index<SmallIndex> for Vec<T> {
+ type Output = T;
+
+ #[inline]
+ fn index(&self, index: SmallIndex) -> &T {
+ &self[index.as_usize()]
+ }
+}
+
+impl<T> core::ops::IndexMut<SmallIndex> for Vec<T> {
+ #[inline]
+ fn index_mut(&mut self, index: SmallIndex) -> &mut T {
+ &mut self[index.as_usize()]
+ }
+}
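+
+// A minimal usage sketch of the pieces defined above: checked construction,
+// slice indexing via the `Index` impls, and a native-endian round trip. The
+// concrete values are arbitrary and exist only for illustration:
+//
+//     let idx = SmallIndex::new(2).expect("2 fits in a small index");
+//     let xs = [10u8, 20, 30];
+//     assert_eq!(30, xs[idx]);
+//     assert_eq!(idx, SmallIndex::from_ne_bytes(idx.to_ne_bytes()).unwrap());
+//     assert!(SmallIndex::new(usize::MAX).is_err());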
+
+impl From<StateID> for SmallIndex {
+ fn from(sid: StateID) -> SmallIndex {
+ sid.0
+ }
+}
+
+impl From<PatternID> for SmallIndex {
+ fn from(pid: PatternID) -> SmallIndex {
+ pid.0
+ }
+}
+
+impl From<u8> for SmallIndex {
+ fn from(index: u8) -> SmallIndex {
+ SmallIndex::new_unchecked(usize::from(index))
+ }
+}
+
+impl TryFrom<u16> for SmallIndex {
+ type Error = SmallIndexError;
+
+ fn try_from(index: u16) -> Result<SmallIndex, SmallIndexError> {
+ if u32::from(index) > SmallIndex::MAX.as_u32() {
+ return Err(SmallIndexError { attempted: u64::from(index) });
+ }
+ Ok(SmallIndex::new_unchecked(index.as_usize()))
+ }
+}
+
+impl TryFrom<u32> for SmallIndex {
+ type Error = SmallIndexError;
+
+ fn try_from(index: u32) -> Result<SmallIndex, SmallIndexError> {
+ if index > SmallIndex::MAX.as_u32() {
+ return Err(SmallIndexError { attempted: u64::from(index) });
+ }
+ Ok(SmallIndex::new_unchecked(index.as_usize()))
+ }
+}
+
+impl TryFrom<u64> for SmallIndex {
+ type Error = SmallIndexError;
+
+ fn try_from(index: u64) -> Result<SmallIndex, SmallIndexError> {
+ if index > SmallIndex::MAX.as_u64() {
+ return Err(SmallIndexError { attempted: index });
+ }
+ Ok(SmallIndex::new_unchecked(index.as_usize()))
+ }
+}
+
+impl TryFrom<usize> for SmallIndex {
+ type Error = SmallIndexError;
+
+ fn try_from(index: usize) -> Result<SmallIndex, SmallIndexError> {
+ if index > SmallIndex::MAX.as_usize() {
+ return Err(SmallIndexError { attempted: index.as_u64() });
+ }
+ Ok(SmallIndex::new_unchecked(index))
+ }
+}
+
+/// This error occurs when a small index could not be constructed.
+///
+/// This occurs when given an integer exceeding the maximum small index value.
+///
+/// When the `std` feature is enabled, this implements the `Error` trait.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct SmallIndexError {
+ attempted: u64,
+}
+
+impl SmallIndexError {
+ /// Returns the value that could not be converted to a small index.
+ pub fn attempted(&self) -> u64 {
+ self.attempted
+ }
+}
+
+#[cfg(feature = "std")]
+impl std::error::Error for SmallIndexError {}
+
+impl core::fmt::Display for SmallIndexError {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ write!(
+ f,
+ "failed to create small index from {:?}, which exceeds {:?}",
+ self.attempted(),
+ SmallIndex::MAX,
+ )
+ }
+}
+
+#[derive(Clone, Debug)]
+pub(crate) struct SmallIndexIter {
+ rng: core::ops::Range<usize>,
+}
+
+impl Iterator for SmallIndexIter {
+ type Item = SmallIndex;
+
+ fn next(&mut self) -> Option<SmallIndex> {
+ if self.rng.start >= self.rng.end {
+ return None;
+ }
+ let next_id = self.rng.start + 1;
+ let id = core::mem::replace(&mut self.rng.start, next_id);
+ // new_unchecked is OK since we asserted that the number of
+ // elements in this iterator will fit in an ID at construction.
+ Some(SmallIndex::new_unchecked(id))
+ }
+}
+
+macro_rules! index_type_impls {
+ ($name:ident, $err:ident, $iter:ident, $withiter:ident) => {
+ impl $name {
+ /// The maximum value.
+ pub const MAX: $name = $name(SmallIndex::MAX);
+
+ /// The total number of values that can be represented.
+ pub const LIMIT: usize = SmallIndex::LIMIT;
+
+ /// The zero value.
+ pub const ZERO: $name = $name(SmallIndex::ZERO);
+
+ /// The number of bytes that a single value uses in memory.
+ pub const SIZE: usize = SmallIndex::SIZE;
+
+ /// Create a new value that is represented by a "small index."
+ ///
+ /// If the given index exceeds the maximum allowed value, then this
+ /// returns an error.
+ #[inline]
+ pub fn new(value: usize) -> Result<$name, $err> {
+ SmallIndex::new(value).map($name).map_err($err)
+ }
+
+ /// Create a new value without checking whether the given argument
+ /// exceeds the maximum.
+ ///
+ /// Using this routine with an invalid value will result in
+ /// unspecified behavior, but *not* undefined behavior. In
+ /// particular, an invalid ID value is likely to cause panics or
+ /// possibly even silent logical errors.
+ ///
+ /// Callers must never rely on this type to be within a certain
+ /// range for memory safety.
+ #[inline]
+ pub const fn new_unchecked(value: usize) -> $name {
+ $name(SmallIndex::new_unchecked(value))
+ }
+
+ /// Create a new value from a `u32` without checking whether the
+ /// given value exceeds the maximum.
+ ///
+ /// Using this routine with an invalid value will result in
+ /// unspecified behavior, but *not* undefined behavior. In
+ /// particular, an invalid ID value is likely to cause panics or
+ /// possibly even silent logical errors.
+ ///
+ /// Callers must never rely on this type to be within a certain
+ /// range for memory safety.
+ #[inline]
+ pub const fn from_u32_unchecked(index: u32) -> $name {
+ $name(SmallIndex::from_u32_unchecked(index))
+ }
+
+ /// Like `new`, but panics if the given value is not valid.
+ #[inline]
+ pub fn must(value: usize) -> $name {
+ $name::new(value).expect(concat!(
+ "invalid ",
+ stringify!($name),
+ " value"
+ ))
+ }
+
+ /// Return the internal value as a `usize`. This is guaranteed to
+ /// never overflow `usize`.
+ #[inline]
+ pub const fn as_usize(&self) -> usize {
+ self.0.as_usize()
+ }
+
+ /// Return the internal value as a `u64`. This is guaranteed to
+ /// never overflow.
+ #[inline]
+ pub const fn as_u64(&self) -> u64 {
+ self.0.as_u64()
+ }
+
+ /// Return the internal value as a `u32`. This is guaranteed to
+ /// never overflow `u32`.
+ #[inline]
+ pub const fn as_u32(&self) -> u32 {
+ self.0.as_u32()
+ }
+
+ /// Return the internal value as a `i32`. This is guaranteed to
+ /// never overflow an `i32`.
+ #[inline]
+ pub const fn as_i32(&self) -> i32 {
+ self.0.as_i32()
+ }
+
+ /// Returns one more than this value as a usize.
+ ///
+ /// Since values represented by a "small index" have constraints
+ /// on their maximum value, adding `1` to it will always fit in a
+            /// `usize`, `u32` and an `i32`.
+ #[inline]
+ pub fn one_more(&self) -> usize {
+ self.0.one_more()
+ }
+
+ /// Decode this value from the bytes given using the native endian
+ /// byte order for the current target.
+ ///
+ /// If the decoded integer is not representable as a small index
+ /// for the current target, then this returns an error.
+ #[inline]
+ pub fn from_ne_bytes(bytes: [u8; 4]) -> Result<$name, $err> {
+ SmallIndex::from_ne_bytes(bytes).map($name).map_err($err)
+ }
+
+ /// Decode this value from the bytes given using the native endian
+ /// byte order for the current target.
+ ///
+            /// This is analogous to `new_unchecked` in that it does not check
+ /// whether the decoded integer is representable as a small index.
+ #[inline]
+ pub fn from_ne_bytes_unchecked(bytes: [u8; 4]) -> $name {
+ $name(SmallIndex::from_ne_bytes_unchecked(bytes))
+ }
+
+ /// Return the underlying integer as raw bytes in native endian
+ /// format.
+ #[inline]
+ pub fn to_ne_bytes(&self) -> [u8; 4] {
+ self.0.to_ne_bytes()
+ }
+
+ /// Returns an iterator over all values from 0 up to and not
+ /// including the given length.
+ ///
+ /// If the given length exceeds this type's limit, then this
+ /// panics.
+ pub(crate) fn iter(len: usize) -> $iter {
+ $iter::new(len)
+ }
+ }
+
+ // We write our own Debug impl so that we get things like PatternID(5)
+ // instead of PatternID(SmallIndex(5)).
+ impl core::fmt::Debug for $name {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ f.debug_tuple(stringify!($name)).field(&self.as_u32()).finish()
+ }
+ }
+
+ impl<T> core::ops::Index<$name> for [T] {
+ type Output = T;
+
+ #[inline]
+ fn index(&self, index: $name) -> &T {
+ &self[index.as_usize()]
+ }
+ }
+
+ impl<T> core::ops::IndexMut<$name> for [T] {
+ #[inline]
+ fn index_mut(&mut self, index: $name) -> &mut T {
+ &mut self[index.as_usize()]
+ }
+ }
+
+ impl<T> core::ops::Index<$name> for Vec<T> {
+ type Output = T;
+
+ #[inline]
+ fn index(&self, index: $name) -> &T {
+ &self[index.as_usize()]
+ }
+ }
+
+ impl<T> core::ops::IndexMut<$name> for Vec<T> {
+ #[inline]
+ fn index_mut(&mut self, index: $name) -> &mut T {
+ &mut self[index.as_usize()]
+ }
+ }
+
+ impl From<SmallIndex> for $name {
+ fn from(index: SmallIndex) -> $name {
+ $name(index)
+ }
+ }
+
+ impl From<u8> for $name {
+ fn from(value: u8) -> $name {
+ $name(SmallIndex::from(value))
+ }
+ }
+
+ impl TryFrom<u16> for $name {
+ type Error = $err;
+
+ fn try_from(value: u16) -> Result<$name, $err> {
+ SmallIndex::try_from(value).map($name).map_err($err)
+ }
+ }
+
+ impl TryFrom<u32> for $name {
+ type Error = $err;
+
+ fn try_from(value: u32) -> Result<$name, $err> {
+ SmallIndex::try_from(value).map($name).map_err($err)
+ }
+ }
+
+ impl TryFrom<u64> for $name {
+ type Error = $err;
+
+ fn try_from(value: u64) -> Result<$name, $err> {
+ SmallIndex::try_from(value).map($name).map_err($err)
+ }
+ }
+
+ impl TryFrom<usize> for $name {
+ type Error = $err;
+
+ fn try_from(value: usize) -> Result<$name, $err> {
+ SmallIndex::try_from(value).map($name).map_err($err)
+ }
+ }
+
+ /// This error occurs when an ID could not be constructed.
+ ///
+ /// This occurs when given an integer exceeding the maximum allowed
+ /// value.
+ ///
+ /// When the `std` feature is enabled, this implements the `Error`
+ /// trait.
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ pub struct $err(SmallIndexError);
+
+ impl $err {
+ /// Returns the value that could not be converted to an ID.
+ pub fn attempted(&self) -> u64 {
+ self.0.attempted()
+ }
+ }
+
+ #[cfg(feature = "std")]
+ impl std::error::Error for $err {}
+
+ impl core::fmt::Display for $err {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ write!(
+ f,
+ "failed to create {} from {:?}, which exceeds {:?}",
+ stringify!($name),
+ self.attempted(),
+ $name::MAX,
+ )
+ }
+ }
+
+ #[derive(Clone, Debug)]
+ pub(crate) struct $iter(SmallIndexIter);
+
+ impl $iter {
+ fn new(len: usize) -> $iter {
+ assert!(
+ len <= $name::LIMIT,
+                    "cannot create iterator for {} when the number of \
+                    elements exceeds {:?}",
+ stringify!($name),
+ $name::LIMIT,
+ );
+ $iter(SmallIndexIter { rng: 0..len })
+ }
+ }
+
+ impl Iterator for $iter {
+ type Item = $name;
+
+ fn next(&mut self) -> Option<$name> {
+ self.0.next().map($name)
+ }
+ }
+
+ /// An iterator adapter that is like std::iter::Enumerate, but attaches
+ /// small index values instead. It requires `ExactSizeIterator`. At
+ /// construction, it ensures that the index of each element in the
+ /// iterator is representable in the corresponding small index type.
+ #[derive(Clone, Debug)]
+ pub(crate) struct $withiter<I> {
+ it: I,
+ ids: $iter,
+ }
+
+ impl<I: Iterator + ExactSizeIterator> $withiter<I> {
+ fn new(it: I) -> $withiter<I> {
+ let ids = $name::iter(it.len());
+ $withiter { it, ids }
+ }
+ }
+
+ impl<I: Iterator + ExactSizeIterator> Iterator for $withiter<I> {
+ type Item = ($name, I::Item);
+
+ fn next(&mut self) -> Option<($name, I::Item)> {
+ let item = self.it.next()?;
+                // Number of elements in this iterator must match, according
+                // to the contract of ExactSizeIterator.
+ let id = self.ids.next().unwrap();
+ Some((id, item))
+ }
+ }
+ };
+}
+
+/// The identifier of a pattern in an Aho-Corasick automaton.
+///
+/// It is represented by a `u32` even on 64-bit systems in order to conserve
+/// space. Namely, on all targets, this type guarantees that its value will
+/// fit in a `u32`, `i32`, `usize` and an `isize`. This means that on 16-bit
+/// targets, for example, this type's maximum value will never overflow an
+/// `isize`, which means it will never overflow a `i16` even though its
+/// internal representation is still a `u32`.
+///
+/// # Safety
+///
+/// While a `PatternID` is meant to guarantee that its value fits into `usize`
+/// without using as much space as a `usize` on all targets, callers must
+/// not rely on this property for safety. Callers may choose to rely on this
+/// property for correctness however. For example, creating a `PatternID`
+/// with an invalid value can be done in entirely safe code. This may in turn
+/// result in panics or silent logical errors.
+#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, PartialOrd, Ord)]
+#[repr(transparent)]
+pub struct PatternID(SmallIndex);
+
+/// The identifier of a finite automaton state.
+///
+/// It is represented by a `u32` even on 64-bit systems in order to conserve
+/// space. Namely, on all targets, this type guarantees that its value will
+/// fit in a `u32`, `i32`, `usize` and an `isize`. This means that on 16-bit
+/// targets, for example, this type's maximum value will never overflow an
+/// `isize`, which means it will never overflow an `i16` even though its
+/// internal representation is still a `u32`.
+///
+/// # Safety
+///
+/// While a `StateID` is meant to guarantee that its value fits into `usize`
+/// without using as much space as a `usize` on all targets, callers must
+/// not rely on this property for safety. Callers may choose to rely on this
+/// property for correctness however. For example, creating a `StateID` with an
+/// invalid value can be done in entirely safe code. This may in turn result in
+/// panics or silent logical errors.
+#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, PartialOrd, Ord)]
+#[repr(transparent)]
+pub struct StateID(SmallIndex);
+
+index_type_impls!(PatternID, PatternIDError, PatternIDIter, WithPatternIDIter);
+index_type_impls!(StateID, StateIDError, StateIDIter, WithStateIDIter);
+
+/// A utility trait that defines a couple of adapters for making it convenient
+/// to access indices as "small index" types. We require ExactSizeIterator so
+/// that iterator construction can do a single check to make sure the index of
+/// each element is representable by its small index type.
+pub(crate) trait IteratorIndexExt: Iterator {
+ fn with_pattern_ids(self) -> WithPatternIDIter<Self>
+ where
+ Self: Sized + ExactSizeIterator,
+ {
+ WithPatternIDIter::new(self)
+ }
+
+ fn with_state_ids(self) -> WithStateIDIter<Self>
+ where
+ Self: Sized + ExactSizeIterator,
+ {
+ WithStateIDIter::new(self)
+ }
+}
+
+impl<I: Iterator> IteratorIndexExt for I {}
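+
+// A short sketch of how the generated index types and the adapters above fit
+// together. The pattern strings are arbitrary and merely serve as an
+// `ExactSizeIterator` to enumerate:
+//
+//     let pid = PatternID::must(1);
+//     assert_eq!(1, pid.as_usize());
+//     assert!(PatternID::try_from(u64::MAX).is_err());
+//
+//     let patterns = ["foo", "bar"];
+//     for (id, pat) in patterns.iter().with_pattern_ids() {
+//         assert_eq!(patterns[id.as_usize()], *pat);
+//     }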
diff --git a/third_party/rust/aho-corasick/src/util/remapper.rs b/third_party/rust/aho-corasick/src/util/remapper.rs
new file mode 100644
index 0000000000..7c47a082cd
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/util/remapper.rs
@@ -0,0 +1,214 @@
+use alloc::vec::Vec;
+
+use crate::{nfa::noncontiguous, util::primitives::StateID};
+
+/// Remappable is a tightly coupled abstraction that facilitates remapping
+/// state identifiers in DFAs.
+///
+/// The main idea behind remapping state IDs is that DFAs often need to check
+/// if a certain state is a "special" state of some kind (like a match state)
+/// during a search. Since this is extremely perf critical code, we want this
+/// check to be as fast as possible. Partitioning state IDs, for example,
+/// into "non-match" and "match" states means one can tell if a state is a
+/// match state via a simple comparison of the state ID.
+///
+/// The issue is that during the DFA construction process, it's not
+/// particularly easy to partition the states. Instead, the simplest thing is
+/// to often just do a pass over all of the states and shuffle them into their
+/// desired partitionings. To do that, we need a mechanism for swapping states.
+/// Hence, this abstraction.
+///
+/// Normally, for such little code, I would just duplicate it. But this is a
+/// key optimization and the implementation is a bit subtle. So the abstraction
+/// is basically a ham-fisted attempt at DRY. The only place we use this is in
+/// the dense and one-pass DFAs.
+///
+/// See also src/dfa/special.rs for a more detailed explanation of how dense
+/// DFAs are partitioned.
+pub(crate) trait Remappable: core::fmt::Debug {
+ /// Return the total number of states.
+ fn state_len(&self) -> usize;
+
+ /// Swap the states pointed to by the given IDs. The underlying finite
+ /// state machine should be mutated such that all of the transitions in
+ /// `id1` are now in the memory region where the transitions for `id2`
+ /// were, and all of the transitions in `id2` are now in the memory region
+ /// where the transitions for `id1` were.
+ ///
+ /// Essentially, this "moves" `id1` to `id2` and `id2` to `id1`.
+ ///
+ /// It is expected that, after calling this, the underlying state machine
+ /// will be left in an inconsistent state, since any other transitions
+ /// pointing to, e.g., `id1` need to be updated to point to `id2`, since
+ /// that's where `id1` moved to.
+ ///
+ /// In order to "fix" the underlying inconsistent state, a `Remapper`
+ /// should be used to guarantee that `remap` is called at the appropriate
+ /// time.
+ fn swap_states(&mut self, id1: StateID, id2: StateID);
+
+ /// This must remap every single state ID in the underlying value according
+ /// to the function given. For example, in a DFA, this should remap every
+ /// transition and every starting state ID.
+ fn remap(&mut self, map: impl Fn(StateID) -> StateID);
+}
+
+/// Remapper is an abstraction that manages the remapping of state IDs in a
+/// finite state machine. This is useful when one wants to shuffle states into
+/// different positions in the machine.
+///
+/// One of the key complexities this manages is the ability to correctly move
+/// one state multiple times.
+///
+/// Once shuffling is complete, `remap` must be called, which will rewrite
+/// all pertinent transitions to updated state IDs. Neglecting to call `remap`
+/// will almost certainly result in a corrupt machine.
+#[derive(Debug)]
+pub(crate) struct Remapper {
+ /// A map from the index of a state to its pre-multiplied identifier.
+ ///
+ /// When a state is swapped with another, then their corresponding
+ /// locations in this map are also swapped. Thus, its new position will
+ /// still point to its old pre-multiplied StateID.
+ ///
+ /// While there is a bit more to it, this then allows us to rewrite the
+ /// state IDs in a DFA's transition table in a single pass. This is done
+ /// by iterating over every ID in this map, then iterating over each
+ /// transition for the state at that ID and re-mapping the transition from
+ /// `old_id` to `map[dfa.to_index(old_id)]`. That is, we find the position
+ /// in this map where `old_id` *started*, and set it to where it ended up
+ /// after all swaps have been completed.
+ map: Vec<StateID>,
+ /// A way to map indices to state IDs (and back).
+ idx: IndexMapper,
+}
+
+impl Remapper {
+ /// Create a new remapper from the given remappable implementation. The
+ /// remapper can then be used to swap states. The remappable value given
+    /// here must be the same one given to `swap` and `remap`.
+ ///
+ /// The given stride should be the stride of the transition table expressed
+ /// as a power of 2. This stride is used to map between state IDs and state
+ /// indices. If state IDs and state indices are equivalent, then provide
+ /// a `stride2` of `0`, which acts as an identity.
+ pub(crate) fn new(r: &impl Remappable, stride2: usize) -> Remapper {
+ let idx = IndexMapper { stride2 };
+ let map = (0..r.state_len()).map(|i| idx.to_state_id(i)).collect();
+ Remapper { map, idx }
+ }
+
+ /// Swap two states. Once this is called, callers must follow through to
+ /// call `remap`, or else it's possible for the underlying remappable
+ /// value to be in a corrupt state.
+ pub(crate) fn swap(
+ &mut self,
+ r: &mut impl Remappable,
+ id1: StateID,
+ id2: StateID,
+ ) {
+ if id1 == id2 {
+ return;
+ }
+ r.swap_states(id1, id2);
+ self.map.swap(self.idx.to_index(id1), self.idx.to_index(id2));
+ }
+
+ /// Complete the remapping process by rewriting all state IDs in the
+ /// remappable value according to the swaps performed.
+ pub(crate) fn remap(mut self, r: &mut impl Remappable) {
+ // Update the map to account for states that have been swapped
+ // multiple times. For example, if (A, C) and (C, G) are swapped, then
+ // transitions previously pointing to A should now point to G. But if
+ // we don't update our map, they will erroneously be set to C. All we
+ // do is follow the swaps in our map until we see our original state
+ // ID.
+ //
+ // The intuition here is to think about how changes are made to the
+ // map: only through pairwise swaps. That means that starting at any
+ // given state, it is always possible to find the loop back to that
+ // state by following the swaps represented in the map (which might be
+ // 0 swaps).
+ //
+ // We are also careful to clone the map before starting in order to
+ // freeze it. We use the frozen map to find our loops, since we need to
+ // update our map as well. Without freezing it, our updates could break
+ // the loops referenced above and produce incorrect results.
+ let oldmap = self.map.clone();
+ for i in 0..r.state_len() {
+ let cur_id = self.idx.to_state_id(i);
+ let mut new_id = oldmap[i];
+ if cur_id == new_id {
+ continue;
+ }
+ loop {
+ let id = oldmap[self.idx.to_index(new_id)];
+ if cur_id == id {
+ self.map[i] = new_id;
+ break;
+ }
+ new_id = id;
+ }
+ }
+ r.remap(|sid| self.map[self.idx.to_index(sid)]);
+ }
+}
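+
+// A minimal sketch of the swap-then-remap contract described above. `Toy` is
+// a hypothetical `Remappable` whose only "transition data" is a single target
+// state per state; it exists purely to illustrate the workflow:
+//
+//     #[derive(Debug)]
+//     struct Toy {
+//         next: Vec<StateID>,
+//     }
+//
+//     impl Remappable for Toy {
+//         fn state_len(&self) -> usize {
+//             self.next.len()
+//         }
+//         fn swap_states(&mut self, id1: StateID, id2: StateID) {
+//             self.next.swap(id1.as_usize(), id2.as_usize());
+//         }
+//         fn remap(&mut self, map: impl Fn(StateID) -> StateID) {
+//             for sid in self.next.iter_mut() {
+//                 *sid = map(*sid);
+//             }
+//         }
+//     }
+//
+//     // Every state points at state 2. After swapping states 1 and 2 and
+//     // then remapping, every transition points at state 1, since that is
+//     // where the old state 2 ended up.
+//     let mut toy = Toy { next: alloc::vec![StateID::must(2); 3] };
+//     let mut remapper = Remapper::new(&toy, 0);
+//     remapper.swap(&mut toy, StateID::must(1), StateID::must(2));
+//     remapper.remap(&mut toy);
+//     assert_eq!(alloc::vec![StateID::must(1); 3], toy.next);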
+
+/// A simple type for mapping between state indices and state IDs.
+///
+/// The reason why this exists is because state IDs are "premultiplied" in a
+/// DFA. That is, in order to get to the transitions for a particular state,
+/// one need only use the state ID as-is, instead of having to multiply it by
+/// the transition table's stride.
+///
+/// The downside of this is that it's inconvenient to map between state IDs
+/// using a dense map, e.g., Vec<StateID>. That's because state IDs look like
+/// `0`, `stride`, `2*stride`, `3*stride`, etc., instead of `0`, `1`, `2`, `3`,
+/// etc.
+///
+/// Since our state IDs are premultiplied, we can convert back-and-forth
+/// between IDs and indices by simply unmultiplying the IDs and multiplying the
+/// indices.
+///
+/// Note that for a sparse NFA, state IDs and indices are equivalent. In this
+/// case, we set the stride of the index mapper to be `0`, which acts as an
+/// identity.
+#[derive(Debug)]
+struct IndexMapper {
+ /// The power of 2 corresponding to the stride of the corresponding
+ /// transition table. 'id >> stride2' de-multiplies an ID while 'index <<
+ /// stride2' pre-multiplies an index to an ID.
+ stride2: usize,
+}
+
+impl IndexMapper {
+ /// Convert a state ID to a state index.
+ fn to_index(&self, id: StateID) -> usize {
+ id.as_usize() >> self.stride2
+ }
+
+ /// Convert a state index to a state ID.
+ fn to_state_id(&self, index: usize) -> StateID {
+ // CORRECTNESS: If the given index is not valid, then it is not
+ // required for this to panic or return a valid state ID. We'll "just"
+ // wind up with panics or silent logic errors at some other point. But
+ // this is OK because if Remappable::state_len is correct and so is
+ // 'to_index', then all inputs to 'to_state_id' should be valid indices
+ // and thus transform into valid state IDs.
+ StateID::new_unchecked(index << self.stride2)
+ }
+}
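+
+// A small numeric sketch of the mapping above. With a transition table
+// stride of 4 (i.e., a `stride2` of 2), the state at index 3 has the
+// premultiplied ID 12, and vice versa. The stride chosen here is arbitrary:
+//
+//     let idx = IndexMapper { stride2: 2 };
+//     assert_eq!(StateID::must(12), idx.to_state_id(3));
+//     assert_eq!(3, idx.to_index(StateID::must(12)));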
+
+impl Remappable for noncontiguous::NFA {
+ fn state_len(&self) -> usize {
+ noncontiguous::NFA::states(self).len()
+ }
+
+ fn swap_states(&mut self, id1: StateID, id2: StateID) {
+ noncontiguous::NFA::swap_states(self, id1, id2)
+ }
+
+ fn remap(&mut self, map: impl Fn(StateID) -> StateID) {
+ noncontiguous::NFA::remap(self, map)
+ }
+}
diff --git a/third_party/rust/aho-corasick/src/util/search.rs b/third_party/rust/aho-corasick/src/util/search.rs
new file mode 100644
index 0000000000..59b7035e1f
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/util/search.rs
@@ -0,0 +1,1148 @@
+use core::ops::{Range, RangeBounds};
+
+use crate::util::primitives::PatternID;
+
+/// The configuration and the haystack to use for an Aho-Corasick search.
+///
+/// When executing a search, there are a few parameters one might want to
+/// configure:
+///
+/// * The haystack to search, provided to the [`Input::new`] constructor. This
+/// is the only required parameter.
+/// * The span _within_ the haystack to limit a search to. (The default
+/// is the entire haystack.) This is configured via [`Input::span`] or
+/// [`Input::range`].
+/// * Whether to run an unanchored (matches can occur anywhere after the
+/// start of the search) or anchored (matches can only occur beginning at
+/// the start of the search) search. Unanchored search is the default. This is
+/// configured via [`Input::anchored`].
+/// * Whether to quit the search as soon as a match has been found, regardless
+/// of the [`MatchKind`] that the searcher was built with. This is configured
+/// via [`Input::earliest`].
+///
+/// For most cases, the defaults for all optional parameters are appropriate.
+/// The utility of this type is that it keeps the default or common case simple
+/// while permitting tweaking parameters in more niche use cases while reusing
+/// the same search APIs.
+///
+/// # Valid bounds and search termination
+///
+/// An `Input` permits setting the bounds of a search via either
+/// [`Input::span`] or [`Input::range`]. The bounds set must be valid, or
+/// else a panic will occur. Bounds are valid if and only if:
+///
+/// * The bounds represent a valid range into the input's haystack.
+/// * **or** the end bound is a valid ending bound for the haystack *and*
+/// the start bound is exactly one greater than the end bound.
+///
+/// In the latter case, [`Input::is_done`] will return true, which indicates
+/// that any search receiving such an input should immediately return with no
+/// match.
+///
+/// Other than representing "search is complete," the `Input::span` and
+/// `Input::range` APIs are never necessary. Instead, callers can slice the
+/// haystack instead, e.g., with `&haystack[start..end]`. With that said, they
+/// can be more convenient than slicing because the match positions reported
+/// when using `Input::span` or `Input::range` are in terms of the original
+/// haystack. If you instead use `&haystack[start..end]`, then you'll need to
+/// add `start` to any match position returned in order for it to be a correct
+/// index into `haystack`.
+///
+/// # Example: `&str` and `&[u8]` automatically convert to an `Input`
+///
+/// There is a `From<&T> for Input` implementation for all `T: AsRef<[u8]>`.
+/// Additionally, the [`AhoCorasick`](crate::AhoCorasick) search APIs accept
+/// a `Into<Input>`. These two things combined together mean you can provide
+/// things like `&str` and `&[u8]` to search APIs when the defaults are
+/// suitable, but also an `Input` when they're not. For example:
+///
+/// ```
+/// use aho_corasick::{AhoCorasick, Anchored, Input, Match, StartKind};
+///
+/// // Build a searcher that supports both unanchored and anchored modes.
+/// let ac = AhoCorasick::builder()
+/// .start_kind(StartKind::Both)
+/// .build(&["abcd", "b"])
+/// .unwrap();
+/// let haystack = "abcd";
+///
+/// // A search using default parameters is unanchored. With standard
+/// // semantics, this finds `b` first.
+/// assert_eq!(
+/// Some(Match::must(1, 1..2)),
+/// ac.find(haystack),
+/// );
+/// // Using the same 'find' routine, we can provide an 'Input' explicitly
+/// // that is configured to do an anchored search. Since 'b' doesn't start
+/// // at the beginning of the search, it is not reported as a match.
+/// assert_eq!(
+/// Some(Match::must(0, 0..4)),
+/// ac.find(Input::new(haystack).anchored(Anchored::Yes)),
+/// );
+/// ```
+#[derive(Clone)]
+pub struct Input<'h> {
+ haystack: &'h [u8],
+ span: Span,
+ anchored: Anchored,
+ earliest: bool,
+}
+
+impl<'h> Input<'h> {
+ /// Create a new search configuration for the given haystack.
+ #[inline]
+ pub fn new<H: ?Sized + AsRef<[u8]>>(haystack: &'h H) -> Input<'h> {
+ Input {
+ haystack: haystack.as_ref(),
+ span: Span { start: 0, end: haystack.as_ref().len() },
+ anchored: Anchored::No,
+ earliest: false,
+ }
+ }
+
+ /// Set the span for this search.
+ ///
+ /// This routine is generic over how a span is provided. While
+ /// a [`Span`] may be given directly, one may also provide a
+ /// `std::ops::Range<usize>`. To provide anything supported by range
+ /// syntax, use the [`Input::range`] method.
+ ///
+ /// The default span is the entire haystack.
+ ///
+ /// Note that [`Input::range`] overrides this method and vice versa.
+ ///
+ /// # Panics
+ ///
+ /// This panics if the given span does not correspond to valid bounds in
+ /// the haystack or the termination of a search.
+ ///
+ /// # Example
+ ///
+ /// This example shows how the span of the search can impact whether a
+ /// match is reported or not.
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, Input, MatchKind};
+ ///
+ /// let patterns = &["b", "abcd", "abc"];
+ /// let haystack = "abcd";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostFirst)
+ /// .build(patterns)
+ /// .unwrap();
+ /// let input = Input::new(haystack).span(0..3);
+ /// let mat = ac.try_find(input)?.expect("should have a match");
+ /// // Without the span stopping the search early, 'abcd' would be reported
+ /// // because it is the correct leftmost-first match.
+ /// assert_eq!("abc", &haystack[mat.span()]);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ #[inline]
+ pub fn span<S: Into<Span>>(mut self, span: S) -> Input<'h> {
+ self.set_span(span);
+ self
+ }
+
+ /// Like `Input::span`, but accepts any range instead.
+ ///
+ /// The default range is the entire haystack.
+ ///
+ /// Note that [`Input::span`] overrides this method and vice versa.
+ ///
+ /// # Panics
+ ///
+ /// This routine will panic if the given range could not be converted
+ /// to a valid [`Range`]. For example, this would panic when given
+ /// `0..=usize::MAX` since it cannot be represented using a half-open
+ /// interval in terms of `usize`.
+ ///
+ /// This routine also panics if the given range does not correspond to
+ /// valid bounds in the haystack or the termination of a search.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use aho_corasick::Input;
+ ///
+ /// let input = Input::new("foobar");
+ /// assert_eq!(0..6, input.get_range());
+ ///
+ /// let input = Input::new("foobar").range(2..=4);
+ /// assert_eq!(2..5, input.get_range());
+ /// ```
+ #[inline]
+ pub fn range<R: RangeBounds<usize>>(mut self, range: R) -> Input<'h> {
+ self.set_range(range);
+ self
+ }
+
+ /// Sets the anchor mode of a search.
+ ///
+ /// When a search is anchored (via [`Anchored::Yes`]), a match must begin
+ /// at the start of a search. When a search is not anchored (that's
+ /// [`Anchored::No`]), searchers will look for a match anywhere in the
+ /// haystack.
+ ///
+ /// By default, the anchored mode is [`Anchored::No`].
+ ///
+ /// # Support for anchored searches
+ ///
+ /// Anchored or unanchored searches might not always be available,
+ /// depending on the type of searcher used and its configuration:
+ ///
+ /// * [`noncontiguous::NFA`](crate::nfa::noncontiguous::NFA) always
+ /// supports both unanchored and anchored searches.
+ /// * [`contiguous::NFA`](crate::nfa::contiguous::NFA) always supports both
+ /// unanchored and anchored searches.
+ /// * [`dfa::DFA`](crate::dfa::DFA) supports only unanchored
+ /// searches by default.
+ /// [`dfa::Builder::start_kind`](crate::dfa::Builder::start_kind) can
+ /// be used to change the default to supporting both kinds of searches
+ /// or even just anchored searches.
+ /// * [`AhoCorasick`](crate::AhoCorasick) inherits the same setup as a
+ /// `DFA`. Namely, it only supports unanchored searches by default, but
+ /// [`AhoCorasickBuilder::start_kind`](crate::AhoCorasickBuilder::start_kind)
+ /// can change this.
+ ///
+ /// If you try to execute a search using a `try_` ("fallible") method with
+ /// an unsupported anchor mode, then an error will be returned. For calls
+ /// to infallible search methods, a panic will result.
+ ///
+ /// # Example
+ ///
+ /// This demonstrates the differences between an anchored search and
+ /// an unanchored search. Notice that we build our `AhoCorasick` searcher
+ /// with [`StartKind::Both`] so that it supports both unanchored and
+ /// anchored searches simultaneously.
+ ///
+ /// ```
+ /// use aho_corasick::{
+ /// AhoCorasick, Anchored, Input, MatchKind, StartKind,
+ /// };
+ ///
+ /// let patterns = &["bcd"];
+ /// let haystack = "abcd";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .start_kind(StartKind::Both)
+ /// .build(patterns)
+ /// .unwrap();
+ ///
+ /// // Note that 'Anchored::No' is the default, so it doesn't need to
+ /// // be explicitly specified here.
+ /// let input = Input::new(haystack);
+ /// let mat = ac.try_find(input)?.expect("should have a match");
+ /// assert_eq!("bcd", &haystack[mat.span()]);
+ ///
+ /// // While 'bcd' occurs in the haystack, it does not begin where our
+ /// // search begins, so no match is found.
+ /// let input = Input::new(haystack).anchored(Anchored::Yes);
+ /// assert_eq!(None, ac.try_find(input)?);
+ ///
+ /// // However, if we start our search where 'bcd' starts, then we will
+ /// // find a match.
+ /// let input = Input::new(haystack).range(1..).anchored(Anchored::Yes);
+ /// let mat = ac.try_find(input)?.expect("should have a match");
+ /// assert_eq!("bcd", &haystack[mat.span()]);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ #[inline]
+ pub fn anchored(mut self, mode: Anchored) -> Input<'h> {
+ self.set_anchored(mode);
+ self
+ }
+
+ /// Whether to execute an "earliest" search or not.
+ ///
+ /// When running a non-overlapping search, an "earliest" search will
+ /// return the match location as early as possible. For example, given
+ /// the patterns `abc` and `b`, and a haystack of `abc`, a normal
+ /// leftmost-first search will return `abc` as a match. But an "earliest"
+ /// search will return as soon as it is known that a match occurs, which
+ /// happens once `b` is seen.
+ ///
+ /// Note that when using [`MatchKind::Standard`], the "earliest" option
+ /// has no effect since standard semantics are already "earliest." Note
+ /// also that this has no effect in overlapping searches, since overlapping
+ /// searches also use standard semantics and report all possible matches.
+ ///
+ /// This is disabled by default.
+ ///
+ /// # Example
+ ///
+ /// This example shows the difference between "earliest" searching and
+ /// normal leftmost searching.
+ ///
+ /// ```
+ /// use aho_corasick::{AhoCorasick, Anchored, Input, MatchKind, StartKind};
+ ///
+ /// let patterns = &["abc", "b"];
+ /// let haystack = "abc";
+ ///
+ /// let ac = AhoCorasick::builder()
+ /// .match_kind(MatchKind::LeftmostFirst)
+ /// .build(patterns)
+ /// .unwrap();
+ ///
+ /// // The normal leftmost-first match.
+ /// let input = Input::new(haystack);
+ /// let mat = ac.try_find(input)?.expect("should have a match");
+ /// assert_eq!("abc", &haystack[mat.span()]);
+ ///
+ /// // The "earliest" possible match, even if it isn't leftmost-first.
+ /// let input = Input::new(haystack).earliest(true);
+ /// let mat = ac.try_find(input)?.expect("should have a match");
+ /// assert_eq!("b", &haystack[mat.span()]);
+ ///
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
+ /// ```
+ #[inline]
+ pub fn earliest(mut self, yes: bool) -> Input<'h> {
+ self.set_earliest(yes);
+ self
+ }
+
+ /// Set the span for this search configuration.
+ ///
+ /// This is like the [`Input::span`] method, except this mutates the
+ /// span in place.
+ ///
+ /// This routine is generic over how a span is provided. While
+ /// a [`Span`] may be given directly, one may also provide a
+ /// `std::ops::Range<usize>`.
+ ///
+ /// # Panics
+ ///
+ /// This panics if the given span does not correspond to valid bounds in
+ /// the haystack or the termination of a search.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use aho_corasick::Input;
+ ///
+ /// let mut input = Input::new("foobar");
+ /// assert_eq!(0..6, input.get_range());
+ /// input.set_span(2..4);
+ /// assert_eq!(2..4, input.get_range());
+ /// ```
+ #[inline]
+ pub fn set_span<S: Into<Span>>(&mut self, span: S) {
+ let span = span.into();
+ assert!(
+ span.end <= self.haystack.len()
+ && span.start <= span.end.wrapping_add(1),
+ "invalid span {:?} for haystack of length {}",
+ span,
+ self.haystack.len(),
+ );
+ self.span = span;
+ }
+
+ /// Set the span for this search configuration given any range.
+ ///
+ /// This is like the [`Input::range`] method, except this mutates the
+ /// span in place.
+ ///
+ /// # Panics
+ ///
+ /// This routine will panic if the given range could not be converted
+ /// to a valid [`Range`]. For example, this would panic when given
+ /// `0..=usize::MAX` since it cannot be represented using a half-open
+ /// interval in terms of `usize`.
+ ///
+ /// This routine also panics if the given range does not correspond to
+ /// valid bounds in the haystack or the termination of a search.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use aho_corasick::Input;
+ ///
+ /// let mut input = Input::new("foobar");
+ /// assert_eq!(0..6, input.get_range());
+ /// input.set_range(2..=4);
+ /// assert_eq!(2..5, input.get_range());
+ /// ```
+ #[inline]
+ pub fn set_range<R: RangeBounds<usize>>(&mut self, range: R) {
+ use core::ops::Bound;
+
+ // It's a little weird to convert ranges into spans, and then spans
+ // back into ranges when we actually slice the haystack. Because
+ // of that process, we always represent everything as a half-open
+        // interval. Therefore, handling things like m..=n is a little awkward.
+ let start = match range.start_bound() {
+ Bound::Included(&i) => i,
+ // Can this case ever happen? Range syntax doesn't support it...
+ Bound::Excluded(&i) => i.checked_add(1).unwrap(),
+ Bound::Unbounded => 0,
+ };
+ let end = match range.end_bound() {
+ Bound::Included(&i) => i.checked_add(1).unwrap(),
+ Bound::Excluded(&i) => i,
+ Bound::Unbounded => self.haystack().len(),
+ };
+ self.set_span(Span { start, end });
+ }
+
+ /// Set the starting offset for the span for this search configuration.
+ ///
+ /// This is a convenience routine for only mutating the start of a span
+ /// without having to set the entire span.
+ ///
+ /// # Panics
+ ///
+ /// This panics if the given span does not correspond to valid bounds in
+ /// the haystack or the termination of a search.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use aho_corasick::Input;
+ ///
+ /// let mut input = Input::new("foobar");
+ /// assert_eq!(0..6, input.get_range());
+ /// input.set_start(5);
+ /// assert_eq!(5..6, input.get_range());
+ /// ```
+ #[inline]
+ pub fn set_start(&mut self, start: usize) {
+ self.set_span(Span { start, ..self.get_span() });
+ }
+
+ /// Set the ending offset for the span for this search configuration.
+ ///
+ /// This is a convenience routine for only mutating the end of a span
+ /// without having to set the entire span.
+ ///
+ /// # Panics
+ ///
+ /// This panics if the given span does not correspond to valid bounds in
+ /// the haystack or the termination of a search.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use aho_corasick::Input;
+ ///
+ /// let mut input = Input::new("foobar");
+ /// assert_eq!(0..6, input.get_range());
+ /// input.set_end(5);
+ /// assert_eq!(0..5, input.get_range());
+ /// ```
+ #[inline]
+ pub fn set_end(&mut self, end: usize) {
+ self.set_span(Span { end, ..self.get_span() });
+ }
+
+ /// Set the anchor mode of a search.
+ ///
+ /// This is like [`Input::anchored`], except it mutates the search
+ /// configuration in place.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use aho_corasick::{Anchored, Input};
+ ///
+ /// let mut input = Input::new("foobar");
+ /// assert_eq!(Anchored::No, input.get_anchored());
+ ///
+ /// input.set_anchored(Anchored::Yes);
+ /// assert_eq!(Anchored::Yes, input.get_anchored());
+ /// ```
+ #[inline]
+ pub fn set_anchored(&mut self, mode: Anchored) {
+ self.anchored = mode;
+ }
+
+ /// Set whether the search should execute in "earliest" mode or not.
+ ///
+ /// This is like [`Input::earliest`], except it mutates the search
+ /// configuration in place.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use aho_corasick::Input;
+ ///
+ /// let mut input = Input::new("foobar");
+ /// assert!(!input.get_earliest());
+ /// input.set_earliest(true);
+ /// assert!(input.get_earliest());
+ /// ```
+ #[inline]
+ pub fn set_earliest(&mut self, yes: bool) {
+ self.earliest = yes;
+ }
+
+ /// Return a borrow of the underlying haystack as a slice of bytes.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use aho_corasick::Input;
+ ///
+ /// let input = Input::new("foobar");
+ /// assert_eq!(b"foobar", input.haystack());
+ /// ```
+ #[inline]
+ pub fn haystack(&self) -> &[u8] {
+ self.haystack
+ }
+
+ /// Return the start position of this search.
+ ///
+ /// This is a convenience routine for `search.get_span().start()`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use aho_corasick::Input;
+ ///
+ /// let input = Input::new("foobar");
+ /// assert_eq!(0, input.start());
+ ///
+ /// let input = Input::new("foobar").span(2..4);
+ /// assert_eq!(2, input.start());
+ /// ```
+ #[inline]
+ pub fn start(&self) -> usize {
+ self.get_span().start
+ }
+
+ /// Return the end position of this search.
+ ///
+ /// This is a convenience routine for `search.get_span().end()`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use aho_corasick::Input;
+ ///
+ /// let input = Input::new("foobar");
+ /// assert_eq!(6, input.end());
+ ///
+ /// let input = Input::new("foobar").span(2..4);
+ /// assert_eq!(4, input.end());
+ /// ```
+ #[inline]
+ pub fn end(&self) -> usize {
+ self.get_span().end
+ }
+
+ /// Return the span for this search configuration.
+ ///
+ /// If one was not explicitly set, then the span corresponds to the entire
+ /// range of the haystack.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use aho_corasick::{Input, Span};
+ ///
+ /// let input = Input::new("foobar");
+ /// assert_eq!(Span { start: 0, end: 6 }, input.get_span());
+ /// ```
+ #[inline]
+ pub fn get_span(&self) -> Span {
+ self.span
+ }
+
+ /// Return the span as a range for this search configuration.
+ ///
+ /// If one was not explicitly set, then the span corresponds to the entire
+ /// range of the haystack.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use aho_corasick::Input;
+ ///
+ /// let input = Input::new("foobar");
+ /// assert_eq!(0..6, input.get_range());
+ /// ```
+ #[inline]
+ pub fn get_range(&self) -> Range<usize> {
+ self.get_span().range()
+ }
+
+ /// Return the anchored mode for this search configuration.
+ ///
+ /// If no anchored mode was set, then it defaults to [`Anchored::No`].
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use aho_corasick::{Anchored, Input};
+ ///
+ /// let mut input = Input::new("foobar");
+ /// assert_eq!(Anchored::No, input.get_anchored());
+ ///
+ /// input.set_anchored(Anchored::Yes);
+ /// assert_eq!(Anchored::Yes, input.get_anchored());
+ /// ```
+ #[inline]
+ pub fn get_anchored(&self) -> Anchored {
+ self.anchored
+ }
+
+ /// Return whether this search should execute in "earliest" mode.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use aho_corasick::Input;
+ ///
+ /// let input = Input::new("foobar");
+ /// assert!(!input.get_earliest());
+ /// ```
+ #[inline]
+ pub fn get_earliest(&self) -> bool {
+ self.earliest
+ }
+
+ /// Return true if this input has been exhausted, which in turn means all
+ /// subsequent searches will return no matches.
+ ///
+ /// This occurs precisely when the start position of this search is greater
+ /// than the end position of the search.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use aho_corasick::Input;
+ ///
+ /// let mut input = Input::new("foobar");
+ /// assert!(!input.is_done());
+ /// input.set_start(6);
+ /// assert!(!input.is_done());
+ /// input.set_start(7);
+ /// assert!(input.is_done());
+ /// ```
+ #[inline]
+ pub fn is_done(&self) -> bool {
+ self.get_span().start > self.get_span().end
+ }
+}
+
+impl<'h> core::fmt::Debug for Input<'h> {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ let mut fmter = f.debug_struct("Input");
+ match core::str::from_utf8(self.haystack()) {
+ Ok(nice) => fmter.field("haystack", &nice),
+ Err(_) => fmter.field("haystack", &self.haystack()),
+ }
+ .field("span", &self.span)
+ .field("anchored", &self.anchored)
+ .field("earliest", &self.earliest)
+ .finish()
+ }
+}
+
+impl<'h, H: ?Sized + AsRef<[u8]>> From<&'h H> for Input<'h> {
+ #[inline]
+ fn from(haystack: &'h H) -> Input<'h> {
+ Input::new(haystack)
+ }
+}
+
+/// A representation of a range in a haystack.
+///
+/// A span corresponds to the starting and ending _byte offsets_ of a
+/// contiguous region of bytes. The starting offset is inclusive while the
+/// ending offset is exclusive. That is, a span is a half-open interval.
+///
+/// A span is used to report the offsets of a match, but it is also used to
+/// convey which region of a haystack should be searched via routines like
+/// [`Input::span`].
+///
+/// This is basically equivalent to a `std::ops::Range<usize>`, except this
+/// type implements `Copy` which makes it more ergonomic to use in the context
+/// of this crate. Indeed, `Span` exists only because `Range<usize>` does
+/// not implement `Copy`. Like a range, this implements `Index` for `[u8]`
+/// and `str`, and `IndexMut` for `[u8]`. For convenience, this also impls
+/// `From<Range>`, which means things like `Span::from(5..10)` work.
+///
+/// There are no constraints on the values of a span. It is, for example, legal
+/// to create a span where `start > end`.
+#[derive(Clone, Copy, Eq, Hash, PartialEq)]
+pub struct Span {
+ /// The start offset of the span, inclusive.
+ pub start: usize,
+ /// The end offset of the span, exclusive.
+ pub end: usize,
+}
+
+impl Span {
+ /// Returns this span as a range.
+ #[inline]
+ pub fn range(&self) -> Range<usize> {
+ Range::from(*self)
+ }
+
+ /// Returns true when this span is empty. That is, when `start >= end`.
+ #[inline]
+ pub fn is_empty(&self) -> bool {
+ self.start >= self.end
+ }
+
+ /// Returns the length of this span.
+ ///
+ /// This returns `0` in precisely the cases that `is_empty` returns `true`.
+ #[inline]
+ pub fn len(&self) -> usize {
+ self.end.saturating_sub(self.start)
+ }
+
+ /// Returns true when the given offset is contained within this span.
+ ///
+ /// Note that an empty span contains no offsets and will always return
+ /// false.
+ #[inline]
+ pub fn contains(&self, offset: usize) -> bool {
+ !self.is_empty() && self.start <= offset && offset <= self.end
+ }
+
+ /// Returns a new span with `offset` added to this span's `start` and `end`
+ /// values.
+ #[inline]
+ pub fn offset(&self, offset: usize) -> Span {
+ Span { start: self.start + offset, end: self.end + offset }
+ }
+}
+
+impl core::fmt::Debug for Span {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ write!(f, "{}..{}", self.start, self.end)
+ }
+}
+
+impl core::ops::Index<Span> for [u8] {
+ type Output = [u8];
+
+ #[inline]
+ fn index(&self, index: Span) -> &[u8] {
+ &self[index.range()]
+ }
+}
+
+impl core::ops::IndexMut<Span> for [u8] {
+ #[inline]
+ fn index_mut(&mut self, index: Span) -> &mut [u8] {
+ &mut self[index.range()]
+ }
+}
+
+impl core::ops::Index<Span> for str {
+ type Output = str;
+
+ #[inline]
+ fn index(&self, index: Span) -> &str {
+ &self[index.range()]
+ }
+}
+
+impl From<Range<usize>> for Span {
+ #[inline]
+ fn from(range: Range<usize>) -> Span {
+ Span { start: range.start, end: range.end }
+ }
+}
+
+impl From<Span> for Range<usize> {
+ #[inline]
+ fn from(span: Span) -> Range<usize> {
+ Range { start: span.start, end: span.end }
+ }
+}
+
+impl PartialEq<Range<usize>> for Span {
+ #[inline]
+ fn eq(&self, range: &Range<usize>) -> bool {
+ self.start == range.start && self.end == range.end
+ }
+}
+
+impl PartialEq<Span> for Range<usize> {
+ #[inline]
+ fn eq(&self, span: &Span) -> bool {
+ self.start == span.start && self.end == span.end
+ }
+}
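+
+// A brief sketch of the conversions and indexing impls above. The haystack
+// and offsets are arbitrary:
+//
+//     let span = Span::from(2..5);
+//     assert_eq!(3, span.len());
+//     assert_eq!(2..5, span.range());
+//     assert_eq!(span, 2..5);
+//     assert_eq!("oba", &"foobar"[span]);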
+
+/// The type of anchored search to perform.
+///
+/// If an Aho-Corasick searcher does not support the anchored mode selected,
+/// then the search will return an error or panic, depending on whether a
+/// fallible or an infallible routine was called.
+#[non_exhaustive]
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum Anchored {
+ /// Run an unanchored search. This means a match may occur anywhere at or
+ /// after the start position of the search up until the end position of the
+ /// search.
+ No,
+ /// Run an anchored search. This means that a match must begin at the start
+ /// position of the search and end before the end position of the search.
+ Yes,
+}
+
+impl Anchored {
+ /// Returns true if and only if this anchor mode corresponds to an anchored
+ /// search.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use aho_corasick::Anchored;
+ ///
+ /// assert!(!Anchored::No.is_anchored());
+ /// assert!(Anchored::Yes.is_anchored());
+ /// ```
+ #[inline]
+ pub fn is_anchored(&self) -> bool {
+ matches!(*self, Anchored::Yes)
+ }
+}
+
+/// A representation of a match reported by an Aho-Corasick searcher.
+///
+/// A match has two essential pieces of information: the [`PatternID`] that
+/// matches, and the [`Span`] of the match in a haystack.
+///
+/// The pattern is identified by an ID, which corresponds to its position
+/// (starting from `0`) relative to other patterns used to construct the
+/// corresponding searcher. If only a single pattern is provided, then all
+/// matches are guaranteed to have a pattern ID of `0`.
+///
+/// Every match reported by a searcher guarantees that its span has its start
+/// offset as less than or equal to its end offset.
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
+pub struct Match {
+ /// The pattern ID.
+ pattern: PatternID,
+ /// The underlying match span.
+ span: Span,
+}
+
+impl Match {
+ /// Create a new match from a pattern ID and a span.
+ ///
+ /// This constructor is generic over how a span is provided. While
+ /// a [`Span`] may be given directly, one may also provide a
+ /// `std::ops::Range<usize>`.
+ ///
+ /// # Panics
+ ///
+ /// This panics if `end < start`.
+ ///
+ /// # Example
+ ///
+ /// This shows how to create a match for the first pattern in an
+ /// Aho-Corasick searcher using convenient range syntax.
+ ///
+ /// ```
+ /// use aho_corasick::{Match, PatternID};
+ ///
+ /// let m = Match::new(PatternID::ZERO, 5..10);
+ /// assert_eq!(0, m.pattern().as_usize());
+ /// assert_eq!(5, m.start());
+ /// assert_eq!(10, m.end());
+ /// ```
+ #[inline]
+ pub fn new<S: Into<Span>>(pattern: PatternID, span: S) -> Match {
+ let span = span.into();
+ assert!(span.start <= span.end, "invalid match span");
+ Match { pattern, span }
+ }
+
+ /// Create a new match from a pattern ID and a byte offset span.
+ ///
+ /// This constructor is generic over how a span is provided. While
+ /// a [`Span`] may be given directly, one may also provide a
+ /// `std::ops::Range<usize>`.
+ ///
+ /// This is like [`Match::new`], but accepts a `usize` instead of a
+ /// [`PatternID`]. This panics if the given `usize` is not representable
+ /// as a `PatternID`.
+ ///
+ /// # Panics
+ ///
+ /// This panics if `end < start` or if `pattern > PatternID::MAX`.
+ ///
+ /// # Example
+ ///
+ /// This shows how to create a match for the third pattern in an
+ /// Aho-Corasick searcher using convenient range syntax.
+ ///
+ /// ```
+ /// use aho_corasick::Match;
+ ///
+ /// let m = Match::must(3, 5..10);
+ /// assert_eq!(3, m.pattern().as_usize());
+ /// assert_eq!(5, m.start());
+ /// assert_eq!(10, m.end());
+ /// ```
+ #[inline]
+ pub fn must<S: Into<Span>>(pattern: usize, span: S) -> Match {
+ Match::new(PatternID::must(pattern), span)
+ }
+
+ /// Returns the ID of the pattern that matched.
+ ///
+ /// The ID of a pattern is derived from the position in which it was
+ /// originally inserted into the corresponding searcher. The first pattern
+ /// has identifier `0`, and each subsequent pattern is `1`, `2` and so on.
+ #[inline]
+ pub fn pattern(&self) -> PatternID {
+ self.pattern
+ }
+
+ /// The starting position of the match.
+ ///
+ /// This is a convenience routine for `Match::span().start`.
+ #[inline]
+ pub fn start(&self) -> usize {
+ self.span().start
+ }
+
+ /// The ending position of the match.
+ ///
+ /// This is a convenience routine for `Match::span().end`.
+ #[inline]
+ pub fn end(&self) -> usize {
+ self.span().end
+ }
+
+ /// Returns the match span as a range.
+ ///
+ /// This is a convenience routine for `Match::span().range()`.
+ #[inline]
+ pub fn range(&self) -> core::ops::Range<usize> {
+ self.span().range()
+ }
+
+ /// Returns the span for this match.
+ #[inline]
+ pub fn span(&self) -> Span {
+ self.span
+ }
+
+ /// Returns true when the span in this match is empty.
+ ///
+ /// An empty match can only be returned when an empty pattern is in the
+ /// Aho-Corasick searcher.
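+ ///
+ /// # Example
+ ///
+ /// A brief illustration, assuming the searcher was built with an empty
+ /// pattern:
+ ///
+ /// ```
+ /// use aho_corasick::AhoCorasick;
+ ///
+ /// let ac = AhoCorasick::new(&[""]).unwrap();
+ /// let mat = ac.find("foo").expect("the empty pattern matches everywhere");
+ /// assert!(mat.is_empty());
+ /// assert_eq!(0, mat.len());
+ /// ```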
+ #[inline]
+ pub fn is_empty(&self) -> bool {
+ self.span().is_empty()
+ }
+
+ /// Returns the length of this match.
+ ///
+ /// This returns `0` in precisely the cases that `is_empty` returns `true`.
+ #[inline]
+ pub fn len(&self) -> usize {
+ self.span().len()
+ }
+
+ /// Returns a new match with `offset` added to its span's `start` and `end`
+ /// values.
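+ ///
+ /// # Example
+ ///
+ /// A small sketch; the offsets here are arbitrary:
+ ///
+ /// ```
+ /// use aho_corasick::Match;
+ ///
+ /// let m = Match::must(0, 2..5);
+ /// let m = m.offset(10);
+ /// assert_eq!(12..15, m.range());
+ /// ```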
+ #[inline]
+ pub fn offset(&self, offset: usize) -> Match {
+ Match {
+ pattern: self.pattern,
+ span: Span {
+ start: self.start() + offset,
+ end: self.end() + offset,
+ },
+ }
+ }
+}
+
+/// A knob for controlling the match semantics of an Aho-Corasick automaton.
+///
+/// There are two generally different ways that Aho-Corasick automatons can
+/// report matches. The first way is the "standard" approach that results from
+/// implementing most textbook explanations of Aho-Corasick. The second way is
+/// to report only the leftmost non-overlapping matches. The leftmost approach
+/// is in turn split into two different ways of resolving ambiguous matches:
+/// leftmost-first and leftmost-longest.
+///
+/// The `Standard` match kind is the default and is the only one that supports
+/// overlapping matches and stream searching. (Trying to find overlapping or
+/// streaming matches using leftmost match semantics will result in an error in
+/// fallible APIs and a panic when using infallible APIs.) The `Standard`
+/// match kind will report matches as they are seen. When searching for
+/// overlapping matches, all possible matches are reported. When searching for
+/// non-overlapping matches, the first match seen is reported. For example, for
+/// non-overlapping matches, given the patterns `abcd` and `b` and the haystack
+/// `abcdef`, only a match for `b` is reported since it is detected first. The
+/// `abcd` match is never reported since it overlaps with the `b` match.
+///
+/// In contrast, the leftmost match kind always prefers the leftmost match
+/// among all possible matches. Given the same example as above with `abcd` and
+/// `b` as patterns and `abcdef` as the haystack, the leftmost match is `abcd`
+/// since it begins before the `b` match, even though the `b` match is detected
+/// before the `abcd` match. In this case, the `b` match is not reported at all
+/// since it overlaps with the `abcd` match.
+///
+/// The difference between leftmost-first and leftmost-longest is in how they
+/// resolve ambiguous matches when there are multiple leftmost matches to
+/// choose from. Leftmost-first always chooses the pattern that was provided
+/// earliest, whereas leftmost-longest always chooses the longest matching
+/// pattern. For example, given the patterns `a` and `ab` and the subject
+/// string `ab`, the leftmost-first match is `a` but the leftmost-longest match
+/// is `ab`. Conversely, if the patterns were given in reverse order, i.e.,
+/// `ab` and `a`, then both the leftmost-first and leftmost-longest matches
+/// would be `ab`. Stated differently, the leftmost-first match depends on the
+/// order in which the patterns were given to the Aho-Corasick automaton.
+/// Because of that, when leftmost-first matching is used, if a pattern `A`
+/// that appears before a pattern `B` is a prefix of `B`, then it is impossible
+/// to ever observe a match of `B`.
+///
+/// If you're not sure which match kind to pick, then stick with the standard
+/// kind, which is the default. In particular, if you need overlapping or
+/// streaming matches, then you _must_ use the standard kind. The leftmost
+/// kinds are useful in specific circumstances. For example, leftmost-first can
+/// be very useful as a way to implement match priority based on the order of
+/// patterns given and leftmost-longest can be useful for dictionary searching
+/// such that only the longest matching words are reported.
+///
+/// # Relationship with regular expression alternations
+///
+/// Understanding match semantics can be a little tricky, and one easy way
+/// to conceptualize non-overlapping matches from an Aho-Corasick automaton
+/// is to think about them as a simple alternation of literals in a regular
+/// expression. For example, let's say we wanted to match the strings
+/// `Sam` and `Samwise`, which would turn into the regex `Sam|Samwise`. It
+/// turns out that regular expression engines have two different ways of
+/// matching this alternation. The first way, leftmost-longest, is commonly
+/// found in POSIX compatible implementations of regular expressions (such as
+/// `grep`). The second way, leftmost-first, is commonly found in backtracking
+/// implementations such as Perl. (Some regex engines, such as RE2 and Rust's
+/// regex engine, do not use backtracking, but still implement leftmost-first
+/// semantics in an effort to match the behavior of dominant backtracking
+/// regex engines such as those found in Perl, Ruby, Python, JavaScript and
+/// PHP.)
+///
+/// That is, when matching `Sam|Samwise` against `Samwise`, a POSIX regex
+/// will match `Samwise` because it is the longest possible match, but a
+/// Perl-like regex will match `Sam` since it appears earlier in the
+/// alternation. Indeed, the regex `Sam|Samwise` in a Perl-like regex engine
+/// will never match `Samwise` since `Sam` will always have higher priority.
+/// Conversely, matching the regex `Samwise|Sam` against `Samwise` will lead to
+/// a match of `Samwise` in both POSIX and Perl-like regexes since `Samwise` is
+/// still the longest match, but it also appears earlier than `Sam`.
+///
+/// The "standard" match semantics of Aho-Corasick generally don't correspond
+/// to the match semantics of any large group of regex implementations, so
+/// there's no direct analogy that can be made here. Standard match semantics
+/// are generally useful for overlapping matches, or if you just want to see
+/// matches as they are detected.
+///
+/// The main conclusion to draw from this section is that the match semantics
+/// can be tweaked to precisely match either Perl-like regex alternations or
+/// POSIX regex alternations.
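+///
+/// # Example
+///
+/// A sketch of the three match kinds applied to the `Sam`/`Samwise` example
+/// above, using the crate's top-level `AhoCorasick` searcher:
+///
+/// ```
+/// use aho_corasick::{AhoCorasick, MatchKind};
+///
+/// let haystack = "Samwise";
+///
+/// // Standard semantics report the match detected first: `Sam`.
+/// let ac = AhoCorasick::builder()
+///     .match_kind(MatchKind::Standard) // this is the default
+///     .build(&["Samwise", "Sam"])
+///     .unwrap();
+/// let mat = ac.find(haystack).expect("should have a match");
+/// assert_eq!("Sam", &haystack[mat.range()]);
+///
+/// // Leftmost-first prefers the pattern given first: `Samwise`.
+/// let ac = AhoCorasick::builder()
+///     .match_kind(MatchKind::LeftmostFirst)
+///     .build(&["Samwise", "Sam"])
+///     .unwrap();
+/// let mat = ac.find(haystack).expect("should have a match");
+/// assert_eq!("Samwise", &haystack[mat.range()]);
+///
+/// // Leftmost-longest prefers the longest match, regardless of the order
+/// // in which the patterns were given: `Samwise`.
+/// let ac = AhoCorasick::builder()
+///     .match_kind(MatchKind::LeftmostLongest)
+///     .build(&["Sam", "Samwise"])
+///     .unwrap();
+/// let mat = ac.find(haystack).expect("should have a match");
+/// assert_eq!("Samwise", &haystack[mat.range()]);
+/// ```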
+#[non_exhaustive]
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum MatchKind {
+ /// Use standard match semantics, which support overlapping matches. When
+ /// used with non-overlapping matches, matches are reported as they are
+ /// seen.
+ Standard,
+ /// Use leftmost-first match semantics, which reports leftmost matches.
+ /// When there are multiple possible leftmost matches, the match
+ /// corresponding to the pattern that appeared earlier when constructing
+ /// the automaton is reported.
+ ///
+ /// This does **not** support overlapping matches or stream searching. If
+ /// this match kind is used, attempting to find overlapping matches or
+ /// stream matches will fail.
+ LeftmostFirst,
+ /// Use leftmost-longest match semantics, which reports leftmost matches.
+ /// When there are multiple possible leftmost matches, the longest match
+ /// is chosen.
+ ///
+ /// This does **not** support overlapping matches or stream searching. If
+ /// this match kind is used, attempting to find overlapping matches or
+ /// stream matches will fail.
+ LeftmostLongest,
+}
+
+/// The default match kind is `MatchKind::Standard`.
+impl Default for MatchKind {
+ fn default() -> MatchKind {
+ MatchKind::Standard
+ }
+}
+
+impl MatchKind {
+ #[inline]
+ pub(crate) fn is_standard(&self) -> bool {
+ matches!(*self, MatchKind::Standard)
+ }
+
+ #[inline]
+ pub(crate) fn is_leftmost(&self) -> bool {
+ matches!(*self, MatchKind::LeftmostFirst | MatchKind::LeftmostLongest)
+ }
+
+ #[inline]
+ pub(crate) fn is_leftmost_first(&self) -> bool {
+ matches!(*self, MatchKind::LeftmostFirst)
+ }
+
+ /// Convert this match kind into a packed match kind. If this match kind
+ /// corresponds to standard semantics, then this returns None, since
+ /// packed searching does not support standard semantics.
+ #[inline]
+ pub(crate) fn as_packed(&self) -> Option<crate::packed::MatchKind> {
+ match *self {
+ MatchKind::Standard => None,
+ MatchKind::LeftmostFirst => {
+ Some(crate::packed::MatchKind::LeftmostFirst)
+ }
+ MatchKind::LeftmostLongest => {
+ Some(crate::packed::MatchKind::LeftmostLongest)
+ }
+ }
+ }
+}
+
+/// The kind of anchored starting configurations to support in an Aho-Corasick
+/// searcher.
+///
+/// Depending on which searcher is used internally by
+/// [`AhoCorasick`](crate::AhoCorasick), supporting both unanchored
+/// and anchored searches can be quite costly. For this reason,
+/// [`AhoCorasickBuilder::start_kind`](crate::AhoCorasickBuilder::start_kind)
+/// can be used to configure whether your searcher supports unanchored,
+/// anchored or both kinds of searches.
+///
+/// This searcher configuration knob works in concert with the search time
+/// configuration [`Input::anchored`]. Namely, if one requests an unsupported
+/// anchored mode, then the search will either panic or return an error,
+/// depending on whether you're using infallible or fallible APIs, respectively.
+///
+/// `AhoCorasick` by default only supports unanchored searches.
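+///
+/// # Example
+///
+/// A sketch of configuring an anchored-only searcher and then running both
+/// kinds of searches against it (this assumes the builder and `Input` APIs
+/// from the crate root):
+///
+/// ```
+/// use aho_corasick::{AhoCorasick, Anchored, Input, StartKind};
+///
+/// let ac = AhoCorasick::builder()
+///     .start_kind(StartKind::Anchored)
+///     .build(&["b", "abcd"])
+///     .unwrap();
+///
+/// // Anchored searches are supported, and a match must begin at the start
+/// // of the search. Only `abcd` can match here.
+/// let input = Input::new("abcdef").anchored(Anchored::Yes);
+/// let mat = ac.try_find(input).unwrap().expect("should have a match");
+/// assert_eq!(0..4, mat.range());
+///
+/// // But requesting an unanchored search returns an error.
+/// let input = Input::new("abcdef").anchored(Anchored::No);
+/// assert!(ac.try_find(input).is_err());
+/// ```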
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum StartKind {
+ /// Support both anchored and unanchored searches.
+ Both,
+ /// Support only unanchored searches. Requesting an anchored search will
+ /// return an error in fallible APIs and panic in infallible APIs.
+ Unanchored,
+ /// Support only anchored searches. Requesting an unanchored search will
+ /// return an error in fallible APIs and panic in infallible APIs.
+ Anchored,
+}
+
+impl Default for StartKind {
+ fn default() -> StartKind {
+ StartKind::Unanchored
+ }
+}
diff --git a/third_party/rust/aho-corasick/src/util/special.rs b/third_party/rust/aho-corasick/src/util/special.rs
new file mode 100644
index 0000000000..beeba40c89
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/util/special.rs
@@ -0,0 +1,42 @@
+use crate::util::primitives::StateID;
+
+/// A collection of sentinel state IDs for Aho-Corasick automata.
+///
+/// This specifically enables the technique by which we determine which states
+/// are dead, matches or start states. Namely, by arranging states in a
+/// particular order, we can determine the type of a state simply by looking at
+/// its ID.
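+///
+/// As a rough sketch, the checks this layout enables are simple integer
+/// comparisons. (The handling of the dead/fail sentinel states is elided
+/// here; the automaton implementations deal with those separately.)
+///
+/// ```ignore
+/// fn is_special(s: &Special, sid: StateID) -> bool {
+///     sid <= s.max_special_id
+/// }
+///
+/// fn is_match(s: &Special, sid: StateID) -> bool {
+///     sid <= s.max_match_id
+/// }
+///
+/// fn is_start(s: &Special, sid: StateID) -> bool {
+///     sid == s.start_unanchored_id || sid == s.start_anchored_id
+/// }
+/// ```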
+#[derive(Clone, Debug)]
+pub(crate) struct Special {
+ /// The maximum ID of all the "special" states. This corresponds to
+ /// start_anchored_id when a prefilter is active, or to max_match_id when
+ /// a prefilter is not active. The idea here is that if there is no
+ /// prefilter, then there is no point in treating start states as special.
+ pub(crate) max_special_id: StateID,
+ /// The maximum ID of all the match states. Any state ID bigger than this
+ /// is guaranteed to be a non-match ID.
+ ///
+ /// It is possible and legal for max_match_id to be equal to
+ /// start_anchored_id, which occurs precisely in the case where the empty
+ /// string is a pattern that was added to the underlying automaton.
+ pub(crate) max_match_id: StateID,
+ /// The state ID of the start state used for unanchored searches.
+ pub(crate) start_unanchored_id: StateID,
+ /// The state ID of the start state used for anchored searches. This is
+ /// always start_unanchored_id+1.
+ pub(crate) start_anchored_id: StateID,
+}
+
+impl Special {
+ /// Create a new set of "special" state IDs with all IDs initialized to
+ /// zero. The general idea here is that they will be updated and set to
+ /// correct values later.
+ pub(crate) fn zero() -> Special {
+ Special {
+ max_special_id: StateID::ZERO,
+ max_match_id: StateID::ZERO,
+ start_unanchored_id: StateID::ZERO,
+ start_anchored_id: StateID::ZERO,
+ }
+ }
+}