1 files changed, 2107 insertions, 302 deletions
diff --git a/vendor/regex-automata/src/nfa/thompson/pikevm.rs b/vendor/regex-automata/src/nfa/thompson/pikevm.rs
index 7572f9f10..0128c151a 100644
--- a/vendor/regex-automata/src/nfa/thompson/pikevm.rs
+++ b/vendor/regex-automata/src/nfa/thompson/pikevm.rs
@@ -1,18 +1,71 @@
-use alloc::{sync::Arc, vec, vec::Vec};
+/*!
+An NFA backed Pike VM for executing regex searches with capturing groups.
+
+This module provides a [`PikeVM`] that works by simulating an NFA and
+resolving all spans of capturing groups that participate in a match.
+*/
+
+#[cfg(feature = "internal-instrument-pikevm")]
+use core::cell::RefCell;
+
+use alloc::{vec, vec::Vec};
 
 use crate::{
-    nfa::thompson::{self, Error, State, NFA},
+    nfa::thompson::{self, BuildError, State, NFA},
     util::{
-        id::{PatternID, StateID},
-        matchtypes::MultiMatch,
+        captures::Captures,
+        empty, iter,
+        prefilter::Prefilter,
+        primitives::{NonMaxUsize, PatternID, SmallIndex, StateID},
+        search::{
+            Anchored, HalfMatch, Input, Match, MatchKind, PatternSet, Span,
+        },
         sparse_set::SparseSet,
     },
 };
 
-#[derive(Clone, Copy, Debug, Default)]
+/// A simple macro for conditionally executing instrumentation logic when
+/// the 'trace' log level is enabled. This is a compile-time no-op when the
+/// 'internal-instrument-pikevm' feature isn't enabled. The intent here is that
+/// this makes it easier to avoid doing extra work when instrumentation isn't
+/// enabled.
+///
+/// This macro accepts a closure of type `|&mut Counters|`. The closure can
+/// then increment counters (or whatever) in accordance with what one wants
+/// to track.
+macro_rules! instrument {
+    ($fun:expr) => {
+        #[cfg(feature = "internal-instrument-pikevm")]
+        {
+            let fun: &mut dyn FnMut(&mut Counters) = &mut $fun;
+            COUNTERS.with(|c: &RefCell<Counters>| fun(&mut *c.borrow_mut()));
+        }
+    };
+}
+
+#[cfg(feature = "internal-instrument-pikevm")]
+std::thread_local! {
+    /// Effectively global state used to keep track of instrumentation
+    /// counters. The "proper" way to do this is to thread it through the
+    /// PikeVM, but it makes the code quite icky. Since this is just a
+    /// debugging feature, we're content to relegate it to thread local
+    /// state. When instrumentation is enabled, the counters are reset at the
+    /// beginning of every search and printed (with the 'trace' log level) at
+    /// the end of every search.
+    static COUNTERS: RefCell<Counters> = RefCell::new(Counters::empty());
+}
+
+/// The configuration used for building a [`PikeVM`].
+///
+/// A PikeVM configuration is a simple data object that is typically used with
+/// [`Builder::configure`]. It can be cheaply cloned.
+///
+/// A default configuration can be created either with `Config::new`, or
+/// perhaps more conveniently, with [`PikeVM::config`].
+#[derive(Clone, Debug, Default)]
 pub struct Config {
-    anchored: Option<bool>,
-    utf8: Option<bool>,
+    match_kind: Option<MatchKind>,
+    pre: Option<Option<Prefilter>>,
 }
 
 impl Config {
@@ -21,37 +74,172 @@ impl Config {
         Config::default()
     }
 
-    pub fn anchored(mut self, yes: bool) -> Config {
-        self.anchored = Some(yes);
+    /// Set the desired match semantics.
+    ///
+    /// The default is [`MatchKind::LeftmostFirst`], which corresponds to the
+    /// match semantics of Perl-like regex engines. That is, when multiple
+    /// patterns would match at the same leftmost position, the pattern that
+    /// appears first in the concrete syntax is chosen.
+    ///
+    /// Currently, the only other kind of match semantics supported is
+    /// [`MatchKind::All`]. This corresponds to "classical DFA" construction
+    /// where all possible matches are visited in the NFA by the `PikeVM`.
+    ///
+    /// Typically, `All` is used when one wants to execute an overlapping
+    /// search and `LeftmostFirst` otherwise. In particular, it rarely makes
+    /// sense to use `All` with the various "leftmost" find routines, since the
+    /// leftmost routines depend on the `LeftmostFirst` automata construction
+    /// strategy. Specifically, `LeftmostFirst` results in the `PikeVM`
+    /// simulating dead states as a way to terminate the search and report a
+    /// match. `LeftmostFirst` also supports non-greedy matches using this
+    /// strategy where as `All` does not.
+    pub fn match_kind(mut self, kind: MatchKind) -> Config {
+        self.match_kind = Some(kind);
         self
     }
 
-    pub fn utf8(mut self, yes: bool) -> Config {
-        self.utf8 = Some(yes);
+    /// Set a prefilter to be used whenever a start state is entered.
+    ///
+    /// A [`Prefilter`] in this context is meant to accelerate searches by
+    /// looking for literal prefixes that every match for the corresponding
+    /// pattern (or patterns) must start with. Once a prefilter produces a
+    /// match, the underlying search routine continues on to try and confirm
+    /// the match.
+    ///
+    /// Be warned that setting a prefilter does not guarantee that the search
+    /// will be faster. While it's usually a good bet, if the prefilter
+    /// produces a lot of false positive candidates (i.e., positions matched
+    /// by the prefilter but not by the regex), then the overall result can
+    /// be slower than if you had just executed the regex engine without any
+    /// prefilters.
+    ///
+    /// By default no prefilter is set.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::pikevm::PikeVM,
+    ///     util::prefilter::Prefilter,
+    ///     Input, Match, MatchKind,
+    /// };
+    ///
+    /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["foo", "bar"]);
+    /// let re = PikeVM::builder()
+    ///     .configure(PikeVM::config().prefilter(pre))
+    ///     .build(r"(foo|bar)[a-z]+")?;
+    /// let mut cache = re.create_cache();
+    /// let input = Input::new("foo1 barfox bar");
+    /// assert_eq!(Some(Match::must(0, 5..11)), re.find(&mut cache, input));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Be warned though that an incorrect prefilter can lead to incorrect
+    /// results!
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::pikevm::PikeVM,
+    ///     util::prefilter::Prefilter,
+    ///     Input, HalfMatch, MatchKind,
+    /// };
+    ///
+    /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["foo", "car"]);
+    /// let re = PikeVM::builder()
+    ///     .configure(PikeVM::config().prefilter(pre))
+    ///     .build(r"(foo|bar)[a-z]+")?;
+    /// let mut cache = re.create_cache();
+    /// let input = Input::new("foo1 barfox bar");
+    /// // No match reported even though there clearly is one!
+    /// assert_eq!(None, re.find(&mut cache, input));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn prefilter(mut self, pre: Option<Prefilter>) -> Config {
+        self.pre = Some(pre);
         self
     }
 
-    pub fn get_anchored(&self) -> bool {
-        self.anchored.unwrap_or(false)
+    /// Returns the match semantics set in this configuration.
+    pub fn get_match_kind(&self) -> MatchKind {
+        self.match_kind.unwrap_or(MatchKind::LeftmostFirst)
     }
 
-    pub fn get_utf8(&self) -> bool {
-        self.utf8.unwrap_or(true)
+    /// Returns the prefilter set in this configuration, if one at all.
+    pub fn get_prefilter(&self) -> Option<&Prefilter> {
+        self.pre.as_ref().unwrap_or(&None).as_ref()
     }
 
-    pub(crate) fn overwrite(self, o: Config) -> Config {
+    /// Overwrite the default configuration such that the options in `o` are
+    /// always used. If an option in `o` is not set, then the corresponding
+    /// option in `self` is used. If it's not set in `self` either, then it
+    /// remains not set.
+    pub(crate) fn overwrite(&self, o: Config) -> Config {
         Config {
-            anchored: o.anchored.or(self.anchored),
-            utf8: o.utf8.or(self.utf8),
+            match_kind: o.match_kind.or(self.match_kind),
+            pre: o.pre.or_else(|| self.pre.clone()),
         }
     }
 }
 
-/// A builder for a PikeVM.
+/// A builder for a `PikeVM`.
+///
+/// This builder permits configuring options for the syntax of a pattern,
+/// the NFA construction and the `PikeVM` construction. This builder is
+/// different from a general purpose regex builder in that it permits fine
+/// grain configuration of the construction process. The trade off for this is
+/// complexity, and the possibility of setting a configuration that might not
+/// make sense. For example, there are two different UTF-8 modes:
+///
+/// * [`util::syntax::Config::utf8`](crate::util::syntax::Config::utf8)
+/// controls whether the pattern itself can contain sub-expressions that match
+/// invalid UTF-8.
+/// * [`thompson::Config::utf8`] controls whether empty matches that split a
+/// Unicode codepoint are reported or not.
+///
+/// Generally speaking, callers will want to either enable all of these or
+/// disable all of these.
+///
+/// # Example
+///
+/// This example shows how to disable UTF-8 mode in the syntax and the regex
+/// itself. This is generally what you want for matching on arbitrary bytes.
+///
+/// ```
+/// use regex_automata::{
+///     nfa::thompson::{self, pikevm::PikeVM},
+///     util::syntax,
+///     Match,
+/// };
+///
+/// let re = PikeVM::builder()
+///     .syntax(syntax::Config::new().utf8(false))
+///     .thompson(thompson::Config::new().utf8(false))
+///     .build(r"foo(?-u:[^b])ar.*")?;
+/// let mut cache = re.create_cache();
+///
+/// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n";
+/// let expected = Some(Match::must(0, 1..9));
+/// let got = re.find_iter(&mut cache, haystack).next();
+/// assert_eq!(expected, got);
+/// // Notice that `(?-u:[^b])` matches invalid UTF-8,
+/// // but the subsequent `.*` does not! Disabling UTF-8
+/// // on the syntax permits this.
+/// //
+/// // N.B. This example does not show the impact of
+/// // disabling UTF-8 mode on a PikeVM Config, since that
+/// // only impacts regexes that can produce matches of
+/// // length 0.
+/// assert_eq!(b"foo\xFFarzz", &haystack[got.unwrap().range()]);
+///
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
 #[derive(Clone, Debug)]
 pub struct Builder {
     config: Config,
-    thompson: thompson::Builder,
+    #[cfg(feature = "syntax")]
+    thompson: thompson::Compiler,
 }
 
 impl Builder {
@@ -59,53 +247,58 @@ impl Builder {
     pub fn new() -> Builder {
         Builder {
             config: Config::default(),
-            thompson: thompson::Builder::new(),
+            #[cfg(feature = "syntax")]
+            thompson: thompson::Compiler::new(),
         }
     }
 
-    pub fn build(&self, pattern: &str) -> Result<PikeVM, Error> {
+    /// Build a `PikeVM` from the given pattern.
+    ///
+    /// If there was a problem parsing or compiling the pattern, then an error
+    /// is returned.
+    #[cfg(feature = "syntax")]
+    pub fn build(&self, pattern: &str) -> Result<PikeVM, BuildError> {
         self.build_many(&[pattern])
     }
 
+    /// Build a `PikeVM` from the given patterns.
+    #[cfg(feature = "syntax")]
     pub fn build_many<P: AsRef<str>>(
         &self,
         patterns: &[P],
-    ) -> Result<PikeVM, Error> {
+    ) -> Result<PikeVM, BuildError> {
         let nfa = self.thompson.build_many(patterns)?;
-        self.build_from_nfa(Arc::new(nfa))
-    }
-
-    pub fn build_from_nfa(&self, nfa: Arc<NFA>) -> Result<PikeVM, Error> {
-        // TODO: Check that this is correct.
-        // if !cfg!(all(
-        // feature = "dfa",
-        // feature = "syntax",
-        // feature = "unicode-perl"
-        // )) {
-        if !cfg!(feature = "syntax") {
-            if nfa.has_word_boundary_unicode() {
-                return Err(Error::unicode_word_unavailable());
-            }
-        }
-        Ok(PikeVM { config: self.config, nfa })
+        self.build_from_nfa(nfa)
+    }
+
+    /// Build a `PikeVM` directly from its NFA.
+    ///
+    /// Note that when using this method, any configuration that applies to the
+    /// construction of the NFA itself will of course be ignored, since the NFA
+    /// given here is already built.
+    pub fn build_from_nfa(&self, nfa: NFA) -> Result<PikeVM, BuildError> {
+        nfa.look_set_any().available().map_err(BuildError::word)?;
+        Ok(PikeVM { config: self.config.clone(), nfa })
     }
 
+    /// Apply the given `PikeVM` configuration options to this builder.
     pub fn configure(&mut self, config: Config) -> &mut Builder {
         self.config = self.config.overwrite(config);
         self
     }
 
     /// Set the syntax configuration for this builder using
-    /// [`SyntaxConfig`](crate::SyntaxConfig).
+    /// [`syntax::Config`](crate::util::syntax::Config).
     ///
     /// This permits setting things like case insensitivity, Unicode and multi
     /// line mode.
     ///
     /// These settings only apply when constructing a PikeVM directly from a
     /// pattern.
+    #[cfg(feature = "syntax")]
     pub fn syntax(
         &mut self,
-        config: crate::util::syntax::SyntaxConfig,
+        config: crate::util::syntax::Config,
     ) -> &mut Builder {
         self.thompson.syntax(config);
         self
@@ -119,259 +312,1395 @@ impl Builder {
     ///
     /// These settings only apply when constructing a PikeVM directly from a
     /// pattern.
+    #[cfg(feature = "syntax")]
     pub fn thompson(&mut self, config: thompson::Config) -> &mut Builder {
         self.thompson.configure(config);
         self
     }
 }
 
+/// A virtual machine for executing regex searches with capturing groups.
+///
+/// # Infallible APIs
+///
+/// Unlike most other regex engines in this crate, a `PikeVM` never returns an
+/// error at search time. It supports all [`Anchored`] configurations, never
+/// quits and works on haystacks of arbitrary length.
+///
+/// There are two caveats to mention though:
+///
+/// * If an invalid pattern ID is given to a search via [`Anchored::Pattern`],
+/// then the PikeVM will report "no match." This is consistent with all other
+/// regex engines in this crate.
+/// * When using [`PikeVM::which_overlapping_matches`] with a [`PatternSet`]
+/// that has insufficient capacity to store all valid pattern IDs, then if a
+/// match occurs for a `PatternID` that cannot be inserted, it is silently
+/// dropped as if it did not match.
+///
+/// # Advice
+///
+/// The `PikeVM` is generally the most "powerful" regex engine in this crate.
+/// "Powerful" in this context means that it can handle any regular expression
+/// that is parseable by `regex-syntax` and any size haystack. Regretably,
+/// the `PikeVM` is also simultaneously often the _slowest_ regex engine in
+/// practice. This results in an annoying situation where one generally tries
+/// to pick any other regex engine (or perhaps none at all) before being
+/// forced to fall back to a `PikeVM`.
+///
+/// For example, a common strategy for dealing with capturing groups is to
+/// actually look for the overall match of the regex using a faster regex
+/// engine, like a [lazy DFA](crate::hybrid::regex::Regex). Once the overall
+/// match is found, one can then run the `PikeVM` on just the match span to
+/// find the spans of the capturing groups. In this way, the faster regex
+/// engine does the majority of the work, while the `PikeVM` only lends its
+/// power in a more limited role.
+///
+/// Unfortunately, this isn't always possible because the faster regex engines
+/// don't support all of the regex features in `regex-syntax`. This notably
+/// includes (and is currently limited to) Unicode word boundaries. So if
+/// your pattern has Unicode word boundaries, you typically can't use a
+/// DFA-based regex engine at all (unless you [enable heuristic support for
+/// it](crate::hybrid::dfa::Config::unicode_word_boundary)). (The [one-pass
+/// DFA](crate::dfa::onepass::DFA) can handle Unicode word boundaries for
+/// anchored searches only, but in a cruel sort of joke, many Unicode features
+/// tend to result in making the regex _not_ one-pass.)
+///
+/// # Example
+///
+/// This example shows that the `PikeVM` implements Unicode word boundaries
+/// correctly by default.
+///
+/// ```
+/// # if cfg!(miri) { return Ok(()); } // miri takes too long
+/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
+///
+/// let re = PikeVM::new(r"\b\w+\b")?;
+/// let mut cache = re.create_cache();
+///
+/// let mut it = re.find_iter(&mut cache, "Шерлок Холмс");
+/// assert_eq!(Some(Match::must(0, 0..12)), it.next());
+/// assert_eq!(Some(Match::must(0, 13..23)), it.next());
+/// assert_eq!(None, it.next());
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
 #[derive(Clone, Debug)]
 pub struct PikeVM {
     config: Config,
-    nfa: Arc<NFA>,
+    nfa: NFA,
 }
 
 impl PikeVM {
-    pub fn new(pattern: &str) -> Result<PikeVM, Error> {
+    /// Parse the given regular expression using the default configuration and
+    /// return the corresponding `PikeVM`.
+    ///
+    /// If you want a non-default configuration, then use the [`Builder`] to
+    /// set your own configuration.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
+    ///
+    /// let re = PikeVM::new("foo[0-9]+bar")?;
+    /// let mut cache = re.create_cache();
+    /// assert_eq!(
+    ///     Some(Match::must(0, 3..14)),
+    ///     re.find_iter(&mut cache, "zzzfoo12345barzzz").next(),
+    /// );
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "syntax")]
+    pub fn new(pattern: &str) -> Result<PikeVM, BuildError> {
         PikeVM::builder().build(pattern)
     }
 
-    pub fn new_many<P: AsRef<str>>(patterns: &[P]) -> Result<PikeVM, Error> {
+    /// Like `new`, but parses multiple patterns into a single "multi regex."
+    /// This similarly uses the default regex configuration.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
+    ///
+    /// let re = PikeVM::new_many(&["[a-z]+", "[0-9]+"])?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// let mut it = re.find_iter(&mut cache, "abc 1 foo 4567 0 quux");
+    /// assert_eq!(Some(Match::must(0, 0..3)), it.next());
+    /// assert_eq!(Some(Match::must(1, 4..5)), it.next());
+    /// assert_eq!(Some(Match::must(0, 6..9)), it.next());
+    /// assert_eq!(Some(Match::must(1, 10..14)), it.next());
+    /// assert_eq!(Some(Match::must(1, 15..16)), it.next());
+    /// assert_eq!(Some(Match::must(0, 17..21)), it.next());
+    /// assert_eq!(None, it.next());
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(feature = "syntax")]
+    pub fn new_many<P: AsRef<str>>(
+        patterns: &[P],
+    ) -> Result<PikeVM, BuildError> {
         PikeVM::builder().build_many(patterns)
     }
 
+    /// Like `new`, but builds a PikeVM directly from an NFA. This is useful
+    /// if you already have an NFA, or even if you hand-assembled the NFA.
+    ///
+    /// # Example
+    ///
+    /// This shows how to hand assemble a regular expression via its HIR,
+    /// compile an NFA from it and build a PikeVM from the NFA.
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::{NFA, pikevm::PikeVM}, Match};
+    /// use regex_syntax::hir::{Hir, Class, ClassBytes, ClassBytesRange};
+    ///
+    /// let hir = Hir::class(Class::Bytes(ClassBytes::new(vec![
+    ///     ClassBytesRange::new(b'0', b'9'),
+    ///     ClassBytesRange::new(b'A', b'Z'),
+    ///     ClassBytesRange::new(b'_', b'_'),
+    ///     ClassBytesRange::new(b'a', b'z'),
+    /// ])));
+    ///
+    /// let config = NFA::config().nfa_size_limit(Some(1_000));
+    /// let nfa = NFA::compiler().configure(config).build_from_hir(&hir)?;
+    ///
+    /// let re = PikeVM::new_from_nfa(nfa)?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    /// let expected = Some(Match::must(0, 3..4));
+    /// re.captures(&mut cache, "!@#A#@!", &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn new_from_nfa(nfa: NFA) -> Result<PikeVM, BuildError> {
+        PikeVM::builder().build_from_nfa(nfa)
+    }
+
+    /// Create a new `PikeVM` that matches every input.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
+    ///
+    /// let re = PikeVM::always_match()?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// let expected = Match::must(0, 0..0);
+    /// assert_eq!(Some(expected), re.find_iter(&mut cache, "").next());
+    /// assert_eq!(Some(expected), re.find_iter(&mut cache, "foo").next());
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn always_match() -> Result<PikeVM, BuildError> {
+        let nfa = thompson::NFA::always_match();
+        PikeVM::new_from_nfa(nfa)
+    }
+
+    /// Create a new `PikeVM` that never matches any input.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
+    ///
+    /// let re = PikeVM::never_match()?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// assert_eq!(None, re.find_iter(&mut cache, "").next());
+    /// assert_eq!(None, re.find_iter(&mut cache, "foo").next());
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn never_match() -> Result<PikeVM, BuildError> {
+        let nfa = thompson::NFA::never_match();
+        PikeVM::new_from_nfa(nfa)
+    }
+
+    /// Return a default configuration for a `PikeVM`.
+    ///
+    /// This is a convenience routine to avoid needing to import the `Config`
+    /// type when customizing the construction of a `PikeVM`.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to disable UTF-8 mode. When UTF-8 mode is
+    /// disabled, zero-width matches that split a codepoint are allowed.
+    /// Otherwise they are never reported.
+    ///
+    /// In the code below, notice that `""` is permitted to match positions
+    /// that split the encoding of a codepoint.
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::{self, pikevm::PikeVM}, Match};
+    ///
+    /// let re = PikeVM::builder()
+    ///     .thompson(thompson::Config::new().utf8(false))
+    ///     .build(r"")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// let haystack = "a☃z";
+    /// let mut it = re.find_iter(&mut cache, haystack);
+    /// assert_eq!(Some(Match::must(0, 0..0)), it.next());
+    /// assert_eq!(Some(Match::must(0, 1..1)), it.next());
+    /// assert_eq!(Some(Match::must(0, 2..2)), it.next());
+    /// assert_eq!(Some(Match::must(0, 3..3)), it.next());
+    /// assert_eq!(Some(Match::must(0, 4..4)), it.next());
+    /// assert_eq!(Some(Match::must(0, 5..5)), it.next());
+    /// assert_eq!(None, it.next());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
     pub fn config() -> Config {
         Config::new()
     }
 
+    /// Return a builder for configuring the construction of a `PikeVM`.
+    ///
+    /// This is a convenience routine to avoid needing to import the
+    /// [`Builder`] type in common cases.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to use the builder to disable UTF-8 mode
+    /// everywhere.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::{self, pikevm::PikeVM},
+    ///     util::syntax,
+    ///     Match,
+    /// };
+    ///
+    /// let re = PikeVM::builder()
+    ///     .syntax(syntax::Config::new().utf8(false))
+    ///     .thompson(thompson::Config::new().utf8(false))
+    ///     .build(r"foo(?-u:[^b])ar.*")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n";
+    /// let expected = Some(Match::must(0, 1..9));
+    /// re.captures(&mut cache, haystack, &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
     pub fn builder() -> Builder {
         Builder::new()
     }
 
+    /// Create a new empty set of capturing groups that is guaranteed to be
+    /// valid for the search APIs on this `PikeVM`.
+    ///
+    /// A `Captures` value created for a specific `PikeVM` cannot be used with
+    /// any other `PikeVM`.
+    ///
+    /// This is a convenience function for [`Captures::all`]. See the
+    /// [`Captures`] documentation for an explanation of its alternative
+    /// constructors that permit the `PikeVM` to do less work during a search,
+    /// and thus might make it faster.
+    pub fn create_captures(&self) -> Captures {
+        Captures::all(self.get_nfa().group_info().clone())
+    }
+
+    /// Create a new cache for this `PikeVM`.
+    ///
+    /// The cache returned should only be used for searches for this
+    /// `PikeVM`. If you want to reuse the cache for another `PikeVM`, then
+    /// you must call [`Cache::reset`] with that `PikeVM` (or, equivalently,
+    /// [`PikeVM::reset_cache`]).
     pub fn create_cache(&self) -> Cache {
-        Cache::new(self.nfa())
+        Cache::new(self)
     }
 
-    pub fn create_captures(&self) -> Captures {
-        Captures::new(self.nfa())
+    /// Reset the given cache such that it can be used for searching with the
+    /// this `PikeVM` (and only this `PikeVM`).
+    ///
+    /// A cache reset permits reusing memory already allocated in this cache
+    /// with a different `PikeVM`.
+    ///
+    /// # Example
+    ///
+    /// This shows how to re-purpose a cache for use with a different `PikeVM`.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
+    ///
+    /// let re1 = PikeVM::new(r"\w")?;
+    /// let re2 = PikeVM::new(r"\W")?;
+    ///
+    /// let mut cache = re1.create_cache();
+    /// assert_eq!(
+    ///     Some(Match::must(0, 0..2)),
+    ///     re1.find_iter(&mut cache, "Δ").next(),
+    /// );
+    ///
+    /// // Using 'cache' with re2 is not allowed. It may result in panics or
+    /// // incorrect results. In order to re-purpose the cache, we must reset
+    /// // it with the PikeVM we'd like to use it with.
+    /// //
+    /// // Similarly, after this reset, using the cache with 're1' is also not
+    /// // allowed.
+    /// re2.reset_cache(&mut cache);
+    /// assert_eq!(
+    ///     Some(Match::must(0, 0..3)),
+    ///     re2.find_iter(&mut cache, "☃").next(),
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn reset_cache(&self, cache: &mut Cache) {
+        cache.reset(self);
     }
 
-    pub fn nfa(&self) -> &Arc<NFA> {
+    /// Returns the total number of patterns compiled into this `PikeVM`.
+    ///
+    /// In the case of a `PikeVM` that contains no patterns, this returns `0`.
+    ///
+    /// # Example
+    ///
+    /// This example shows the pattern length for a `PikeVM` that never
+    /// matches:
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
+    ///
+    /// let re = PikeVM::never_match()?;
+    /// assert_eq!(re.pattern_len(), 0);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// And another example for a `PikeVM` that matches at every position:
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
+    ///
+    /// let re = PikeVM::always_match()?;
+    /// assert_eq!(re.pattern_len(), 1);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// And finally, a `PikeVM` that was constructed from multiple patterns:
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
+    ///
+    /// let re = PikeVM::new_many(&["[0-9]+", "[a-z]+", "[A-Z]+"])?;
+    /// assert_eq!(re.pattern_len(), 3);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn pattern_len(&self) -> usize {
+        self.nfa.pattern_len()
+    }
+
+    /// Return the config for this `PikeVM`.
+    #[inline]
+    pub fn get_config(&self) -> &Config {
+        &self.config
+    }
+
+    /// Returns a reference to the underlying NFA.
+    #[inline]
+    pub fn get_nfa(&self) -> &NFA {
         &self.nfa
     }
+}
 
-    pub fn find_leftmost_iter<'r, 'c, 't>(
+impl PikeVM {
+    /// Returns true if and only if this `PikeVM` matches the given haystack.
+    ///
+    /// This routine may short circuit if it knows that scanning future
+    /// input will never lead to a different result. In particular, if the
+    /// underlying NFA enters a match state, then this routine will return
+    /// `true` immediately without inspecting any future input. (Consider how
+    /// this might make a difference given the regex `a+` on the haystack
+    /// `aaaaaaaaaaaaaaa`. This routine can stop after it sees the first `a`,
+    /// but routines like `find` need to continue searching because `+` is
+    /// greedy by default.)
+    ///
+    /// # Example
+    ///
+    /// This shows basic usage:
+    ///
+    /// ```
+    /// use regex_automata::nfa::thompson::pikevm::PikeVM;
+    ///
+    /// let re = PikeVM::new("foo[0-9]+bar")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// assert!(re.is_match(&mut cache, "foo12345bar"));
+    /// assert!(!re.is_match(&mut cache, "foobar"));
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Example: consistency with search APIs
+    ///
+    /// `is_match` is guaranteed to return `true` whenever `find` returns a
+    /// match. This includes searches that are executed entirely within a
+    /// codepoint:
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Input};
+    ///
+    /// let re = PikeVM::new("a*")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// assert!(!re.is_match(&mut cache, Input::new("☃").span(1..2)));
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Notice that when UTF-8 mode is disabled, then the above reports a
+    /// match because the restriction against zero-width matches that split a
+    /// codepoint has been lifted:
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::{pikevm::PikeVM, NFA}, Input};
+    ///
+    /// let re = PikeVM::builder()
+    ///     .thompson(NFA::config().utf8(false))
+    ///     .build("a*")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// assert!(re.is_match(&mut cache, Input::new("☃").span(1..2)));
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn is_match<'h, I: Into<Input<'h>>>(
+        &self,
+        cache: &mut Cache,
+        input: I,
+    ) -> bool {
+        let input = input.into().earliest(true);
+        self.search_slots(cache, &input, &mut []).is_some()
+    }
+
+    /// Executes a leftmost forward search and returns a `Match` if one exists.
+    ///
+    /// This routine only includes the overall match span. To get access to the
+    /// individual spans of each capturing group, use [`PikeVM::captures`].
+    ///
+    /// # Example
+    ///
+    /// Leftmost first match semantics corresponds to the match with the
+    /// smallest starting offset, but where the end offset is determined by
+    /// preferring earlier branches in the original regular expression. For
+    /// example, `Sam|Samwise` will match `Sam` in `Samwise`, but `Samwise|Sam`
+    /// will match `Samwise` in `Samwise`.
+    ///
+    /// Generally speaking, the "leftmost first" match is how most backtracking
+    /// regular expressions tend to work. This is in contrast to POSIX-style
+    /// regular expressions that yield "leftmost longest" matches. Namely,
+    /// both `Sam|Samwise` and `Samwise|Sam` match `Samwise` when using
+    /// leftmost longest semantics. (This crate does not currently support
+    /// leftmost longest semantics.)
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
+    ///
+    /// let re = PikeVM::new("foo[0-9]+")?;
+    /// let mut cache = re.create_cache();
+    /// let expected = Match::must(0, 0..8);
+    /// assert_eq!(Some(expected), re.find(&mut cache, "foo12345"));
+    ///
+    /// // Even though a match is found after reading the first byte (`a`),
+    /// // the leftmost first match semantics demand that we find the earliest
+    /// // match that prefers earlier parts of the pattern over later parts.
+    /// let re = PikeVM::new("abc|a")?;
+    /// let mut cache = re.create_cache();
+    /// let expected = Match::must(0, 0..3);
+    /// assert_eq!(Some(expected), re.find(&mut cache, "abc"));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn find<'h, I: Into<Input<'h>>>(
+        &self,
+        cache: &mut Cache,
+        input: I,
+    ) -> Option<Match> {
+        let input = input.into();
+        if self.get_nfa().pattern_len() == 1 {
+            let mut slots = [None, None];
+            let pid = self.search_slots(cache, &input, &mut slots)?;
+            let start = slots[0]?.get();
+            let end = slots[1]?.get();
+            return Some(Match::new(pid, Span { start, end }));
+        }
+        let ginfo = self.get_nfa().group_info();
+        let slots_len = ginfo.implicit_slot_len();
+        let mut slots = vec![None; slots_len];
+        let pid = self.search_slots(cache, &input, &mut slots)?;
+        let start = slots[pid.as_usize() * 2]?.get();
+        let end = slots[pid.as_usize() * 2 + 1]?.get();
+        Some(Match::new(pid, Span { start, end }))
+    }
+
+    /// Executes a leftmost forward search and writes the spans of capturing
+    /// groups that participated in a match into the provided [`Captures`]
+    /// value. If no match was found, then [`Captures::is_match`] is guaranteed
+    /// to return `false`.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
+    ///
+    /// let re = PikeVM::new(r"^([0-9]{4})-([0-9]{2})-([0-9]{2})$")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    ///
+    /// re.captures(&mut cache, "2010-03-14", &mut caps);
+    /// assert!(caps.is_match());
+    /// assert_eq!(Some(Span::from(0..4)), caps.get_group(1));
+    /// assert_eq!(Some(Span::from(5..7)), caps.get_group(2));
+    /// assert_eq!(Some(Span::from(8..10)), caps.get_group(3));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn captures<'h, I: Into<Input<'h>>>(
+        &self,
+        cache: &mut Cache,
+        input: I,
+        caps: &mut Captures,
+    ) {
+        self.search(cache, &input.into(), caps)
+    }
+
+    /// Returns an iterator over all non-overlapping leftmost matches in the
+    /// given bytes. If no match exists, then the iterator yields no elements.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
+    ///
+    /// let re = PikeVM::new("foo[0-9]+")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// let text = "foo1 foo12 foo123";
+    /// let matches: Vec<Match> = re.find_iter(&mut cache, text).collect();
+    /// assert_eq!(matches, vec![
+    ///     Match::must(0, 0..4),
+    ///     Match::must(0, 5..10),
+    ///     Match::must(0, 11..17),
+    /// ]);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn find_iter<'r, 'c, 'h, I: Into<Input<'h>>>(
+        &'r self,
+        cache: &'c mut Cache,
+        input: I,
+    ) -> FindMatches<'r, 'c, 'h> {
+        let caps = Captures::matches(self.get_nfa().group_info().clone());
+        let it = iter::Searcher::new(input.into());
+        FindMatches { re: self, cache, caps, it }
+    }
+
+    /// Returns an iterator over all non-overlapping `Captures` values. If no
+    /// match exists, then the iterator yields no elements.
+    ///
+    /// This yields the same matches as [`PikeVM::find_iter`], but it includes
+    /// the spans of all capturing groups that participate in each match.
+    ///
+    /// **Tip:** See [`util::iter::Searcher`](crate::util::iter::Searcher) for
+    /// how to correctly iterate over all matches in a haystack while avoiding
+    /// the creation of a new `Captures` value for every match. (Which you are
+    /// forced to do with an `Iterator`.)
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
+    ///
+    /// let re = PikeVM::new("foo(?P<numbers>[0-9]+)")?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// let text = "foo1 foo12 foo123";
+    /// let matches: Vec<Span> = re
+    ///     .captures_iter(&mut cache, text)
+    ///     // The unwrap is OK since 'numbers' matches if the pattern matches.
+    ///     .map(|caps| caps.get_group_by_name("numbers").unwrap())
+    ///     .collect();
+    /// assert_eq!(matches, vec![
+    ///     Span::from(3..4),
+    ///     Span::from(8..10),
+    ///     Span::from(14..17),
+    /// ]);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn captures_iter<'r, 'c, 'h, I: Into<Input<'h>>>(
         &'r self,
         cache: &'c mut Cache,
-        haystack: &'t [u8],
-    ) -> FindLeftmostMatches<'r, 'c, 't> {
-        FindLeftmostMatches::new(self, cache, haystack)
-    }
-
-    // BREADCRUMBS:
-    //
-    // 1) Don't forget about prefilters.
-    //
-    // 2) Consider the case of using a PikeVM with an NFA that has Capture
-    // states, but where we don't want to track capturing groups (other than
-    // group 0). This potentially saves a lot of copying around and what not. I
-    // believe the current regex crate does this, for example. The interesting
-    // bit here is how to handle the case of multiple patterns...
-    //
-    // 3) Permit the caller to specify a pattern ID to run an anchored-only
-    // search on.
-    //
-    // 4) How to do overlapping? The way multi-regex support works in the regex
-    // crate currently is to run the PikeVM until either we reach the end of
-    // the haystack or when we know all regexes have matched. The latter case
-    // is probably quite rare, so the common case is likely that we're always
-    // searching the entire input. The question is: can we emulate that with
-    // our typical 'overlapping' APIs on DFAs? I believe we can. If so, then
-    // all we need to do is provide an overlapping API on the PikeVM that
-    // roughly matches the ones we provide on DFAs. For those APIs, the only
-    // thing they need over non-overlapping APIs is "caller state." For DFAs,
-    // the caller state is simple: it contains the last state visited and the
-    // last match reported. For the PikeVM (and NFAs in general), the "last
-    // state" is actually a *set* of NFA states. So I think what happens here
-    // is that we can just force the `Cache` to subsume this role. We'll still
-    // need some additional state to track the last match reported though.
-    // Because when two or more patterns match at the same location, we need a
-    // way to know to iterate over them. Although maybe it's not match index we
-    // need, but the state index of the last NFA state processed in the cache.
-    // Then we just pick up where we left off. There might be another match
-    // state, in which case, we report it.
-
-    pub fn find_leftmost_at(
+        input: I,
+    ) -> CapturesMatches<'r, 'c, 'h> {
+        let caps = self.create_captures();
+        let it = iter::Searcher::new(input.into());
+        CapturesMatches { re: self, cache, caps, it }
+    }
+}
+
+impl PikeVM {
+    /// Executes a leftmost forward search and writes the spans of capturing
+    /// groups that participated in a match into the provided [`Captures`]
+    /// value. If no match was found, then [`Captures::is_match`] is guaranteed
+    /// to return `false`.
+    ///
+    /// This is like [`PikeVM::captures`], but it accepts a concrete `&Input`
+    /// instead of an `Into<Input>`.
+    ///
+    /// # Example: specific pattern search
+    ///
+    /// This example shows how to build a multi-PikeVM that permits searching
+    /// for specific patterns.
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     nfa::thompson::pikevm::PikeVM,
+    ///     Anchored, Match, PatternID, Input,
+    /// };
+    ///
+    /// let re = PikeVM::new_many(&["[a-z0-9]{6}", "[a-z][a-z0-9]{5}"])?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    /// let haystack = "foo123";
+    ///
+    /// // Since we are using the default leftmost-first match and both
+    /// // patterns match at the same starting position, only the first pattern
+    /// // will be returned in this case when doing a search for any of the
+    /// // patterns.
+    /// let expected = Some(Match::must(0, 0..6));
+    /// re.search(&mut cache, &Input::new(haystack), &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// // But if we want to check whether some other pattern matches, then we
+    /// // can provide its pattern ID.
+    /// let expected = Some(Match::must(1, 0..6));
+    /// let input = Input::new(haystack)
+    ///     .anchored(Anchored::Pattern(PatternID::must(1)));
+    /// re.search(&mut cache, &input, &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Example: specifying the bounds of a search
+    ///
+    /// This example shows how providing the bounds of a search can produce
+    /// different results than simply sub-slicing the haystack.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match, Input};
+    ///
+    /// let re = PikeVM::new(r"\b[0-9]{3}\b")?;
+    /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
+    /// let haystack = "foo123bar";
+    ///
+    /// // Since we sub-slice the haystack, the search doesn't know about
+    /// // the larger context and assumes that `123` is surrounded by word
+    /// // boundaries. And of course, the match position is reported relative
+    /// // to the sub-slice as well, which means we get `0..3` instead of
+    /// // `3..6`.
+    /// let expected = Some(Match::must(0, 0..3));
+    /// re.search(&mut cache, &Input::new(&haystack[3..6]), &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// // But if we provide the bounds of the search within the context of the
+    /// // entire haystack, then the search can take the surrounding context
+    /// // into account. (And if we did find a match, it would be reported
+    /// // as a valid offset into `haystack` instead of its sub-slice.)
+    /// let expected = None;
+    /// let input = Input::new(haystack).range(3..6);
+    /// re.search(&mut cache, &input, &mut caps);
+    /// assert_eq!(expected, caps.get_match());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn search(
         &self,
         cache: &mut Cache,
-        haystack: &[u8],
-        start: usize,
-        end: usize,
+        input: &Input<'_>,
         caps: &mut Captures,
-    ) -> Option<MultiMatch> {
-        let anchored =
-            self.config.get_anchored() || self.nfa.is_always_start_anchored();
-        let mut at = start;
-        let mut matched_pid = None;
-        cache.clear();
-        'LOOP: loop {
-            if cache.clist.set.is_empty() {
-                if matched_pid.is_some() || (anchored && at > start) {
-                    break 'LOOP;
+    ) {
+        caps.set_pattern(None);
+        let pid = self.search_slots(cache, input, caps.slots_mut());
+        caps.set_pattern(pid);
+    }
+
+    /// Executes a leftmost forward search and writes the spans of capturing
+    /// groups that participated in a match into the provided `slots`, and
+    /// returns the matching pattern ID. The contents of the slots for patterns
+    /// other than the matching pattern are unspecified. If no match was found,
+    /// then `None` is returned and the contents of `slots` is unspecified.
+    ///
+    /// This is like [`PikeVM::search`], but it accepts a raw slots slice
+    /// instead of a `Captures` value. This is useful in contexts where you
+    /// don't want or need to allocate a `Captures`.
+    ///
+    /// It is legal to pass _any_ number of slots to this routine. If the regex
+    /// engine would otherwise write a slot offset that doesn't fit in the
+    /// provided slice, then it is simply skipped. In general though, there are
+    /// usually three slice lengths you might want to use:
+    ///
+    /// * An empty slice, if you only care about which pattern matched.
+    /// * A slice with
+    /// [`pattern_len() * 2`](crate::nfa::thompson::NFA::pattern_len)
+    /// slots, if you only care about the overall match spans for each matching
+    /// pattern.
+    /// * A slice with
+    /// [`slot_len()`](crate::util::captures::GroupInfo::slot_len) slots, which
+    /// permits recording match offsets for every capturing group in every
+    /// pattern.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to find the overall match offsets in a
+    /// multi-pattern search without allocating a `Captures` value. Indeed, we
+    /// can put our slots right on the stack.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID, Input};
+    ///
+    /// let re = PikeVM::new_many(&[
+    ///     r"\pL+",
+    ///     r"\d+",
+    /// ])?;
+    /// let mut cache = re.create_cache();
+    /// let input = Input::new("!@#123");
+    ///
+    /// // We only care about the overall match offsets here, so we just
+    /// // allocate two slots for each pattern. Each slot records the start
+    /// // and end of the match.
+    /// let mut slots = [None; 4];
+    /// let pid = re.search_slots(&mut cache, &input, &mut slots);
+    /// assert_eq!(Some(PatternID::must(1)), pid);
+    ///
+    /// // The overall match offsets are always at 'pid * 2' and 'pid * 2 + 1'.
+    /// // See 'GroupInfo' for more details on the mapping between groups and
+    /// // slot indices.
+    /// let slot_start = pid.unwrap().as_usize() * 2;
+    /// let slot_end = slot_start + 1;
+    /// assert_eq!(Some(3), slots[slot_start].map(|s| s.get()));
+    /// assert_eq!(Some(6), slots[slot_end].map(|s| s.get()));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn search_slots(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        slots: &mut [Option<NonMaxUsize>],
+    ) -> Option<PatternID> {
+        let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8();
+        if !utf8empty {
+            let hm = self.search_slots_imp(cache, input, slots)?;
+            return Some(hm.pattern());
+        }
+        // There is an unfortunate special case where if the regex can
+        // match the empty string and UTF-8 mode is enabled, the search
+        // implementation requires that the slots have at least as much space
+        // to report the bounds of any match. This is so zero-width matches
+        // that split a codepoint can be filtered out.
+        //
+        // Note that if utf8empty is true, we specialize the case for when
+        // the number of patterns is 1. In that case, we can just use a stack
+        // allocation. Otherwise we resort to a heap allocation, which we
+        // convince ourselves we're fine with due to the pathological nature of
+        // this case.
+        let min = self.get_nfa().group_info().implicit_slot_len();
+        if slots.len() >= min {
+            let hm = self.search_slots_imp(cache, input, slots)?;
+            return Some(hm.pattern());
+        }
+        if self.get_nfa().pattern_len() == 1 {
+            let mut enough = [None, None];
+            let got = self.search_slots_imp(cache, input, &mut enough);
+            // This is OK because we know `enough` is strictly bigger than
+            // `slots`, otherwise this special case isn't reached.
+            slots.copy_from_slice(&enough[..slots.len()]);
+            return got.map(|hm| hm.pattern());
+        }
+        let mut enough = vec![None; min];
+        let got = self.search_slots_imp(cache, input, &mut enough);
+        // This is OK because we know `enough` is strictly bigger than `slots`,
+        // otherwise this special case isn't reached.
+        slots.copy_from_slice(&enough[..slots.len()]);
+        got.map(|hm| hm.pattern())
+    }
+
+    /// This is the actual implementation of `search_slots_imp` that
+    /// doesn't account for the special case when 1) the NFA has UTF-8 mode
+    /// enabled, 2) the NFA can match the empty string and 3) the caller has
+    /// provided an insufficient number of slots to record match offsets.
+    #[inline(never)]
+    fn search_slots_imp(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        slots: &mut [Option<NonMaxUsize>],
+    ) -> Option<HalfMatch> {
+        let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8();
+        let hm = match self.search_imp(cache, input, slots) {
+            None => return None,
+            Some(hm) if !utf8empty => return Some(hm),
+            Some(hm) => hm,
+        };
+        empty::skip_splits_fwd(input, hm, hm.offset(), |input| {
+            Ok(self
+                .search_imp(cache, input, slots)
+                .map(|hm| (hm, hm.offset())))
+        })
+        // OK because the PikeVM never errors.
+        .unwrap()
+    }
+
+    /// Writes the set of patterns that match anywhere in the given search
+    /// configuration to `patset`. If multiple patterns match at the same
+    /// position and this `PikeVM` was configured with [`MatchKind::All`]
+    /// semantics, then all matching patterns are written to the given set.
+    ///
+    /// Unless all of the patterns in this `PikeVM` are anchored, then
+    /// generally speaking, this will visit every byte in the haystack.
+    ///
+    /// This search routine *does not* clear the pattern set. This gives some
+    /// flexibility to the caller (e.g., running multiple searches with the
+    /// same pattern set), but does make the API bug-prone if you're reusing
+    /// the same pattern set for multiple searches but intended them to be
+    /// independent.
+    ///
+    /// If a pattern ID matched but the given `PatternSet` does not have
+    /// sufficient capacity to store it, then it is not inserted and silently
+    /// dropped.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to find all matching patterns in a haystack,
+    /// even when some patterns match at the same position as other patterns.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{
+    ///     nfa::thompson::pikevm::PikeVM,
+    ///     Input, MatchKind, PatternSet,
+    /// };
+    ///
+    /// let patterns = &[
+    ///     r"\w+", r"\d+", r"\pL+", r"foo", r"bar", r"barfoo", r"foobar",
+    /// ];
+    /// let re = PikeVM::builder()
+    ///     .configure(PikeVM::config().match_kind(MatchKind::All))
+    ///     .build_many(patterns)?;
+    /// let mut cache = re.create_cache();
+    ///
+    /// let input = Input::new("foobar");
+    /// let mut patset = PatternSet::new(re.pattern_len());
+    /// re.which_overlapping_matches(&mut cache, &input, &mut patset);
+    /// let expected = vec![0, 2, 3, 4, 6];
+    /// let got: Vec<usize> = patset.iter().map(|p| p.as_usize()).collect();
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[inline]
+    pub fn which_overlapping_matches(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        patset: &mut PatternSet,
+    ) {
+        self.which_overlapping_imp(cache, input, patset)
+    }
+}
+
+impl PikeVM {
+    /// The implementation of standard leftmost search.
+    ///
+    /// Capturing group spans are written to `slots`, but only if requested.
+    /// `slots` can be any length. Any slot in the NFA that is activated but
+    /// which is out of bounds for the given `slots` is ignored.
+    fn search_imp(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        slots: &mut [Option<NonMaxUsize>],
+    ) -> Option<HalfMatch> {
+        cache.setup_search(slots.len());
+        if input.is_done() {
+            return None;
+        }
+        // Why do we even care about this? Well, in our 'Captures'
+        // representation, we use usize::MAX as a sentinel to indicate "no
+        // match." This isn't problematic so long as our haystack doesn't have
+        // a maximal length. Byte slices are guaranteed by Rust to have a
+        // length that fits into isize, and so this assert should always pass.
+        // But we put it here to make our assumption explicit.
+        assert!(
+            input.haystack().len() < core::usize::MAX,
+            "byte slice lengths must be less than usize MAX",
+        );
+        instrument!(|c| c.reset(&self.nfa));
+
+        // Whether we want to visit all match states instead of emulating the
+        // 'leftmost' semantics of typical backtracking regex engines.
+        let allmatches =
+            self.config.get_match_kind().continue_past_first_match();
+        let (anchored, start_id) = match self.start_config(input) {
+            None => return None,
+            Some(config) => config,
+        };
+
+        let pre =
+            if anchored { None } else { self.get_config().get_prefilter() };
+        let Cache { ref mut stack, ref mut curr, ref mut next } = cache;
+        let mut hm = None;
+        // Yes, our search doesn't end at input.end(), but includes it. This
+        // is necessary because matches are delayed by one byte, just like
+        // how the DFA engines work. The delay is used to handle look-behind
+        // assertions. In the case of the PikeVM, the delay is implemented
+        // by not considering a match to exist until it is visited in
+        // 'steps'. Technically, we know a match exists in the previous
+        // iteration via 'epsilon_closure'. (It's the same thing in NFA-to-DFA
+        // determinization. We don't mark a DFA state as a match state if it
+        // contains an NFA match state, but rather, whether the DFA state was
+        // generated by a transition from a DFA state that contains an NFA
+        // match state.)
+        let mut at = input.start();
+        while at <= input.end() {
+            // If we have no states left to visit, then there are some cases
+            // where we know we can quit early or even skip ahead.
+            if curr.set.is_empty() {
+                // We have a match and we haven't been instructed to continue
+                // on even after finding a match, so we can quit.
+                if hm.is_some() && !allmatches {
+                    break;
+                }
+                // If we're running an anchored search and we've advanced
+                // beyond the start position with no other states to try, then
+                // we will never observe a match and thus can stop.
+                if anchored && at > input.start() {
+                    break;
+                }
+                // If there no states left to explore at this position and we
+                // know we can't terminate early, then we are effectively at
+                // the starting state of the NFA. If we fell through here,
+                // we'd end up adding our '(?s-u:.)*?' prefix and it would be
+                // the only thing in 'curr'. So we might as well just skip
+                // ahead until we find something that we know might advance us
+                // forward.
+                if let Some(ref pre) = pre {
+                    let span = Span::from(at..input.end());
+                    match pre.find(input.haystack(), span) {
+                        None => break,
+                        Some(ref span) => at = span.start,
+                    }
                 }
-                // TODO: prefilter
             }
-            if (!anchored && matched_pid.is_none())
-                || cache.clist.set.is_empty()
+            // Instead of using the NFA's unanchored start state, we actually
+            // always use its anchored starting state. As a result, when doing
+            // an unanchored search, we need to simulate our own '(?s-u:.)*?'
+            // prefix, to permit a match to appear anywhere.
+            //
+            // Now, we don't *have* to do things this way. We could use the
+            // NFA's unanchored starting state and do one 'epsilon_closure'
+            // call from that starting state before the main loop here. And
+            // that is just as correct. However, it turns out to be slower
+            // than our approach here because it slightly increases the cost
+            // of processing each byte by requiring us to visit more NFA
+            // states to deal with the additional NFA states in the unanchored
+            // prefix. By simulating it explicitly here, we lower those costs
+            // substantially. The cost is itself small, but it adds up for
+            // large haystacks.
+            //
+            // In order to simulate the '(?s-u:.)*?' prefix---which is not
+            // greedy---we are careful not to perform an epsilon closure on
+            // the start state if we already have a match. Namely, if we
+            // did otherwise, we would never reach a terminating condition
+            // because there would always be additional states to process.
+            // In effect, the exclusion of running 'epsilon_closure' when
+            // we have a match corresponds to the "dead" states we have in
+            // our DFA regex engines. Namely, in a DFA, match states merely
+            // instruct the search execution to record the current offset as
+            // the most recently seen match. It is the dead state that actually
+            // indicates when to stop the search (other than EOF or quit
+            // states).
+            //
+            // However, when 'allmatches' is true, the caller has asked us to
+            // leave in every possible match state. This tends not to make a
+            // whole lot of sense in unanchored searches, because it means the
+            // search really cannot terminate until EOF. And often, in that
+            // case, you wind up skipping over a bunch of matches and are left
+            // with the "last" match. Arguably, it just doesn't make a lot of
+            // sense to run a 'leftmost' search (which is what this routine is)
+            // with 'allmatches' set to true. But the DFAs support it and this
+            // matches their behavior. (Generally, 'allmatches' is useful for
+            // overlapping searches or leftmost anchored searches to find the
+            // longest possible match by ignoring match priority.)
+            //
+            // Additionally, when we're running an anchored search, this
+            // epsilon closure should only be computed at the beginning of the
+            // search. If we re-computed it at every position, we would be
+            // simulating an unanchored search when we were tasked to perform
+            // an anchored search.
+            if (!hm.is_some() || allmatches)
+                && (!anchored || at == input.start())
             {
-                self.epsilon_closure(
-                    &mut cache.clist,
-                    &mut caps.slots,
-                    &mut cache.stack,
-                    self.nfa.start_anchored(),
-                    haystack,
-                    at,
-                );
+                // Since we are adding to the 'curr' active states and since
+                // this is for the start ID, we use a slots slice that is
+                // guaranteed to have the right length but where every element
+                // is absent. This is exactly what we want, because this
+                // epsilon closure is responsible for simulating an unanchored
+                // '(?s:.)*?' prefix. It is specifically outside of any
+                // capturing groups, and thus, using slots that are always
+                // absent is correct.
+                //
+                // Note though that we can't just use '&mut []' here, since
+                // this epsilon closure may traverse through 'Captures' epsilon
+                // transitions, and thus must be able to write offsets to the
+                // slots given which are later copied to slot values in 'curr'.
+                let slots = next.slot_table.all_absent();
+                self.epsilon_closure(stack, slots, curr, input, at, start_id);
             }
-            for i in 0..cache.clist.set.len() {
-                let sid = cache.clist.set.get(i);
-                let pid = match self.step(
-                    &mut cache.nlist,
-                    &mut caps.slots,
-                    cache.clist.caps(sid),
-                    &mut cache.stack,
-                    sid,
-                    haystack,
-                    at,
-                ) {
-                    None => continue,
-                    Some(pid) => pid,
-                };
-                matched_pid = Some(pid);
-                break;
+            if let Some(pid) = self.nexts(stack, curr, next, input, at, slots)
+            {
+                hm = Some(HalfMatch::new(pid, at));
             }
-            if at >= end {
+            // Unless the caller asked us to return early, we need to mush on
+            // to see if we can extend our match. (But note that 'nexts' will
+            // quit right after seeing a match when match_kind==LeftmostFirst,
+            // as is consistent with leftmost-first match priority.)
+            if input.get_earliest() && hm.is_some() {
                 break;
             }
+            core::mem::swap(curr, next);
+            next.set.clear();
             at += 1;
-            cache.swap();
-            cache.nlist.set.clear();
         }
-        matched_pid.map(|pid| {
-            let slots = self.nfa.pattern_slots(pid);
-            let (start, end) = (slots.start, slots.start + 1);
-            MultiMatch::new(
-                pid,
-                caps.slots[start].unwrap(),
-                caps.slots[end].unwrap(),
-            )
-        })
+        instrument!(|c| c.eprint(&self.nfa));
+        hm
+    }
+
+    /// The implementation for the 'which_overlapping_matches' API. Basically,
+    /// we do a single scan through the entire haystack (unless our regex
+    /// or search is anchored) and record every pattern that matched. In
+    /// particular, when MatchKind::All is used, this supports overlapping
+    /// matches. So if we have the regexes 'sam' and 'samwise', they will
+    /// *both* be reported in the pattern set when searching the haystack
+    /// 'samwise'.
+    fn which_overlapping_imp(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        patset: &mut PatternSet,
+    ) {
+        // NOTE: This is effectively a copy of 'search_imp' above, but with no
+        // captures support and instead writes patterns that matched directly
+        // to 'patset'. See that routine for better commentary about what's
+        // going on in this routine. We probably could unify the routines using
+        // generics or more helper routines, but I'm not sure it's worth it.
+        //
+        // NOTE: We somewhat go out of our way here to support things like
+        // 'input.get_earliest()' and 'leftmost-first' match semantics. Neither
+        // of those seem particularly relevant to this routine, but they are
+        // both supported by the DFA analogs of this routine by construction
+        // and composition, so it seems like good sense to have the PikeVM
+        // match that behavior.
+
+        cache.setup_search(0);
+        if input.is_done() {
+            return;
+        }
+        assert!(
+            input.haystack().len() < core::usize::MAX,
+            "byte slice lengths must be less than usize MAX",
+        );
+        instrument!(|c| c.reset(&self.nfa));
+
+        let allmatches =
+            self.config.get_match_kind().continue_past_first_match();
+        let (anchored, start_id) = match self.start_config(input) {
+            None => return,
+            Some(config) => config,
+        };
+
+        let Cache { ref mut stack, ref mut curr, ref mut next } = cache;
+        for at in input.start()..=input.end() {
+            let any_matches = !patset.is_empty();
+            if curr.set.is_empty() {
+                if any_matches && !allmatches {
+                    break;
+                }
+                if anchored && at > input.start() {
+                    break;
+                }
+            }
+            if !any_matches || allmatches {
+                let slots = &mut [];
+                self.epsilon_closure(stack, slots, curr, input, at, start_id);
+            }
+            self.nexts_overlapping(stack, curr, next, input, at, patset);
+            // If we found a match and filled our set, then there is no more
+            // additional info that we can provide. Thus, we can quit. We also
+            // quit if the caller asked us to stop at the earliest point that
+            // we know a match exists.
+            if patset.is_full() || input.get_earliest() {
+                break;
+            }
+            core::mem::swap(curr, next);
+            next.set.clear();
+        }
+        instrument!(|c| c.eprint(&self.nfa));
+    }
+
+    /// Process the active states in 'curr' to find the states (written to
+    /// 'next') we should process for the next byte in the haystack.
+    ///
+    /// 'stack' is used to perform a depth first traversal of the NFA when
+    /// computing an epsilon closure.
+    ///
+    /// When a match is found, the slots for that match state (in 'curr') are
+    /// copied to 'caps'. Moreover, once a match is seen, processing for 'curr'
+    /// stops (unless the PikeVM was configured with MatchKind::All semantics).
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn nexts(
+        &self,
+        stack: &mut Vec<FollowEpsilon>,
+        curr: &mut ActiveStates,
+        next: &mut ActiveStates,
+        input: &Input<'_>,
+        at: usize,
+        slots: &mut [Option<NonMaxUsize>],
+    ) -> Option<PatternID> {
+        instrument!(|c| c.record_state_set(&curr.set));
+        let mut pid = None;
+        let ActiveStates { ref set, ref mut slot_table } = *curr;
+        for sid in set.iter() {
+            pid = match self.next(stack, slot_table, next, input, at, sid) {
+                None => continue,
+                Some(pid) => Some(pid),
+            };
+            slots.copy_from_slice(slot_table.for_state(sid));
+            if !self.config.get_match_kind().continue_past_first_match() {
+                break;
+            }
+        }
+        pid
     }
 
-    #[inline(always)]
-    fn step(
+    /// Like 'nexts', but for the overlapping case. This doesn't write any
+    /// slots, and instead just writes which pattern matched in 'patset'.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn nexts_overlapping(
         &self,
-        nlist: &mut Threads,
-        slots: &mut [Slot],
-        thread_caps: &mut [Slot],
         stack: &mut Vec<FollowEpsilon>,
-        sid: StateID,
-        haystack: &[u8],
+        curr: &mut ActiveStates,
+        next: &mut ActiveStates,
+        input: &Input<'_>,
         at: usize,
+        patset: &mut PatternSet,
+    ) {
+        instrument!(|c| c.record_state_set(&curr.set));
+        let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8();
+        let ActiveStates { ref set, ref mut slot_table } = *curr;
+        for sid in set.iter() {
+            let pid = match self.next(stack, slot_table, next, input, at, sid)
+            {
+                None => continue,
+                Some(pid) => pid,
+            };
+            // This handles the case of finding a zero-width match that splits
+            // a codepoint. Namely, if we're in UTF-8 mode AND we know we can
+            // match the empty string, then the only valid way of getting to
+            // this point with an offset that splits a codepoint is when we
+            // have an empty match. Such matches, in UTF-8 mode, must not be
+            // reported. So we just skip them here and pretend as if we did
+            // not see a match.
+            if utf8empty && !input.is_char_boundary(at) {
+                continue;
+            }
+            let _ = patset.try_insert(pid);
+            if !self.config.get_match_kind().continue_past_first_match() {
+                break;
+            }
+        }
+    }
+
+    /// Starting from 'sid', if the position 'at' in the 'input' haystack has a
+    /// transition defined out of 'sid', then add the state transitioned to and
+    /// its epsilon closure to the 'next' set of states to explore.
+    ///
+    /// 'stack' is used by the epsilon closure computation to perform a depth
+    /// first traversal of the NFA.
+    ///
+    /// 'curr_slot_table' should be the table of slots for the current set of
+    /// states being explored. If there is a transition out of 'sid', then
+    /// sid's row in the slot table is used to perform the epsilon closure.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn next(
+        &self,
+        stack: &mut Vec<FollowEpsilon>,
+        curr_slot_table: &mut SlotTable,
+        next: &mut ActiveStates,
+        input: &Input<'_>,
+        at: usize,
+        sid: StateID,
     ) -> Option<PatternID> {
+        instrument!(|c| c.record_step(sid));
         match *self.nfa.state(sid) {
             State::Fail
             | State::Look { .. }
             | State::Union { .. }
+            | State::BinaryUnion { .. }
             | State::Capture { .. } => None,
-            State::Range { ref range } => {
-                if range.matches(haystack, at) {
+            State::ByteRange { ref trans } => {
+                if trans.matches(input.haystack(), at) {
+                    let slots = curr_slot_table.for_state(sid);
+                    // OK because 'at <= haystack.len() < usize::MAX', so
+                    // adding 1 will never wrap.
+                    let at = at.wrapping_add(1);
                     self.epsilon_closure(
-                        nlist,
-                        thread_caps,
-                        stack,
-                        range.next,
-                        haystack,
-                        at + 1,
+                        stack, slots, next, input, at, trans.next,
                     );
                 }
                 None
             }
             State::Sparse(ref sparse) => {
-                if let Some(next) = sparse.matches(haystack, at) {
+                if let Some(next_sid) = sparse.matches(input.haystack(), at) {
+                    let slots = curr_slot_table.for_state(sid);
+                    // OK because 'at <= haystack.len() < usize::MAX', so
+                    // adding 1 will never wrap.
+                    let at = at.wrapping_add(1);
                     self.epsilon_closure(
-                        nlist,
-                        thread_caps,
-                        stack,
-                        next,
-                        haystack,
-                        at + 1,
+                        stack, slots, next, input, at, next_sid,
                     );
                 }
                 None
             }
-            State::Match { id } => {
-                slots.copy_from_slice(thread_caps);
-                Some(id)
+            State::Dense(ref dense) => {
+                if let Some(next_sid) = dense.matches(input.haystack(), at) {
+                    let slots = curr_slot_table.for_state(sid);
+                    // OK because 'at <= haystack.len() < usize::MAX', so
+                    // adding 1 will never wrap.
+                    let at = at.wrapping_add(1);
+                    self.epsilon_closure(
+                        stack, slots, next, input, at, next_sid,
+                    );
+                }
+                None
             }
+            State::Match { pattern_id } => Some(pattern_id),
         }
     }
 
-    #[inline(always)]
+    /// Compute the epsilon closure of 'sid', writing the closure into 'next'
+    /// while copying slot values from 'curr_slots' into corresponding states
+    /// in 'next'. 'curr_slots' should be the slot values corresponding to
+    /// 'sid'.
+    ///
+    /// The given 'stack' is used to perform a depth first traversal of the
+    /// NFA by recursively following all epsilon transitions out of 'sid'.
+    /// Conditional epsilon transitions are followed if and only if they are
+    /// satisfied for the position 'at' in the 'input' haystack.
+    ///
+    /// While this routine may write to 'curr_slots', once it returns, any
+    /// writes are undone and the original values (even if absent) are
+    /// restored.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
     fn epsilon_closure(
         &self,
-        nlist: &mut Threads,
-        thread_caps: &mut [Slot],
         stack: &mut Vec<FollowEpsilon>,
-        sid: StateID,
-        haystack: &[u8],
+        curr_slots: &mut [Option<NonMaxUsize>],
+        next: &mut ActiveStates,
+        input: &Input<'_>,
         at: usize,
+        sid: StateID,
     ) {
-        stack.push(FollowEpsilon::StateID(sid));
+        instrument!(|c| {
+            c.record_closure(sid);
+            c.record_stack_push(sid);
+        });
+        stack.push(FollowEpsilon::Explore(sid));
         while let Some(frame) = stack.pop() {
             match frame {
-                FollowEpsilon::StateID(sid) => {
-                    self.epsilon_closure_step(
-                        nlist,
-                        thread_caps,
-                        stack,
-                        sid,
-                        haystack,
-                        at,
-                    );
+                FollowEpsilon::RestoreCapture { slot, offset: pos } => {
+                    curr_slots[slot] = pos;
                 }
-                FollowEpsilon::Capture { slot, pos } => {
-                    thread_caps[slot] = pos;
+                FollowEpsilon::Explore(sid) => {
+                    self.epsilon_closure_explore(
+                        stack, curr_slots, next, input, at, sid,
+                    );
                 }
             }
         }
     }
 
-    #[inline(always)]
-    fn epsilon_closure_step(
+    /// Explore all of the epsilon transitions out of 'sid'. This is mostly
+    /// split out from 'epsilon_closure' in order to clearly delineate
+    /// the actual work of computing an epsilon closure from the stack
+    /// book-keeping.
+    ///
+    /// This will push any additional explorations needed on to 'stack'.
+    ///
+    /// 'curr_slots' should refer to the slots for the currently active NFA
+    /// state. That is, the current state we are stepping through. These
+    /// slots are mutated in place as new 'Captures' states are traversed
+    /// during epsilon closure, but the slots are restored to their original
+    /// values once the full epsilon closure is completed. The ultimate use of
+    /// 'curr_slots' is to copy them to the corresponding 'next_slots', so that
+    /// the capturing group spans are forwarded from the currently active state
+    /// to the next.
+    ///
+    /// 'next' refers to the next set of active states. Computing an epsilon
+    /// closure may increase the next set of active states.
+    ///
+    /// 'input' refers to the caller's input configuration and 'at' refers to
+    /// the current position in the haystack. These are used to check whether
+    /// conditional epsilon transitions (like look-around) are satisfied at
+    /// the current position. If they aren't, then the epsilon closure won't
+    /// include them.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn epsilon_closure_explore(
         &self,
-        nlist: &mut Threads,
-        thread_caps: &mut [Slot],
         stack: &mut Vec<FollowEpsilon>,
-        mut sid: StateID,
-        haystack: &[u8],
+        curr_slots: &mut [Option<NonMaxUsize>],
+        next: &mut ActiveStates,
+        input: &Input<'_>,
         at: usize,
+        mut sid: StateID,
     ) {
+        // We can avoid pushing some state IDs on to our stack in precisely
+        // the cases where a 'push(x)' would be immediately followed by a 'x
+        // = pop()'. This is achieved by this outer-loop. We simply set 'sid'
+        // to be the next state ID we want to explore once we're done with
+        // our initial exploration. In practice, this avoids a lot of stack
+        // thrashing.
         loop {
-            if !nlist.set.insert(sid) {
+            instrument!(|c| c.record_set_insert(sid));
+            // Record this state as part of our next set of active states. If
+            // we've already explored it, then no need to do it again.
+            if !next.set.insert(sid) {
                 return;
             }
             match *self.nfa.state(sid) {
                 State::Fail
-                | State::Range { .. }
+                | State::Match { .. }
+                | State::ByteRange { .. }
                 | State::Sparse { .. }
-                | State::Match { .. } => {
-                    let t = &mut nlist.caps(sid);
-                    t.copy_from_slice(thread_caps);
+                | State::Dense { .. } => {
+                    next.slot_table.for_state(sid).copy_from_slice(curr_slots);
                     return;
                 }
                 State::Look { look, next } => {
-                    if !look.matches(haystack, at) {
+                    // OK because we don't permit building a searcher with a
+                    // Unicode word boundary if the requisite Unicode data is
+                    // unavailable.
+                    if !self.nfa.look_matcher().matches_inline(
+                        look,
+                        input.haystack(),
+                        at,
+                    ) {
                         return;
                     }
                     sid = next;
@@ -381,174 +1710,650 @@ impl PikeVM {
                         None => return,
                         Some(&sid) => sid,
                     };
+                    instrument!(|c| {
+                        for &alt in &alternates[1..] {
+                            c.record_stack_push(alt);
+                        }
+                    });
                     stack.extend(
                         alternates[1..]
                             .iter()
                             .copied()
                             .rev()
-                            .map(FollowEpsilon::StateID),
+                            .map(FollowEpsilon::Explore),
                     );
                 }
-                State::Capture { next, slot } => {
-                    if slot < thread_caps.len() {
-                        stack.push(FollowEpsilon::Capture {
+                State::BinaryUnion { alt1, alt2 } => {
+                    sid = alt1;
+                    instrument!(|c| c.record_stack_push(sid));
+                    stack.push(FollowEpsilon::Explore(alt2));
+                }
+                State::Capture { next, slot, .. } => {
+                    // There's no need to do anything with slots that
+                    // ultimately won't be copied into the caller-provided
+                    // 'Captures' value. So we just skip dealing with them at
+                    // all.
+                    if slot.as_usize() < curr_slots.len() {
+                        instrument!(|c| c.record_stack_push(sid));
+                        stack.push(FollowEpsilon::RestoreCapture {
                             slot,
-                            pos: thread_caps[slot],
+                            offset: curr_slots[slot],
                         });
-                        thread_caps[slot] = Some(at);
+                        // OK because length of a slice must fit into an isize.
+                        curr_slots[slot] = Some(NonMaxUsize::new(at).unwrap());
                     }
                     sid = next;
                 }
             }
         }
     }
+
+    /// Return the starting configuration of a PikeVM search.
+    ///
+    /// The "start config" is basically whether the search should be anchored
+    /// or not and the NFA state ID at which to begin the search. The state ID
+    /// returned always corresponds to an anchored starting state even when the
+    /// search is unanchored. This is because the PikeVM search loop deals with
+    /// unanchored searches with an explicit epsilon closure out of the start
+    /// state.
+    ///
+    /// This routine accounts for both the caller's `Input` configuration
+    /// and the pattern itself. For example, even if the caller asks for an
+    /// unanchored search, if the pattern itself is anchored, then this will
+    /// always return 'true' because implementing an unanchored search in that
+    /// case would be incorrect.
+    ///
+    /// Similarly, if the caller requests an anchored search for a particular
+    /// pattern, then the starting state ID returned will reflect that.
+    ///
+    /// If a pattern ID is given in the input configuration that is not in
+    /// this regex, then `None` is returned.
+    fn start_config(&self, input: &Input<'_>) -> Option<(bool, StateID)> {
+        match input.get_anchored() {
+            // Only way we're unanchored is if both the caller asked for an
+            // unanchored search *and* the pattern is itself not anchored.
+            Anchored::No => Some((
+                self.nfa.is_always_start_anchored(),
+                self.nfa.start_anchored(),
+            )),
+            Anchored::Yes => Some((true, self.nfa.start_anchored())),
+            Anchored::Pattern(pid) => {
+                Some((true, self.nfa.start_pattern(pid)?))
+            }
+        }
+    }
 }
 
-/// An iterator over all non-overlapping leftmost matches for a particular
-/// infallible search.
+/// An iterator over all non-overlapping matches for a particular search.
 ///
-/// The iterator yields a [`MultiMatch`] value until no more matches could be
-/// found. If the underlying search returns an error, then this panics.
+/// The iterator yields a [`Match`] value until no more matches could be found.
 ///
-/// The lifetime variables are as follows:
+/// The lifetime parameters are as follows:
 ///
-/// * `'r` is the lifetime of the regular expression itself.
-/// * `'c` is the lifetime of the mutable cache used during search.
-/// * `'t` is the lifetime of the text being searched.
+/// * `'r` represents the lifetime of the PikeVM.
+/// * `'c` represents the lifetime of the PikeVM's cache.
+/// * `'h` represents the lifetime of the haystack being searched.
+///
+/// This iterator can be created with the [`PikeVM::find_iter`] method.
 #[derive(Debug)]
-pub struct FindLeftmostMatches<'r, 'c, 't> {
-    vm: &'r PikeVM,
+pub struct FindMatches<'r, 'c, 'h> {
+    re: &'r PikeVM,
     cache: &'c mut Cache,
-    // scanner: Option<prefilter::Scanner<'r>>,
-    text: &'t [u8],
-    last_end: usize,
-    last_match: Option<usize>,
+    caps: Captures,
+    it: iter::Searcher<'h>,
 }
 
-impl<'r, 'c, 't> FindLeftmostMatches<'r, 'c, 't> {
-    fn new(
-        vm: &'r PikeVM,
-        cache: &'c mut Cache,
-        text: &'t [u8],
-    ) -> FindLeftmostMatches<'r, 'c, 't> {
-        FindLeftmostMatches { vm, cache, text, last_end: 0, last_match: None }
+impl<'r, 'c, 'h> Iterator for FindMatches<'r, 'c, 'h> {
+    type Item = Match;
+
+    #[inline]
+    fn next(&mut self) -> Option<Match> {
+        // Splitting 'self' apart seems necessary to appease borrowck.
+        let FindMatches { re, ref mut cache, ref mut caps, ref mut it } =
+            *self;
+        // 'advance' converts errors into panics, which is OK here because
+        // the PikeVM can never return an error.
+        it.advance(|input| {
+            re.search(cache, input, caps);
+            Ok(caps.get_match())
+        })
     }
 }
 
-impl<'r, 'c, 't> Iterator for FindLeftmostMatches<'r, 'c, 't> {
-    // type Item = Captures;
-    type Item = MultiMatch;
+/// An iterator over all non-overlapping leftmost matches, with their capturing
+/// groups, for a particular search.
+///
+/// The iterator yields a [`Captures`] value until no more matches could be
+/// found.
+///
+/// The lifetime parameters are as follows:
+///
+/// * `'r` represents the lifetime of the PikeVM.
+/// * `'c` represents the lifetime of the PikeVM's cache.
+/// * `'h` represents the lifetime of the haystack being searched.
+///
+/// This iterator can be created with the [`PikeVM::captures_iter`] method.
+#[derive(Debug)]
+pub struct CapturesMatches<'r, 'c, 'h> {
+    re: &'r PikeVM,
+    cache: &'c mut Cache,
+    caps: Captures,
+    it: iter::Searcher<'h>,
+}
 
-    // fn next(&mut self) -> Option<Captures> {
-    fn next(&mut self) -> Option<MultiMatch> {
-        if self.last_end > self.text.len() {
-            return None;
-        }
-        let mut caps = self.vm.create_captures();
-        let m = self.vm.find_leftmost_at(
-            self.cache,
-            self.text,
-            self.last_end,
-            self.text.len(),
-            &mut caps,
-        )?;
-        if m.is_empty() {
-            // This is an empty match. To ensure we make progress, start
-            // the next search at the smallest possible starting position
-            // of the next match following this one.
-            self.last_end = if self.vm.config.get_utf8() {
-                crate::util::next_utf8(self.text, m.end())
-            } else {
-                m.end() + 1
-            };
-            // Don't accept empty matches immediately following a match.
-            // Just move on to the next match.
-            if Some(m.end()) == self.last_match {
-                return self.next();
-            }
+impl<'r, 'c, 'h> Iterator for CapturesMatches<'r, 'c, 'h> {
+    type Item = Captures;
+
+    #[inline]
+    fn next(&mut self) -> Option<Captures> {
+        // Splitting 'self' apart seems necessary to appease borrowck.
+        let CapturesMatches { re, ref mut cache, ref mut caps, ref mut it } =
+            *self;
+        // 'advance' converts errors into panics, which is OK here because
+        // the PikeVM can never return an error.
+        it.advance(|input| {
+            re.search(cache, input, caps);
+            Ok(caps.get_match())
+        });
+        if caps.is_match() {
+            Some(caps.clone())
         } else {
-            self.last_end = m.end();
+            None
         }
-        self.last_match = Some(m.end());
-        Some(m)
     }
 }
 
+/// A cache represents mutable state that a [`PikeVM`] requires during a
+/// search.
+///
+/// For a given [`PikeVM`], its corresponding cache may be created either via
+/// [`PikeVM::create_cache`], or via [`Cache::new`]. They are equivalent in
+/// every way, except the former does not require explicitly importing `Cache`.
+///
+/// A particular `Cache` is coupled with the [`PikeVM`] from which it
+/// was created. It may only be used with that `PikeVM`. A cache and its
+/// allocations may be re-purposed via [`Cache::reset`], in which case, it can
+/// only be used with the new `PikeVM` (and not the old one).
 #[derive(Clone, Debug)]
-pub struct Captures {
-    slots: Vec<Slot>,
+pub struct Cache {
+    /// Stack used while computing epsilon closure. This effectively lets us
+    /// move what is more naturally expressed through recursion to a stack
+    /// on the heap.
+    stack: Vec<FollowEpsilon>,
+    /// The current active states being explored for the current byte in the
+    /// haystack.
+    curr: ActiveStates,
+    /// The next set of states we're building that will be explored for the
+    /// next byte in the haystack.
+    next: ActiveStates,
 }
 
-impl Captures {
-    pub fn new(nfa: &NFA) -> Captures {
-        Captures { slots: vec![None; nfa.capture_slot_len()] }
+impl Cache {
+    /// Create a new [`PikeVM`] cache.
+    ///
+    /// A potentially more convenient routine to create a cache is
+    /// [`PikeVM::create_cache`], as it does not require also importing the
+    /// `Cache` type.
+    ///
+    /// If you want to reuse the returned `Cache` with some other `PikeVM`,
+    /// then you must call [`Cache::reset`] with the desired `PikeVM`.
+    pub fn new(re: &PikeVM) -> Cache {
+        Cache {
+            stack: vec![],
+            curr: ActiveStates::new(re),
+            next: ActiveStates::new(re),
+        }
+    }
+
+    /// Reset this cache such that it can be used for searching with a
+    /// different [`PikeVM`].
+    ///
+    /// A cache reset permits reusing memory already allocated in this cache
+    /// with a different `PikeVM`.
+    ///
+    /// # Example
+    ///
+    /// This shows how to re-purpose a cache for use with a different `PikeVM`.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
+    ///
+    /// let re1 = PikeVM::new(r"\w")?;
+    /// let re2 = PikeVM::new(r"\W")?;
+    ///
+    /// let mut cache = re1.create_cache();
+    /// assert_eq!(
+    ///     Some(Match::must(0, 0..2)),
+    ///     re1.find_iter(&mut cache, "Δ").next(),
+    /// );
+    ///
+    /// // Using 'cache' with re2 is not allowed. It may result in panics or
+    /// // incorrect results. In order to re-purpose the cache, we must reset
+    /// // it with the PikeVM we'd like to use it with.
+    /// //
+    /// // Similarly, after this reset, using the cache with 're1' is also not
+    /// // allowed.
+    /// cache.reset(&re2);
+    /// assert_eq!(
+    ///     Some(Match::must(0, 0..3)),
+    ///     re2.find_iter(&mut cache, "☃").next(),
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn reset(&mut self, re: &PikeVM) {
+        self.curr.reset(re);
+        self.next.reset(re);
+    }
+
+    /// Returns the heap memory usage, in bytes, of this cache.
+    ///
+    /// This does **not** include the stack size used up by this cache. To
+    /// compute that, use `std::mem::size_of::<Cache>()`.
+    pub fn memory_usage(&self) -> usize {
+        use core::mem::size_of;
+        (self.stack.len() * size_of::<FollowEpsilon>())
+            + self.curr.memory_usage()
+            + self.next.memory_usage()
+    }
+
+    /// Clears this cache. This should be called at the start of every search
+    /// to ensure we start with a clean slate.
+    ///
+    /// This also sets the length of the capturing groups used in the current
+    /// search. This permits an optimization where by 'SlotTable::for_state'
+    /// only returns the number of slots equivalent to the number of slots
+    /// given in the 'Captures' value. This may be less than the total number
+    /// of possible slots, e.g., when one only wants to track overall match
+    /// offsets. This in turn permits less copying of capturing group spans
+    /// in the PikeVM.
+    fn setup_search(&mut self, captures_slot_len: usize) {
+        self.stack.clear();
+        self.curr.setup_search(captures_slot_len);
+        self.next.setup_search(captures_slot_len);
     }
 }
 
+/// A set of active states used to "simulate" the execution of an NFA via the
+/// PikeVM.
+///
+/// There are two sets of these used during NFA simulation. One set corresponds
+/// to the "current" set of states being traversed for the current position
+/// in a haystack. The other set corresponds to the "next" set of states being
+/// built, which will become the new "current" set for the next position in the
+/// haystack. These two sets correspond to CLIST and NLIST in Thompson's
+/// original paper regexes: https://dl.acm.org/doi/pdf/10.1145/363347.363387
+///
+/// In addition to representing a set of NFA states, this also maintains slot
+/// values for each state. These slot values are what turn the NFA simulation
+/// into the "Pike VM." Namely, they track capturing group values for each
+/// state. During the computation of epsilon closure, we copy slot values from
+/// states in the "current" set to the "next" set. Eventually, once a match
+/// is found, the slot values for that match state are what we write to the
+/// caller provided 'Captures' value.
 #[derive(Clone, Debug)]
-pub struct Cache {
-    stack: Vec<FollowEpsilon>,
-    clist: Threads,
-    nlist: Threads,
+struct ActiveStates {
+    /// The set of active NFA states. This set preserves insertion order, which
+    /// is critical for simulating the match semantics of backtracking regex
+    /// engines.
+    set: SparseSet,
+    /// The slots for every NFA state, where each slot stores a (possibly
+    /// absent) offset. Every capturing group has two slots. One for a start
+    /// offset and one for an end offset.
+    slot_table: SlotTable,
 }
 
-type Slot = Option<usize>;
+impl ActiveStates {
+    /// Create a new set of active states for the given PikeVM. The active
+    /// states returned may only be used with the given PikeVM. (Use 'reset'
+    /// to re-purpose the allocation for a different PikeVM.)
+    fn new(re: &PikeVM) -> ActiveStates {
+        let mut active = ActiveStates {
+            set: SparseSet::new(0),
+            slot_table: SlotTable::new(),
+        };
+        active.reset(re);
+        active
+    }
+
+    /// Reset this set of active states such that it can be used with the given
+    /// PikeVM (and only that PikeVM).
+    fn reset(&mut self, re: &PikeVM) {
+        self.set.resize(re.get_nfa().states().len());
+        self.slot_table.reset(re);
+    }
+
+    /// Return the heap memory usage, in bytes, used by this set of active
+    /// states.
+    ///
+    /// This does not include the stack size of this value.
+    fn memory_usage(&self) -> usize {
+        self.set.memory_usage() + self.slot_table.memory_usage()
+    }
 
+    /// Setup this set of active states for a new search. The given slot
+    /// length should be the number of slots in a caller provided 'Captures'
+    /// (and may be zero).
+    fn setup_search(&mut self, captures_slot_len: usize) {
+        self.set.clear();
+        self.slot_table.setup_search(captures_slot_len);
+    }
+}
+
+/// A table of slots, where each row represent a state in an NFA. Thus, the
+/// table has room for storing slots for every single state in an NFA.
+///
+/// This table is represented with a single contiguous allocation. In general,
+/// the notion of "capturing group" doesn't really exist at this level of
+/// abstraction, hence the name "slot" instead. (Indeed, every capturing group
+/// maps to a pair of slots, one for the start offset and one for the end
+/// offset.) Slots are indexed by the 'Captures' NFA state.
+///
+/// N.B. Not every state actually needs a row of slots. Namely, states that
+/// only have epsilon transitions currently never have anything written to
+/// their rows in this table. Thus, the table is somewhat wasteful in its heap
+/// usage. However, it is important to maintain fast random access by state
+/// ID, which means one giant table tends to work well. RE2 takes a different
+/// approach here and allocates each row as its own reference counted thing.
+/// I explored such a strategy at one point here, but couldn't get it to work
+/// well using entirely safe code. (To the ambitious reader: I encourage you to
+/// re-litigate that experiment.) I very much wanted to stick to safe code, but
+/// could be convinced otherwise if there was a solid argument and the safety
+/// was encapsulated well.
 #[derive(Clone, Debug)]
-struct Threads {
-    set: SparseSet,
-    caps: Vec<Slot>,
-    slots_per_thread: usize,
+struct SlotTable {
+    /// The actual table of offsets.
+    table: Vec<Option<NonMaxUsize>>,
+    /// The number of slots per state, i.e., the table's stride or the length
+    /// of each row.
+    slots_per_state: usize,
+    /// The number of slots in the caller-provided 'Captures' value for the
+    /// current search. Setting this to 'slots_per_state' is always correct,
+    /// but may be wasteful.
+    slots_for_captures: usize,
+}
+
+impl SlotTable {
+    /// Create a new slot table.
+    ///
+    /// One should call 'reset' with the corresponding PikeVM before use.
+    fn new() -> SlotTable {
+        SlotTable { table: vec![], slots_for_captures: 0, slots_per_state: 0 }
+    }
+
+    /// Reset this slot table such that it can be used with the given PikeVM
+    /// (and only that PikeVM).
+    fn reset(&mut self, re: &PikeVM) {
+        let nfa = re.get_nfa();
+        self.slots_per_state = nfa.group_info().slot_len();
+        // This is always correct, but may be reduced for a particular search
+        // if a 'Captures' has fewer slots, e.g., none at all or only slots
+        // for tracking the overall match instead of all slots for every
+        // group.
+        self.slots_for_captures = core::cmp::max(
+            self.slots_per_state,
+            nfa.pattern_len().checked_mul(2).unwrap(),
+        );
+        let len = nfa
+            .states()
+            .len()
+            .checked_mul(self.slots_per_state)
+            // Add space to account for scratch space used during a search.
+            .and_then(|x| x.checked_add(self.slots_for_captures))
+            // It seems like this could actually panic on legitimate inputs on
+            // 32-bit targets, and very likely to panic on 16-bit. Should we
+            // somehow convert this to an error? What about something similar
+            // for the lazy DFA cache? If you're tripping this assert, please
+            // file a bug.
+            .expect("slot table length doesn't overflow");
+        // This happens about as often as a regex is compiled, so it probably
+        // should be at debug level, but I found it quite distracting and not
+        // particularly useful.
+        trace!(
+            "resizing PikeVM active states table to {} entries \
+             (slots_per_state={})",
+            len,
+            self.slots_per_state,
+        );
+        self.table.resize(len, None);
+    }
+
+    /// Return the heap memory usage, in bytes, used by this slot table.
+    ///
+    /// This does not include the stack size of this value.
+    fn memory_usage(&self) -> usize {
+        self.table.len() * core::mem::size_of::<Option<NonMaxUsize>>()
+    }
+
+    /// Perform any per-search setup for this slot table.
+    ///
+    /// In particular, this sets the length of the number of slots used in the
+    /// 'Captures' given by the caller (if any at all). This number may be
+    /// smaller than the total number of slots available, e.g., when the caller
+    /// is only interested in tracking the overall match and not the spans of
+    /// every matching capturing group. Only tracking the overall match can
+    /// save a substantial amount of time copying capturing spans during a
+    /// search.
+    fn setup_search(&mut self, captures_slot_len: usize) {
+        self.slots_for_captures = captures_slot_len;
+    }
+
+    /// Return a mutable slice of the slots for the given state.
+    ///
+    /// Note that the length of the slice returned may be less than the total
+    /// number of slots available for this state. In particular, the length
+    /// always matches the number of slots indicated via 'setup_search'.
+    fn for_state(&mut self, sid: StateID) -> &mut [Option<NonMaxUsize>] {
+        let i = sid.as_usize() * self.slots_per_state;
+        &mut self.table[i..i + self.slots_for_captures]
+    }
+
+    /// Return a slice of slots of appropriate length where every slot offset
+    /// is guaranteed to be absent. This is useful in cases where you need to
+    /// compute an epsilon closure outside of the user supplied regex, and thus
+    /// never want it to have any capturing slots set.
+    fn all_absent(&mut self) -> &mut [Option<NonMaxUsize>] {
+        let i = self.table.len() - self.slots_for_captures;
+        &mut self.table[i..i + self.slots_for_captures]
+    }
 }
 
+/// Represents a stack frame for use while computing an epsilon closure.
+///
+/// (An "epsilon closure" refers to the set of reachable NFA states from a
+/// single state without consuming any input. That is, the set of all epsilon
+/// transitions not only from that single state, but from every other state
+/// reachable by an epsilon transition as well. This is why it's called a
+/// "closure." Computing an epsilon closure is also done during DFA
+/// determinization! Compare and contrast the epsilon closure here in this
+/// PikeVM and the one used for determinization in crate::util::determinize.)
+///
+/// Computing the epsilon closure in a Thompson NFA proceeds via a depth
+/// first traversal over all epsilon transitions from a particular state.
+/// (A depth first traversal is important because it emulates the same priority
+/// of matches that is typically found in backtracking regex engines.) This
+/// depth first traversal is naturally expressed using recursion, but to avoid
+/// a call stack size proportional to the size of a regex, we put our stack on
+/// the heap instead.
+///
+/// This stack thus consists of call frames. The typical call frame is
+/// `Explore`, which instructs epsilon closure to explore the epsilon
+/// transitions from that state. (Subsequent epsilon transitions are then
+/// pushed on to the stack as more `Explore` frames.) If the state ID being
+/// explored has no epsilon transitions, then the capturing group slots are
+/// copied from the original state that sparked the epsilon closure (from the
+/// 'step' routine) to the state ID being explored. This way, capturing group
+/// slots are forwarded from the previous state to the next.
+///
+/// The other stack frame, `RestoreCaptures`, instructs the epsilon closure to
+/// set the position for a particular slot back to some particular offset. This
+/// frame is pushed when `Explore` sees a `Capture` transition. `Explore` will
+/// set the offset of the slot indicated in `Capture` to the current offset,
+/// and then push the old offset on to the stack as a `RestoreCapture` frame.
+/// Thus, the new offset is only used until the epsilon closure reverts back to
+/// the `RestoreCapture` frame. In effect, this gives the `Capture` epsilon
+/// transition its "scope" to only states that come "after" it during depth
+/// first traversal.
 #[derive(Clone, Debug)]
 enum FollowEpsilon {
-    StateID(StateID),
-    Capture { slot: usize, pos: Slot },
+    /// Explore the epsilon transitions from a state ID.
+    Explore(StateID),
+    /// Reset the given `slot` to the given `offset` (which might be `None`).
+    RestoreCapture { slot: SmallIndex, offset: Option<NonMaxUsize> },
 }
 
-impl Cache {
-    pub fn new(nfa: &NFA) -> Cache {
-        Cache {
-            stack: vec![],
-            clist: Threads::new(nfa),
-            nlist: Threads::new(nfa),
+/// A set of counters that "instruments" a PikeVM search. To enable this, you
+/// must enable the 'internal-instrument-pikevm' feature. Then run your Rust
+/// program with RUST_LOG=regex_automata::nfa::thompson::pikevm=trace set in
+/// the environment. The metrics collected will be dumped automatically for
+/// every search executed by the PikeVM.
+///
+/// NOTE: When 'internal-instrument-pikevm' is enabled, it will likely cause an
+/// absolute decrease in wall-clock performance, even if the 'trace' log level
+/// isn't enabled. (Although, we do try to avoid extra costs when 'trace' isn't
+/// enabled.) The main point of instrumentation is to get counts of various
+/// events that occur during the PikeVM's execution.
+///
+/// This is a somewhat hacked together collection of metrics that are useful
+/// to gather from a PikeVM search. In particular, it lets us scrutinize the
+/// performance profile of a search beyond what general purpose profiling tools
+/// give us. Namely, we orient the profiling data around the specific states of
+/// the NFA.
+///
+/// In other words, this lets us see which parts of the NFA graph are most
+/// frequently activated. This then provides direction for optimization
+/// opportunities.
+///
+/// The really sad part about this is that it absolutely clutters up the PikeVM
+/// implementation. :'( Another approach would be to just manually add this
+/// code in whenever I want this kind of profiling data, but it's complicated
+/// and tedious enough that I went with this approach... for now.
+///
+/// When instrumentation is enabled (which also turns on 'logging'), then a
+/// `Counters` is initialized for every search and `trace`'d just before the
+/// search returns to the caller.
+///
+/// Tip: When debugging performance problems with the PikeVM, it's best to try
+/// to work with an NFA that is as small as possible. Otherwise the state graph
+/// is likely to be too big to digest.
+#[cfg(feature = "internal-instrument-pikevm")]
+#[derive(Clone, Debug)]
+struct Counters {
+    /// The number of times the NFA is in a particular permutation of states.
+    state_sets: alloc::collections::BTreeMap<Vec<StateID>, u64>,
+    /// The number of times 'step' is called for a particular state ID (which
+    /// indexes this array).
+    steps: Vec<u64>,
+    /// The number of times an epsilon closure was computed for a state.
+    closures: Vec<u64>,
+    /// The number of times a particular state ID is pushed on to a stack while
+    /// computing an epsilon closure.
+    stack_pushes: Vec<u64>,
+    /// The number of times a particular state ID is inserted into a sparse set
+    /// while computing an epsilon closure.
+    set_inserts: Vec<u64>,
+}
+
+#[cfg(feature = "internal-instrument-pikevm")]
+impl Counters {
+    fn empty() -> Counters {
+        Counters {
+            state_sets: alloc::collections::BTreeMap::new(),
+            steps: vec![],
+            closures: vec![],
+            stack_pushes: vec![],
+            set_inserts: vec![],
         }
     }
 
-    fn clear(&mut self) {
-        self.stack.clear();
-        self.clist.set.clear();
-        self.nlist.set.clear();
+    fn reset(&mut self, nfa: &NFA) {
+        let len = nfa.states().len();
+
+        self.state_sets.clear();
+
+        self.steps.clear();
+        self.steps.resize(len, 0);
+
+        self.closures.clear();
+        self.closures.resize(len, 0);
+
+        self.stack_pushes.clear();
+        self.stack_pushes.resize(len, 0);
+
+        self.set_inserts.clear();
+        self.set_inserts.resize(len, 0);
+    }
+
+    fn eprint(&self, nfa: &NFA) {
+        trace!("===== START PikeVM Instrumentation Output =====");
+        // We take the top-K most occurring state sets. Otherwise the output
+        // is likely to be overwhelming. And we probably only care about the
+        // most frequently occurring ones anyway.
+        const LIMIT: usize = 20;
+        let mut set_counts =
+            self.state_sets.iter().collect::<Vec<(&Vec<StateID>, &u64)>>();
+        set_counts.sort_by_key(|(_, &count)| core::cmp::Reverse(count));
+        trace!("## PikeVM frequency of state sets (top {})", LIMIT);
+        for (set, count) in set_counts.iter().take(LIMIT) {
+            trace!("{:?}: {}", set, count);
+        }
+        if set_counts.len() > LIMIT {
+            trace!(
+                "... {} sets omitted (out of {} total)",
+                set_counts.len() - LIMIT,
+                set_counts.len(),
+            );
+        }
+
+        trace!("");
+        trace!("## PikeVM total frequency of events");
+        trace!(
+            "steps: {}, closures: {}, stack-pushes: {}, set-inserts: {}",
+            self.steps.iter().copied().sum::<u64>(),
+            self.closures.iter().copied().sum::<u64>(),
+            self.stack_pushes.iter().copied().sum::<u64>(),
+            self.set_inserts.iter().copied().sum::<u64>(),
+        );
+
+        trace!("");
+        trace!("## PikeVM frequency of events broken down by state");
+        for sid in 0..self.steps.len() {
+            trace!(
+                "{:06}: steps: {}, closures: {}, \
+                 stack-pushes: {}, set-inserts: {}",
+                sid,
+                self.steps[sid],
+                self.closures[sid],
+                self.stack_pushes[sid],
+                self.set_inserts[sid],
+            );
+        }
+
+        trace!("");
+        trace!("## NFA debug display");
+        trace!("{:?}", nfa);
+        trace!("===== END PikeVM Instrumentation Output =====");
     }
 
-    fn swap(&mut self) {
-        core::mem::swap(&mut self.clist, &mut self.nlist);
+    fn record_state_set(&mut self, set: &SparseSet) {
+        let set = set.iter().collect::<Vec<StateID>>();
+        *self.state_sets.entry(set).or_insert(0) += 1;
     }
-}
 
-impl Threads {
-    fn new(nfa: &NFA) -> Threads {
-        let mut threads = Threads {
-            set: SparseSet::new(0),
-            caps: vec![],
-            slots_per_thread: 0,
-        };
-        threads.resize(nfa);
-        threads
+    fn record_step(&mut self, sid: StateID) {
+        self.steps[sid] += 1;
     }
 
-    fn resize(&mut self, nfa: &NFA) {
-        if nfa.states().len() == self.set.capacity() {
-            return;
-        }
-        self.slots_per_thread = nfa.capture_slot_len();
-        self.set.resize(nfa.states().len());
-        self.caps.resize(self.slots_per_thread * nfa.states().len(), None);
+    fn record_closure(&mut self, sid: StateID) {
+        self.closures[sid] += 1;
+    }
+
+    fn record_stack_push(&mut self, sid: StateID) {
+        self.stack_pushes[sid] += 1;
     }
 
-    fn caps(&mut self, sid: StateID) -> &mut [Slot] {
-        let i = sid.as_usize() * self.slots_per_thread;
-        &mut self.caps[i..i + self.slots_per_thread]
+    fn record_set_insert(&mut self, sid: StateID) {
+        self.set_inserts[sid] += 1;
     }
 }