1 files changed, 1614 insertions, 1050 deletions
diff --git a/vendor/regex-automata/src/hybrid/dfa.rs b/vendor/regex-automata/src/hybrid/dfa.rs
index 1fbce5f5f..67261c1a3 100644
--- a/vendor/regex-automata/src/hybrid/dfa.rs
+++ b/vendor/regex-automata/src/hybrid/dfa.rs
@@ -7,36 +7,47 @@ This module also contains a [`hybrid::dfa::Builder`](Builder) and a
 [`hybrid::dfa::Config`](Config) for configuring and building a lazy DFA.
 */
 
-use core::{borrow::Borrow, iter, mem::size_of};
+use core::{iter, mem::size_of};
 
-use alloc::{sync::Arc, vec::Vec};
+use alloc::vec::Vec;
 
 use crate::{
     hybrid::{
         error::{BuildError, CacheError},
-        id::{LazyStateID, LazyStateIDError, OverlappingState},
+        id::{LazyStateID, LazyStateIDError},
         search,
     },
     nfa::thompson,
     util::{
         alphabet::{self, ByteClasses, ByteSet},
         determinize::{self, State, StateBuilderEmpty, StateBuilderNFA},
-        id::{PatternID, StateID as NFAStateID},
-        matchtypes::{HalfMatch, MatchError, MatchKind},
-        prefilter,
+        empty,
+        prefilter::Prefilter,
+        primitives::{PatternID, StateID as NFAStateID},
+        search::{
+            Anchored, HalfMatch, Input, MatchError, MatchKind, PatternSet,
+        },
         sparse_set::SparseSets,
-        start::Start,
+        start::{Start, StartByteMap},
     },
 };
 
-/// The mininum number of states that a lazy DFA's cache size must support.
+/// The minimum number of states that a lazy DFA's cache size must support.
 ///
 /// This is checked at time of construction to ensure that at least some small
 /// number of states can fit in the given capacity allotment. If we can't fit
 /// at least this number of states, then the thinking is that it's pretty
 /// senseless to use the lazy DFA. More to the point, parts of the code do
 /// assume that the cache can fit at least some small number of states.
-const MIN_STATES: usize = 5;
+const MIN_STATES: usize = SENTINEL_STATES + 2;
+
+/// The number of "sentinel" states that get added to every lazy DFA.
+///
+/// These are special states indicating status conditions of a search: unknown,
+/// dead and quit. These states in particular also use zero NFA states, so
+/// their memory usage is quite small. This is relevant for computing the
+/// minimum memory needed for a lazy DFA cache.
+const SENTINEL_STATES: usize = 3;
 
 /// A hybrid NFA/DFA (also called a "lazy DFA") for regex searching.
 ///
@@ -92,26 +103,26 @@ const MIN_STATES: usize = 5;
 /// a cache and pass it to our search routine.
 ///
 /// ```
-/// use regex_automata::{hybrid::dfa::DFA, HalfMatch};
+/// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input};
 ///
 /// let dfa = DFA::new("foo[0-9]+")?;
 /// let mut cache = dfa.create_cache();
 ///
 /// let expected = Some(HalfMatch::must(0, 8));
-/// assert_eq!(expected, dfa.find_leftmost_fwd(&mut cache, b"foo12345")?);
+/// assert_eq!(expected, dfa.try_search_fwd(
+///     &mut cache, &Input::new("foo12345"))?,
+/// );
 /// # Ok::<(), Box<dyn std::error::Error>>(())
 /// ```
 #[derive(Clone, Debug)]
 pub struct DFA {
-    nfa: Arc<thompson::NFA>,
+    config: Config,
+    nfa: thompson::NFA,
     stride2: usize,
+    start_map: StartByteMap,
     classes: ByteClasses,
     quitset: ByteSet,
-    anchored: bool,
-    match_kind: MatchKind,
-    starts_for_each_pattern: bool,
     cache_capacity: usize,
-    minimum_cache_clear_count: Option<usize>,
 }
 
 impl DFA {
@@ -124,7 +135,7 @@ impl DFA {
     /// # Example
     ///
     /// ```
-    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch};
+    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input};
     ///
     /// let dfa = DFA::new("foo[0-9]+bar")?;
     /// let mut cache = dfa.create_cache();
@@ -132,10 +143,11 @@ impl DFA {
     /// let expected = HalfMatch::must(0, 11);
     /// assert_eq!(
     ///     Some(expected),
-    ///     dfa.find_leftmost_fwd(&mut cache, b"foo12345bar")?,
+    ///     dfa.try_search_fwd(&mut cache, &Input::new("foo12345bar"))?,
     /// );
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
+    #[cfg(feature = "syntax")]
     pub fn new(pattern: &str) -> Result<DFA, BuildError> {
         DFA::builder().build(pattern)
     }
@@ -149,7 +161,7 @@ impl DFA {
     /// # Example
     ///
     /// ```
-    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch};
+    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input};
     ///
     /// let dfa = DFA::new_many(&["[0-9]+", "[a-z]+"])?;
     /// let mut cache = dfa.create_cache();
@@ -157,10 +169,11 @@ impl DFA {
     /// let expected = HalfMatch::must(1, 3);
     /// assert_eq!(
     ///     Some(expected),
-    ///     dfa.find_leftmost_fwd(&mut cache, b"foo12345bar")?,
+    ///     dfa.try_search_fwd(&mut cache, &Input::new("foo12345bar"))?,
     /// );
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
+    #[cfg(feature = "syntax")]
     pub fn new_many<P: AsRef<str>>(patterns: &[P]) -> Result<DFA, BuildError> {
         DFA::builder().build_many(patterns)
     }
@@ -170,19 +183,23 @@ impl DFA {
     /// # Example
     ///
     /// ```
-    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch};
+    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input};
     ///
     /// let dfa = DFA::always_match()?;
     /// let mut cache = dfa.create_cache();
     ///
     /// let expected = HalfMatch::must(0, 0);
-    /// assert_eq!(Some(expected), dfa.find_leftmost_fwd(&mut cache, b"")?);
-    /// assert_eq!(Some(expected), dfa.find_leftmost_fwd(&mut cache, b"foo")?);
+    /// assert_eq!(Some(expected), dfa.try_search_fwd(
+    ///     &mut cache, &Input::new(""))?,
+    /// );
+    /// assert_eq!(Some(expected), dfa.try_search_fwd(
+    ///     &mut cache, &Input::new("foo"))?,
+    /// );
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     pub fn always_match() -> Result<DFA, BuildError> {
         let nfa = thompson::NFA::always_match();
-        Builder::new().build_from_nfa(Arc::new(nfa))
+        Builder::new().build_from_nfa(nfa)
     }
 
     /// Create a new lazy DFA that never matches any input.
@@ -190,44 +207,51 @@ impl DFA {
     /// # Example
     ///
     /// ```
-    /// use regex_automata::hybrid::dfa::DFA;
+    /// use regex_automata::{hybrid::dfa::DFA, Input};
     ///
     /// let dfa = DFA::never_match()?;
     /// let mut cache = dfa.create_cache();
     ///
-    /// assert_eq!(None, dfa.find_leftmost_fwd(&mut cache, b"")?);
-    /// assert_eq!(None, dfa.find_leftmost_fwd(&mut cache, b"foo")?);
+    /// assert_eq!(None, dfa.try_search_fwd(&mut cache, &Input::new(""))?);
+    /// assert_eq!(None, dfa.try_search_fwd(&mut cache, &Input::new("foo"))?);
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     pub fn never_match() -> Result<DFA, BuildError> {
         let nfa = thompson::NFA::never_match();
-        Builder::new().build_from_nfa(Arc::new(nfa))
+        Builder::new().build_from_nfa(nfa)
     }
 
     /// Return a default configuration for a `DFA`.
     ///
-    /// This is a convenience routine to avoid needing to import the `Config`
+    /// This is a convenience routine to avoid needing to import the [`Config`]
     /// type when customizing the construction of a lazy DFA.
     ///
     /// # Example
     ///
-    /// This example shows how to build a lazy DFA that only executes searches
-    /// in anchored mode.
+    /// This example shows how to build a lazy DFA that heuristically supports
+    /// Unicode word boundaries.
     ///
     /// ```
-    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch};
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, MatchError, Input};
     ///
     /// let re = DFA::builder()
-    ///     .configure(DFA::config().anchored(true))
-    ///     .build(r"[0-9]+")?;
+    ///     .configure(DFA::config().unicode_word_boundary(true))
+    ///     .build(r"\b\w+\b")?;
     /// let mut cache = re.create_cache();
     ///
-    /// let haystack = "abc123xyz".as_bytes();
-    /// assert_eq!(None, re.find_leftmost_fwd(&mut cache, haystack)?);
-    /// assert_eq!(
-    ///     Some(HalfMatch::must(0, 3)),
-    ///     re.find_leftmost_fwd(&mut cache, &haystack[3..6])?,
-    /// );
+    /// // Since our haystack is all ASCII, the DFA search sees then and knows
+    /// // it is legal to interpret Unicode word boundaries as ASCII word
+    /// // boundaries.
+    /// let input = Input::new("!!foo!!");
+    /// let expected = HalfMatch::must(0, 5);
+    /// assert_eq!(Some(expected), re.try_search_fwd(&mut cache, &input)?);
+    ///
+    /// // But if our haystack contains non-ASCII, then the search will fail
+    /// // with an error.
+    /// let input = Input::new("!!βββ!!");
+    /// let expected = MatchError::quit(b'\xCE', 2);
+    /// assert_eq!(Err(expected), re.try_search_fwd(&mut cache, &input));
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
@@ -243,29 +267,20 @@ impl DFA {
     /// # Example
     ///
     /// This example shows how to use the builder to disable UTF-8 mode
-    /// everywhere for lazy DFAs. This includes disabling it for both the
-    /// concrete syntax (e.g., `.` matches any byte and Unicode character
-    /// classes like `\p{Letter}` are not allowed) and for the unanchored
-    /// search prefix. The latter enables the regex to match anywhere in a
-    /// sequence of arbitrary bytes. (Typically, the unanchored search prefix
-    /// will only permit matching valid UTF-8.)
+    /// everywhere for lazy DFAs.
     ///
     /// ```
-    /// use regex_automata::{
-    ///     hybrid::dfa::DFA,
-    ///     nfa::thompson,
-    ///     HalfMatch, SyntaxConfig,
-    /// };
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{hybrid::dfa::DFA, util::syntax, HalfMatch, Input};
     ///
     /// let re = DFA::builder()
-    ///     .syntax(SyntaxConfig::new().utf8(false))
-    ///     .thompson(thompson::Config::new().utf8(false))
+    ///     .syntax(syntax::Config::new().utf8(false))
     ///     .build(r"foo(?-u:[^b])ar.*")?;
     /// let mut cache = re.create_cache();
     ///
-    /// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n";
+    /// let input = Input::new(b"\xFEfoo\xFFarzz\xE2\x98\xFF\n");
     /// let expected = Some(HalfMatch::must(0, 9));
-    /// let got = re.find_leftmost_fwd(&mut cache, haystack)?;
+    /// let got = re.try_search_fwd(&mut cache, &input)?;
     /// assert_eq!(expected, got);
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
@@ -302,7 +317,8 @@ impl DFA {
     /// This shows how to re-purpose a cache for use with a different DFA.
     ///
     /// ```
-    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch};
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input};
     ///
     /// let dfa1 = DFA::new(r"\w")?;
     /// let dfa2 = DFA::new(r"\W")?;
@@ -310,7 +326,7 @@ impl DFA {
     /// let mut cache = dfa1.create_cache();
     /// assert_eq!(
     ///     Some(HalfMatch::must(0, 2)),
-    ///     dfa1.find_leftmost_fwd(&mut cache, "Δ".as_bytes())?,
+    ///     dfa1.try_search_fwd(&mut cache, &Input::new("Δ"))?,
     /// );
     ///
     /// // Using 'cache' with dfa2 is not allowed. It may result in panics or
@@ -322,7 +338,7 @@ impl DFA {
     /// dfa2.reset_cache(&mut cache);
     /// assert_eq!(
     ///     Some(HalfMatch::must(0, 3)),
-    ///     dfa2.find_leftmost_fwd(&mut cache, "☃".as_bytes())?,
+    ///     dfa2.try_search_fwd(&mut cache, &Input::new("☃"))?,
     /// );
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
@@ -337,13 +353,13 @@ impl DFA {
     ///
     /// # Example
     ///
-    /// This example shows the pattern count for a DFA that never matches:
+    /// This example shows the pattern length for a DFA that never matches:
     ///
     /// ```
     /// use regex_automata::hybrid::dfa::DFA;
     ///
     /// let dfa = DFA::never_match()?;
-    /// assert_eq!(dfa.pattern_count(), 0);
+    /// assert_eq!(dfa.pattern_len(), 0);
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     ///
@@ -353,7 +369,7 @@ impl DFA {
     /// use regex_automata::hybrid::dfa::DFA;
     ///
     /// let dfa = DFA::always_match()?;
-    /// assert_eq!(dfa.pattern_count(), 1);
+    /// assert_eq!(dfa.pattern_len(), 1);
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     ///
@@ -363,15 +379,37 @@ impl DFA {
     /// use regex_automata::hybrid::dfa::DFA;
     ///
     /// let dfa = DFA::new_many(&["[0-9]+", "[a-z]+", "[A-Z]+"])?;
-    /// assert_eq!(dfa.pattern_count(), 3);
+    /// assert_eq!(dfa.pattern_len(), 3);
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
-    pub fn pattern_count(&self) -> usize {
+    pub fn pattern_len(&self) -> usize {
         self.nfa.pattern_len()
     }
 
+    /// Returns the equivalence classes that make up the alphabet for this DFA.
+    ///
+    /// Unless [`Config::byte_classes`] was disabled, it is possible that
+    /// multiple distinct bytes are grouped into the same equivalence class
+    /// if it is impossible for them to discriminate between a match and a
+    /// non-match. This has the effect of reducing the overall alphabet size
+    /// and in turn potentially substantially reducing the size of the DFA's
+    /// transition table.
+    ///
+    /// The downside of using equivalence classes like this is that every state
+    /// transition will automatically use this map to convert an arbitrary
+    /// byte to its corresponding equivalence class. In practice this has a
+    /// negligible impact on performance.
+    pub fn byte_classes(&self) -> &ByteClasses {
+        &self.classes
+    }
+
+    /// Returns this lazy DFA's configuration.
+    pub fn get_config(&self) -> &Config {
+        &self.config
+    }
+
     /// Returns a reference to the underlying NFA.
-    pub fn nfa(&self) -> &Arc<thompson::NFA> {
+    pub fn get_nfa(&self) -> &thompson::NFA {
         &self.nfa
     }
 
@@ -393,253 +431,231 @@ impl DFA {
         1 << self.stride2()
     }
 
-    /// Returns the total number of elements in the alphabet for this
-    /// transition table. This is always less than or equal to `self.stride()`.
-    /// It is only equal when the alphabet length is a power of 2. Otherwise,
-    /// it is always strictly less.
-    fn alphabet_len(&self) -> usize {
-        self.classes.alphabet_len()
-    }
-
     /// Returns the memory usage, in bytes, of this lazy DFA.
     ///
     /// This does **not** include the stack size used up by this lazy DFA. To
-    /// compute that, use `std::mem::size_of::<DFA>()`. This also does
-    /// not include the size of the `Cache` used.
+    /// compute that, use `std::mem::size_of::<DFA>()`. This also does not
+    /// include the size of the `Cache` used.
+    ///
+    /// This also does not include any heap memory used by the NFA inside of
+    /// this hybrid NFA/DFA. This is because the NFA's ownership is shared, and
+    /// thus not owned by this hybrid NFA/DFA. More practically, several regex
+    /// engines in this crate embed an NFA, and reporting the NFA's memory
+    /// usage in all of them would likely result in reporting higher heap
+    /// memory than is actually used.
     pub fn memory_usage(&self) -> usize {
-        // Everything else is on the stack.
-        self.nfa.memory_usage()
+        // The only thing that uses heap memory in a DFA is the NFA. But the
+        // NFA has shared ownership, so reporting its memory as part of the
+        // hybrid DFA is likely to lead to double-counting the NFA memory
+        // somehow. In particular, this DFA does not really own an NFA, so
+        // including it in the DFA's memory usage doesn't seem semantically
+        // correct.
+        0
     }
 }
 
 impl DFA {
-    /// Executes a forward search and returns the end position of the first
-    /// match that is found as early as possible. If no match exists, then
-    /// `None` is returned.
-    ///
-    /// This routine stops scanning input as soon as the search observes a
-    /// match state. This is useful for implementing boolean `is_match`-like
-    /// routines, where as little work is done as possible.
+    /// Executes a forward search and returns the end position of the leftmost
+    /// match that is found. If no match exists, then `None` is returned.
     ///
-    /// See [`DFA::find_earliest_fwd_at`] for additional functionality, such as
-    /// providing a prefilter, a specific pattern to match and the bounds of
-    /// the search within the haystack. This routine is meant as a convenience
-    /// for common cases where the additional functionality is not needed.
+    /// In particular, this method continues searching even after it enters
+    /// a match state. The search only terminates once it has reached the
+    /// end of the input or when it has entered a dead or quit state. Upon
+    /// termination, the position of the last byte seen while still in a match
+    /// state is returned.
     ///
     /// # Errors
     ///
-    /// This routine only errors if the search could not complete. For
-    /// lazy DFAs generated by this crate, this only occurs in non-default
-    /// configurations where quit bytes are used, Unicode word boundaries are
-    /// heuristically enabled or limits are set on the number of times the lazy
-    /// DFA's cache may be cleared.
-    ///
-    /// When a search cannot complete, callers cannot know whether a match
+    /// This routine errors if the search could not complete. This can occur
+    /// in a number of circumstances:
+    ///
+    /// * The configuration of the lazy DFA may permit it to "quit" the search.
+    /// For example, setting quit bytes or enabling heuristic support for
+    /// Unicode word boundaries. The default configuration does not enable any
+    /// option that could result in the lazy DFA quitting.
+    /// * The configuration of the lazy DFA may also permit it to "give up"
+    /// on a search if it makes ineffective use of its transition table
+    /// cache. The default configuration does not enable this by default,
+    /// although it is typically a good idea to.
+    /// * When the provided `Input` configuration is not supported. For
+    /// example, by providing an unsupported anchor mode.
+    ///
+    /// When a search returns an error, callers cannot know whether a match
     /// exists or not.
     ///
     /// # Example
     ///
-    /// This example demonstrates how the position returned might differ from
-    /// what one might expect when executing a traditional leftmost search.
+    /// This example shows how to run a basic search.
     ///
     /// ```
-    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch};
+    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input};
     ///
     /// let dfa = DFA::new("foo[0-9]+")?;
     /// let mut cache = dfa.create_cache();
-    /// // Normally, the end of the leftmost first match here would be 8,
-    /// // corresponding to the end of the input. But the "earliest" semantics
-    /// // this routine cause it to stop as soon as a match is known, which
-    /// // occurs once 'foo[0-9]' has matched.
-    /// let expected = HalfMatch::must(0, 4);
-    /// assert_eq!(
-    ///     Some(expected),
-    ///     dfa.find_earliest_fwd(&mut cache, b"foo12345")?,
+    /// let expected = HalfMatch::must(0, 8);
+    /// assert_eq!(Some(expected), dfa.try_search_fwd(
+    ///     &mut cache, &Input::new("foo12345"))?,
     /// );
     ///
+    /// // Even though a match is found after reading the first byte (`a`),
+    /// // the leftmost first match semantics demand that we find the earliest
+    /// // match that prefers earlier parts of the pattern over later parts.
     /// let dfa = DFA::new("abc|a")?;
     /// let mut cache = dfa.create_cache();
-    /// // Normally, the end of the leftmost first match here would be 3,
-    /// // but the shortest match semantics detect a match earlier.
-    /// let expected = HalfMatch::must(0, 1);
-    /// assert_eq!(Some(expected), dfa.find_earliest_fwd(&mut cache, b"abc")?);
+    /// let expected = HalfMatch::must(0, 3);
+    /// assert_eq!(Some(expected), dfa.try_search_fwd(
+    ///     &mut cache, &Input::new("abc"))?,
+    /// );
+    ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
-    #[inline]
-    pub fn find_earliest_fwd(
-        &self,
-        cache: &mut Cache,
-        bytes: &[u8],
-    ) -> Result<Option<HalfMatch>, MatchError> {
-        self.find_earliest_fwd_at(cache, None, None, bytes, 0, bytes.len())
-    }
-
-    /// Executes a reverse search and returns the start position of the first
-    /// match that is found as early as possible. If no match exists, then
-    /// `None` is returned.
-    ///
-    /// This routine stops scanning input as soon as the search observes a
-    /// match state.
-    ///
-    /// Note that while it is not technically necessary to build a reverse
-    /// automaton to use a reverse search, it is likely that you'll want to do
-    /// so. Namely, the typical use of a reverse search is to find the starting
-    /// location of a match once its end is discovered from a forward search. A
-    /// reverse DFA automaton can be built by configuring the intermediate NFA
-    /// to be reversed via
-    /// [`nfa::thompson::Config::reverse`](crate::nfa::thompson::Config::reverse).
-    ///
-    /// # Errors
-    ///
-    /// This routine only errors if the search could not complete. For
-    /// lazy DFAs generated by this crate, this only occurs in non-default
-    /// configurations where quit bytes are used, Unicode word boundaries are
-    /// heuristically enabled or limits are set on the number of times the lazy
-    /// DFA's cache may be cleared.
     ///
-    /// When a search cannot complete, callers cannot know whether a match
-    /// exists or not.
-    ///
-    /// # Example
+    /// # Example: specific pattern search
     ///
-    /// This example demonstrates how the position returned might differ from
-    /// what one might expect when executing a traditional leftmost reverse
-    /// search.
+    /// This example shows how to build a lazy multi-DFA that permits searching
+    /// for specific patterns.
     ///
     /// ```
-    /// use regex_automata::{hybrid::dfa::DFA, nfa::thompson, HalfMatch};
-    ///
-    /// let dfa = DFA::builder()
-    ///     .thompson(thompson::Config::new().reverse(true))
-    ///     .build("[a-z]+[0-9]+")?;
-    /// let mut cache = dfa.create_cache();
-    /// // Normally, the end of the leftmost first match here would be 0,
-    /// // corresponding to the beginning of the input. But the "earliest"
-    /// // semantics of this routine cause it to stop as soon as a match is
-    /// // known, which occurs once '[a-z][0-9]+' has matched.
-    /// let expected = HalfMatch::must(0, 2);
-    /// assert_eq!(
-    ///     Some(expected),
-    ///     dfa.find_earliest_rev(&mut cache, b"foo12345")?,
-    /// );
+    /// use regex_automata::{
+    ///     hybrid::dfa::DFA,
+    ///     Anchored, HalfMatch, PatternID, Input,
+    /// };
     ///
     /// let dfa = DFA::builder()
-    ///     .thompson(thompson::Config::new().reverse(true))
-    ///     .build("abc|c")?;
+    ///     .configure(DFA::config().starts_for_each_pattern(true))
+    ///     .build_many(&["[a-z0-9]{6}", "[a-z][a-z0-9]{5}"])?;
     /// let mut cache = dfa.create_cache();
-    /// // Normally, the end of the leftmost first match here would be 0,
-    /// // but the shortest match semantics detect a match earlier.
-    /// let expected = HalfMatch::must(0, 2);
-    /// assert_eq!(Some(expected), dfa.find_earliest_rev(&mut cache, b"abc")?);
-    /// # Ok::<(), Box<dyn std::error::Error>>(())
-    /// ```
-    #[inline]
-    pub fn find_earliest_rev(
-        &self,
-        cache: &mut Cache,
-        bytes: &[u8],
-    ) -> Result<Option<HalfMatch>, MatchError> {
-        self.find_earliest_rev_at(cache, None, bytes, 0, bytes.len())
-    }
-
-    /// Executes a forward search and returns the end position of the leftmost
-    /// match that is found. If no match exists, then `None` is returned.
+    /// let haystack = "foo123";
     ///
-    /// In particular, this method continues searching even after it enters
-    /// a match state. The search only terminates once it has reached the
-    /// end of the input or when it has entered a dead or quit state. Upon
-    /// termination, the position of the last byte seen while still in a match
-    /// state is returned.
-    ///
-    /// # Errors
-    ///
-    /// This routine only errors if the search could not complete. For
-    /// lazy DFAs generated by this crate, this only occurs in non-default
-    /// configurations where quit bytes are used, Unicode word boundaries are
-    /// heuristically enabled or limits are set on the number of times the lazy
-    /// DFA's cache may be cleared.
+    /// // Since we are using the default leftmost-first match and both
+    /// // patterns match at the same starting position, only the first pattern
+    /// // will be returned in this case when doing a search for any of the
+    /// // patterns.
+    /// let expected = Some(HalfMatch::must(0, 6));
+    /// let got = dfa.try_search_fwd(&mut cache, &Input::new(haystack))?;
+    /// assert_eq!(expected, got);
     ///
-    /// When a search cannot complete, callers cannot know whether a match
-    /// exists or not.
+    /// // But if we want to check whether some other pattern matches, then we
+    /// // can provide its pattern ID.
+    /// let expected = Some(HalfMatch::must(1, 6));
+    /// let input = Input::new(haystack)
+    ///     .anchored(Anchored::Pattern(PatternID::must(1)));
+    /// let got = dfa.try_search_fwd(&mut cache, &input)?;
+    /// assert_eq!(expected, got);
     ///
-    /// # Example
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
     ///
-    /// Leftmost first match semantics corresponds to the match with the
-    /// smallest starting offset, but where the end offset is determined by
-    /// preferring earlier branches in the original regular expression. For
-    /// example, `Sam|Samwise` will match `Sam` in `Samwise`, but `Samwise|Sam`
-    /// will match `Samwise` in `Samwise`.
+    /// # Example: specifying the bounds of a search
     ///
-    /// Generally speaking, the "leftmost first" match is how most backtracking
-    /// regular expressions tend to work. This is in contrast to POSIX-style
-    /// regular expressions that yield "leftmost longest" matches. Namely,
-    /// both `Sam|Samwise` and `Samwise|Sam` match `Samwise` when using
-    /// leftmost longest semantics. (This crate does not currently support
-    /// leftmost longest semantics.)
+    /// This example shows how providing the bounds of a search can produce
+    /// different results than simply sub-slicing the haystack.
     ///
     /// ```
-    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch};
+    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input};
     ///
-    /// let dfa = DFA::new("foo[0-9]+")?;
+    /// // N.B. We disable Unicode here so that we use a simple ASCII word
+    /// // boundary. Alternatively, we could enable heuristic support for
+    /// // Unicode word boundaries since our haystack is pure ASCII.
+    /// let dfa = DFA::new(r"(?-u)\b[0-9]{3}\b")?;
     /// let mut cache = dfa.create_cache();
-    /// let expected = HalfMatch::must(0, 8);
-    /// assert_eq!(
-    ///     Some(expected),
-    ///     dfa.find_leftmost_fwd(&mut cache, b"foo12345")?,
-    /// );
+    /// let haystack = "foo123bar";
     ///
-    /// // Even though a match is found after reading the first byte (`a`),
-    /// // the leftmost first match semantics demand that we find the earliest
-    /// // match that prefers earlier parts of the pattern over latter parts.
-    /// let dfa = DFA::new("abc|a")?;
-    /// let mut cache = dfa.create_cache();
-    /// let expected = HalfMatch::must(0, 3);
-    /// assert_eq!(Some(expected), dfa.find_leftmost_fwd(&mut cache, b"abc")?);
+    /// // Since we sub-slice the haystack, the search doesn't know about the
+    /// // larger context and assumes that `123` is surrounded by word
+    /// // boundaries. And of course, the match position is reported relative
+    /// // to the sub-slice as well, which means we get `3` instead of `6`.
+    /// let expected = Some(HalfMatch::must(0, 3));
+    /// let got = dfa.try_search_fwd(
+    ///     &mut cache,
+    ///     &Input::new(&haystack[3..6]),
+    /// )?;
+    /// assert_eq!(expected, got);
+    ///
+    /// // But if we provide the bounds of the search within the context of the
+    /// // entire haystack, then the search can take the surrounding context
+    /// // into account. (And if we did find a match, it would be reported
+    /// // as a valid offset into `haystack` instead of its sub-slice.)
+    /// let expected = None;
+    /// let got = dfa.try_search_fwd(
+    ///     &mut cache,
+    ///     &Input::new(haystack).range(3..6),
+    /// )?;
+    /// assert_eq!(expected, got);
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     #[inline]
-    pub fn find_leftmost_fwd(
+    pub fn try_search_fwd(
         &self,
         cache: &mut Cache,
-        bytes: &[u8],
+        input: &Input<'_>,
     ) -> Result<Option<HalfMatch>, MatchError> {
-        self.find_leftmost_fwd_at(cache, None, None, bytes, 0, bytes.len())
+        let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8();
+        let hm = match search::find_fwd(self, cache, input)? {
+            None => return Ok(None),
+            Some(hm) if !utf8empty => return Ok(Some(hm)),
+            Some(hm) => hm,
+        };
+        // We get to this point when we know our DFA can match the empty string
+        // AND when UTF-8 mode is enabled. In this case, we skip any matches
+        // whose offset splits a codepoint. Such a match is necessarily a
+        // zero-width match, because UTF-8 mode requires the underlying NFA
+        // to be built such that all non-empty matches span valid UTF-8.
+        // Therefore, any match that ends in the middle of a codepoint cannot
+        // be part of a span of valid UTF-8 and thus must be an empty match.
+        // In such cases, we skip it, so as not to report matches that split a
+        // codepoint.
+        //
+        // Note that this is not a checked assumption. Callers *can* provide an
+        // NFA with UTF-8 mode enabled but produces non-empty matches that span
+        // invalid UTF-8. But doing so is documented to result in unspecified
+        // behavior.
+        empty::skip_splits_fwd(input, hm, hm.offset(), |input| {
+            let got = search::find_fwd(self, cache, input)?;
+            Ok(got.map(|hm| (hm, hm.offset())))
+        })
     }
 
     /// Executes a reverse search and returns the start of the position of the
     /// leftmost match that is found. If no match exists, then `None` is
     /// returned.
     ///
-    /// In particular, this method continues searching even after it enters
-    /// a match state. The search only terminates once it has reached the
-    /// end of the input or when it has entered a dead or quit state. Upon
-    /// termination, the position of the last byte seen while still in a match
-    /// state is returned.
-    ///
     /// # Errors
     ///
-    /// This routine only errors if the search could not complete. For
-    /// lazy DFAs generated by this crate, this only occurs in non-default
-    /// configurations where quit bytes are used, Unicode word boundaries are
-    /// heuristically enabled or limits are set on the number of times the lazy
-    /// DFA's cache may be cleared.
-    ///
-    /// When a search cannot complete, callers cannot know whether a match
+    /// This routine errors if the search could not complete. This can occur
+    /// in a number of circumstances:
+    ///
+    /// * The configuration of the lazy DFA may permit it to "quit" the search.
+    /// For example, setting quit bytes or enabling heuristic support for
+    /// Unicode word boundaries. The default configuration does not enable any
+    /// option that could result in the lazy DFA quitting.
+    /// * The configuration of the lazy DFA may also permit it to "give up"
+    /// on a search if it makes ineffective use of its transition table
+    /// cache. The default configuration does not enable this by default,
+    /// although it is typically a good idea to.
+    /// * When the provided `Input` configuration is not supported. For
+    /// example, by providing an unsupported anchor mode.
+    ///
+    /// When a search returns an error, callers cannot know whether a match
     /// exists or not.
     ///
     /// # Example
     ///
-    /// In particular, this routine is principally
-    /// useful when used in conjunction with the
-    /// [`nfa::thompson::Config::reverse`](crate::nfa::thompson::Config::revers
-    /// e) configuration. In general, it's unlikely to be correct to use both
-    /// `find_leftmost_fwd` and `find_leftmost_rev` with the same DFA since
-    /// any particular DFA will only support searching in one direction with
+    /// This routine is principally useful when used in
+    /// conjunction with the
+    /// [`nfa::thompson::Config::reverse`](crate::nfa::thompson::Config::reverse)
+    /// configuration. In general, it's unlikely to be correct to use both
+    /// `try_search_fwd` and `try_search_rev` with the same DFA since any
+    /// particular DFA will only support searching in one direction with
     /// respect to the pattern.
     ///
     /// ```
-    /// use regex_automata::{nfa::thompson, hybrid::dfa::DFA, HalfMatch};
+    /// use regex_automata::{
+    ///     nfa::thompson,
+    ///     hybrid::dfa::DFA,
+    ///     HalfMatch, Input,
+    /// };
     ///
     /// let dfa = DFA::builder()
     ///     .thompson(thompson::Config::new().reverse(true))
@@ -648,7 +664,7 @@ impl DFA {
     /// let expected = HalfMatch::must(0, 0);
     /// assert_eq!(
     ///     Some(expected),
-    ///     dfa.find_leftmost_rev(&mut cache, b"foo12345")?,
+    ///     dfa.try_search_rev(&mut cache, &Input::new("foo12345"))?,
     /// );
     ///
     /// // Even though a match is found after reading the last byte (`c`),
@@ -659,17 +675,133 @@ impl DFA {
     ///     .build("abc|c")?;
     /// let mut cache = dfa.create_cache();
     /// let expected = HalfMatch::must(0, 0);
-    /// assert_eq!(Some(expected), dfa.find_leftmost_rev(&mut cache, b"abc")?);
+    /// assert_eq!(Some(expected), dfa.try_search_rev(
+    ///     &mut cache, &Input::new("abc"))?,
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Example: UTF-8 mode
+    ///
+    /// This examples demonstrates that UTF-8 mode applies to reverse
+    /// DFAs. When UTF-8 mode is enabled in the underlying NFA, then all
+    /// matches reported must correspond to valid UTF-8 spans. This includes
+    /// prohibiting zero-width matches that split a codepoint.
+    ///
+    /// UTF-8 mode is enabled by default. Notice below how the only zero-width
+    /// matches reported are those at UTF-8 boundaries:
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     hybrid::dfa::DFA,
+    ///     nfa::thompson,
+    ///     HalfMatch, Input, MatchKind,
+    /// };
+    ///
+    /// let dfa = DFA::builder()
+    ///     .thompson(thompson::Config::new().reverse(true))
+    ///     .build(r"")?;
+    /// let mut cache = dfa.create_cache();
+    ///
+    /// // Run the reverse DFA to collect all matches.
+    /// let mut input = Input::new("☃");
+    /// let mut matches = vec![];
+    /// loop {
+    ///     match dfa.try_search_rev(&mut cache, &input)? {
+    ///         None => break,
+    ///         Some(hm) => {
+    ///             matches.push(hm);
+    ///             if hm.offset() == 0 || input.end() == 0 {
+    ///                 break;
+    ///             } else if hm.offset() < input.end() {
+    ///                 input.set_end(hm.offset());
+    ///             } else {
+    ///                 // This is only necessary to handle zero-width
+    ///                 // matches, which of course occur in this example.
+    ///                 // Without this, the search would never advance
+    ///                 // backwards beyond the initial match.
+    ///                 input.set_end(input.end() - 1);
+    ///             }
+    ///         }
+    ///     }
+    /// }
+    ///
+    /// // No matches split a codepoint.
+    /// let expected = vec![
+    ///     HalfMatch::must(0, 3),
+    ///     HalfMatch::must(0, 0),
+    /// ];
+    /// assert_eq!(expected, matches);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Now let's look at the same example, but with UTF-8 mode on the
+    /// underlying NFA disabled:
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     hybrid::dfa::DFA,
+    ///     nfa::thompson,
+    ///     HalfMatch, Input, MatchKind,
+    /// };
+    ///
+    /// let dfa = DFA::builder()
+    ///     .thompson(thompson::Config::new().reverse(true).utf8(false))
+    ///     .build(r"")?;
+    /// let mut cache = dfa.create_cache();
+    ///
+    /// // Run the reverse DFA to collect all matches.
+    /// let mut input = Input::new("☃");
+    /// let mut matches = vec![];
+    /// loop {
+    ///     match dfa.try_search_rev(&mut cache, &input)? {
+    ///         None => break,
+    ///         Some(hm) => {
+    ///             matches.push(hm);
+    ///             if hm.offset() == 0 || input.end() == 0 {
+    ///                 break;
+    ///             } else if hm.offset() < input.end() {
+    ///                 input.set_end(hm.offset());
+    ///             } else {
+    ///                 // This is only necessary to handle zero-width
+    ///                 // matches, which of course occur in this example.
+    ///                 // Without this, the search would never advance
+    ///                 // backwards beyond the initial match.
+    ///                 input.set_end(input.end() - 1);
+    ///             }
+    ///         }
+    ///     }
+    /// }
+    ///
+    /// // No matches split a codepoint.
+    /// let expected = vec![
+    ///     HalfMatch::must(0, 3),
+    ///     HalfMatch::must(0, 2),
+    ///     HalfMatch::must(0, 1),
+    ///     HalfMatch::must(0, 0),
+    /// ];
+    /// assert_eq!(expected, matches);
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     #[inline]
-    pub fn find_leftmost_rev(
+    pub fn try_search_rev(
         &self,
         cache: &mut Cache,
-        bytes: &[u8],
+        input: &Input<'_>,
     ) -> Result<Option<HalfMatch>, MatchError> {
-        self.find_leftmost_rev_at(cache, None, bytes, 0, bytes.len())
+        let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8();
+        let hm = match search::find_rev(self, cache, input)? {
+            None => return Ok(None),
+            Some(hm) if !utf8empty => return Ok(Some(hm)),
+            Some(hm) => hm,
+        };
+        empty::skip_splits_rev(input, hm, hm.offset(), |input| {
+            let got = search::find_rev(self, cache, input)?;
+            Ok(got.map(|hm| (hm, hm.offset())))
+        })
     }
 
     /// Executes an overlapping forward search and returns the end position of
@@ -681,15 +813,34 @@ impl DFA {
     /// state from prior calls so that the implementation knows where the last
     /// match occurred.
     ///
-    /// # Errors
+    /// When using this routine to implement an iterator of overlapping
+    /// matches, the `start` of the search should remain invariant throughout
+    /// iteration. The `OverlappingState` given to the search will keep track
+    /// of the current position of the search. (This is because multiple
+    /// matches may be reported at the same position, so only the search
+    /// implementation itself knows when to advance the position.)
+    ///
+    /// If for some reason you want the search to forget about its previous
+    /// state and restart the search at a particular position, then setting the
+    /// state to [`OverlappingState::start`] will accomplish that.
     ///
-    /// This routine only errors if the search could not complete. For
-    /// lazy DFAs generated by this crate, this only occurs in non-default
-    /// configurations where quit bytes are used, Unicode word boundaries are
-    /// heuristically enabled or limits are set on the number of times the lazy
-    /// DFA's cache may be cleared.
+    /// # Errors
     ///
-    /// When a search cannot complete, callers cannot know whether a match
+    /// This routine errors if the search could not complete. This can occur
+    /// in a number of circumstances:
+    ///
+    /// * The configuration of the lazy DFA may permit it to "quit" the search.
+    /// For example, setting quit bytes or enabling heuristic support for
+    /// Unicode word boundaries. The default configuration does not enable any
+    /// option that could result in the lazy DFA quitting.
+    /// * The configuration of the lazy DFA may also permit it to "give up"
+    /// on a search if it makes ineffective use of its transition table
+    /// cache. The default configuration does not enable this by default,
+    /// although it is typically a good idea to.
+    /// * When the provided `Input` configuration is not supported. For
+    /// example, by providing an unsupported anchor mode.
+    ///
+    /// When a search returns an error, callers cannot know whether a match
     /// exists or not.
     ///
     /// # Example
@@ -708,10 +859,10 @@ impl DFA {
     /// the search to find totally new matches (potentially of other patterns).
     ///
     /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
     /// use regex_automata::{
-    ///     hybrid::{dfa::DFA, OverlappingState},
-    ///     HalfMatch,
-    ///     MatchKind,
+    ///     hybrid::dfa::{DFA, OverlappingState},
+    ///     HalfMatch, Input, MatchKind,
     /// };
     ///
     /// let dfa = DFA::builder()
@@ -719,12 +870,14 @@ impl DFA {
     ///     .build_many(&[r"\w+$", r"\S+$"])?;
     /// let mut cache = dfa.create_cache();
     ///
-    /// let haystack = "@foo".as_bytes();
+    /// let haystack = "@foo";
     /// let mut state = OverlappingState::start();
     ///
     /// let expected = Some(HalfMatch::must(1, 4));
-    /// let got = dfa.find_overlapping_fwd(&mut cache, haystack, &mut state)?;
-    /// assert_eq!(expected, got);
+    /// dfa.try_search_overlapping_fwd(
+    ///     &mut cache, &Input::new(haystack), &mut state,
+    /// )?;
+    /// assert_eq!(expected, state.get_match());
     ///
     /// // The first pattern also matches at the same position, so re-running
     /// // the search will yield another match. Notice also that the first
@@ -732,418 +885,265 @@ impl DFA {
     /// // pattern begins its match before the first, is therefore an earlier
     /// // match and is thus reported first.
     /// let expected = Some(HalfMatch::must(0, 4));
-    /// let got = dfa.find_overlapping_fwd(&mut cache, haystack, &mut state)?;
-    /// assert_eq!(expected, got);
+    /// dfa.try_search_overlapping_fwd(
+    ///     &mut cache, &Input::new(haystack), &mut state,
+    /// )?;
+    /// assert_eq!(expected, state.get_match());
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     #[inline]
-    pub fn find_overlapping_fwd(
+    pub fn try_search_overlapping_fwd(
         &self,
         cache: &mut Cache,
-        bytes: &[u8],
+        input: &Input<'_>,
         state: &mut OverlappingState,
-    ) -> Result<Option<HalfMatch>, MatchError> {
-        self.find_overlapping_fwd_at(
-            cache,
-            None,
-            None,
-            bytes,
-            0,
-            bytes.len(),
-            state,
-        )
+    ) -> Result<(), MatchError> {
+        let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8();
+        search::find_overlapping_fwd(self, cache, input, state)?;
+        match state.get_match() {
+            None => Ok(()),
+            Some(_) if !utf8empty => Ok(()),
+            Some(_) => skip_empty_utf8_splits_overlapping(
+                input,
+                state,
+                |input, state| {
+                    search::find_overlapping_fwd(self, cache, input, state)
+                },
+            ),
+        }
     }
 
-    /// Executes a forward search and returns the end position of the first
-    /// match that is found as early as possible. If no match exists, then
+    /// Executes a reverse overlapping search and returns the start of the
+    /// position of the leftmost match that is found. If no match exists, then
     /// `None` is returned.
     ///
-    /// This routine stops scanning input as soon as the search observes a
-    /// match state. This is useful for implementing boolean `is_match`-like
-    /// routines, where as little work is done as possible.
-    ///
-    /// This is like [`DFA::find_earliest_fwd`], except it provides some
-    /// additional control over how the search is executed:
-    ///
-    /// * `pre` is a prefilter scanner that, when given, is used whenever the
-    /// DFA enters its starting state. This is meant to speed up searches where
-    /// one or a small number of literal prefixes are known.
-    /// * `pattern_id` specifies a specific pattern in the DFA to run an
-    /// anchored search for. If not given, then a search for any pattern is
-    /// performed. For lazy DFAs, [`Config::starts_for_each_pattern`] must be
-    /// enabled to use this functionality.
-    /// * `start` and `end` permit searching a specific region of the haystack
-    /// `bytes`. This is useful when implementing an iterator over matches
-    /// within the same haystack, which cannot be done correctly by simply
-    /// providing a subslice of `bytes`. (Because the existence of look-around
-    /// operations such as `\b`, `^` and `$` need to take the surrounding
-    /// context into account. This cannot be done if the haystack doesn't
-    /// contain it.)
-    ///
-    /// The examples below demonstrate each of these additional parameters.
+    /// When using this routine to implement an iterator of overlapping
+    /// matches, the `start` of the search should remain invariant throughout
+    /// iteration. The `OverlappingState` given to the search will keep track
+    /// of the current position of the search. (This is because multiple
+    /// matches may be reported at the same position, so only the search
+    /// implementation itself knows when to advance the position.)
     ///
-    /// # Errors
+    /// If for some reason you want the search to forget about its previous
+    /// state and restart the search at a particular position, then setting the
+    /// state to [`OverlappingState::start`] will accomplish that.
     ///
-    /// This routine only errors if the search could not complete. For
-    /// lazy DFAs generated by this crate, this only occurs in non-default
-    /// configurations where quit bytes are used, Unicode word boundaries are
-    /// heuristically enabled or limits are set on the number of times the lazy
-    /// DFA's cache may be cleared.
+    /// # Errors
     ///
-    /// When a search cannot complete, callers cannot know whether a match
+    /// This routine errors if the search could not complete. This can occur
+    /// in a number of circumstances:
+    ///
+    /// * The configuration of the lazy DFA may permit it to "quit" the search.
+    /// For example, setting quit bytes or enabling heuristic support for
+    /// Unicode word boundaries. The default configuration does not enable any
+    /// option that could result in the lazy DFA quitting.
+    /// * The configuration of the lazy DFA may also permit it to "give up"
+    /// on a search if it makes ineffective use of its transition table
+    /// cache. The default configuration does not enable this by default,
+    /// although it is typically a good idea to.
+    /// * When the provided `Input` configuration is not supported. For
+    /// example, by providing an unsupported anchor mode.
+    ///
+    /// When a search returns an error, callers cannot know whether a match
     /// exists or not.
     ///
-    /// # Panics
-    ///
-    /// This routine panics if a `pattern_id` is given and this lazy DFA does
-    /// not support specific pattern searches.
+    /// # Example: UTF-8 mode
     ///
-    /// It also panics if the given haystack range is not valid.
+    /// This examples demonstrates that UTF-8 mode applies to reverse
+    /// DFAs. When UTF-8 mode is enabled in the underlying NFA, then all
+    /// matches reported must correspond to valid UTF-8 spans. This includes
+    /// prohibiting zero-width matches that split a codepoint.
     ///
-    /// # Example: prefilter
-    ///
-    /// This example shows how to provide a prefilter for a pattern where all
-    /// matches start with a `z` byte.
+    /// UTF-8 mode is enabled by default. Notice below how the only zero-width
+    /// matches reported are those at UTF-8 boundaries:
     ///
     /// ```
     /// use regex_automata::{
-    ///     hybrid::dfa::DFA,
-    ///     util::prefilter::{Candidate, Prefilter, Scanner, State},
-    ///     HalfMatch,
+    ///     hybrid::dfa::{DFA, OverlappingState},
+    ///     nfa::thompson,
+    ///     HalfMatch, Input, MatchKind,
     /// };
     ///
-    /// #[derive(Debug)]
-    /// pub struct ZPrefilter;
-    ///
-    /// impl Prefilter for ZPrefilter {
-    ///     fn next_candidate(
-    ///         &self,
-    ///         _: &mut State,
-    ///         haystack: &[u8],
-    ///         at: usize,
-    ///     ) -> Candidate {
-    ///         // Try changing b'z' to b'q' and observe this test fail since
-    ///         // the prefilter will skip right over the match.
-    ///         match haystack.iter().position(|&b| b == b'z') {
-    ///             None => Candidate::None,
-    ///             Some(i) => Candidate::PossibleStartOfMatch(at + i),
-    ///         }
-    ///     }
+    /// let dfa = DFA::builder()
+    ///     .configure(DFA::config().match_kind(MatchKind::All))
+    ///     .thompson(thompson::Config::new().reverse(true))
+    ///     .build_many(&[r"", r"☃"])?;
+    /// let mut cache = dfa.create_cache();
     ///
-    ///     fn heap_bytes(&self) -> usize {
-    ///         0
+    /// // Run the reverse DFA to collect all matches.
+    /// let input = Input::new("☃");
+    /// let mut state = OverlappingState::start();
+    /// let mut matches = vec![];
+    /// loop {
+    ///     dfa.try_search_overlapping_rev(&mut cache, &input, &mut state)?;
+    ///     match state.get_match() {
+    ///         None => break,
+    ///         Some(hm) => matches.push(hm),
     ///     }
     /// }
     ///
-    /// let dfa = DFA::new("z[0-9]{3}")?;
-    /// let mut cache = dfa.create_cache();
-    ///
-    /// let haystack = "foobar z123 q123".as_bytes();
-    /// // A scanner executes a prefilter while tracking some state that helps
-    /// // determine whether a prefilter is still "effective" or not.
-    /// let mut scanner = Scanner::new(&ZPrefilter);
-    ///
-    /// let expected = Some(HalfMatch::must(0, 11));
-    /// let got = dfa.find_earliest_fwd_at(
-    ///     &mut cache,
-    ///     Some(&mut scanner),
-    ///     None,
-    ///     haystack,
-    ///     0,
-    ///     haystack.len(),
-    /// )?;
-    /// assert_eq!(expected, got);
+    /// // No matches split a codepoint.
+    /// let expected = vec![
+    ///     HalfMatch::must(0, 3),
+    ///     HalfMatch::must(1, 0),
+    ///     HalfMatch::must(0, 0),
+    /// ];
+    /// assert_eq!(expected, matches);
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     ///
-    /// # Example: specific pattern search
-    ///
-    /// This example shows how to build a lazy multi-DFA that permits searching
-    /// for specific patterns.
+    /// Now let's look at the same example, but with UTF-8 mode on the
+    /// underlying NFA disabled:
     ///
     /// ```
     /// use regex_automata::{
-    ///     hybrid::dfa::DFA,
-    ///     HalfMatch,
-    ///     PatternID,
+    ///     hybrid::dfa::{DFA, OverlappingState},
+    ///     nfa::thompson,
+    ///     HalfMatch, Input, MatchKind,
     /// };
     ///
     /// let dfa = DFA::builder()
-    ///     .configure(DFA::config().starts_for_each_pattern(true))
-    ///     .build_many(&["[a-z0-9]{6}", "[a-z][a-z0-9]{5}"])?;
-    /// let mut cache = dfa.create_cache();
-    /// let haystack = "foo123".as_bytes();
-    ///
-    /// // Since we are using the default leftmost-first match and both
-    /// // patterns match at the same starting position, only the first pattern
-    /// // will be returned in this case when doing a search for any of the
-    /// // patterns.
-    /// let expected = Some(HalfMatch::must(0, 6));
-    /// let got = dfa.find_earliest_fwd_at(
-    ///     &mut cache,
-    ///     None,
-    ///     None,
-    ///     haystack,
-    ///     0,
-    ///     haystack.len(),
-    /// )?;
-    /// assert_eq!(expected, got);
-    ///
-    /// // But if we want to check whether some other pattern matches, then we
-    /// // can provide its pattern ID.
-    /// let expected = Some(HalfMatch::must(1, 6));
-    /// let got = dfa.find_earliest_fwd_at(
-    ///     &mut cache,
-    ///     None,
-    ///     Some(PatternID::must(1)),
-    ///     haystack,
-    ///     0,
-    ///     haystack.len(),
-    /// )?;
-    /// assert_eq!(expected, got);
-    ///
-    /// # Ok::<(), Box<dyn std::error::Error>>(())
-    /// ```
-    ///
-    /// # Example: specifying the bounds of a search
-    ///
-    /// This example shows how providing the bounds of a search can produce
-    /// different results than simply sub-slicing the haystack.
-    ///
-    /// ```
-    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch};
-    ///
-    /// // N.B. We disable Unicode here so that we use a simple ASCII word
-    /// // boundary. Alternatively, we could enable heuristic support for
-    /// // Unicode word boundaries since our haystack is pure ASCII.
-    /// let dfa = DFA::new(r"(?-u)\b[0-9]{3}\b")?;
+    ///     .configure(DFA::config().match_kind(MatchKind::All))
+    ///     .thompson(thompson::Config::new().reverse(true).utf8(false))
+    ///     .build_many(&[r"", r"☃"])?;
     /// let mut cache = dfa.create_cache();
-    /// let haystack = "foo123bar".as_bytes();
     ///
-    /// // Since we sub-slice the haystack, the search doesn't know about the
-    /// // larger context and assumes that `123` is surrounded by word
-    /// // boundaries. And of course, the match position is reported relative
-    /// // to the sub-slice as well, which means we get `3` instead of `6`.
-    /// let expected = Some(HalfMatch::must(0, 3));
-    /// let got = dfa.find_earliest_fwd_at(
-    ///     &mut cache,
-    ///     None,
-    ///     None,
-    ///     &haystack[3..6],
-    ///     0,
-    ///     haystack[3..6].len(),
-    /// )?;
-    /// assert_eq!(expected, got);
+    /// // Run the reverse DFA to collect all matches.
+    /// let input = Input::new("☃");
+    /// let mut state = OverlappingState::start();
+    /// let mut matches = vec![];
+    /// loop {
+    ///     dfa.try_search_overlapping_rev(&mut cache, &input, &mut state)?;
+    ///     match state.get_match() {
+    ///         None => break,
+    ///         Some(hm) => matches.push(hm),
+    ///     }
+    /// }
     ///
-    /// // But if we provide the bounds of the search within the context of the
-    /// // entire haystack, then the search can take the surrounding context
-    /// // into account. (And if we did find a match, it would be reported
-    /// // as a valid offset into `haystack` instead of its sub-slice.)
-    /// let expected = None;
-    /// let got = dfa.find_earliest_fwd_at(
-    ///     &mut cache,
-    ///     None,
-    ///     None,
-    ///     haystack,
-    ///     3,
-    ///     6,
-    /// )?;
-    /// assert_eq!(expected, got);
+    /// // Now *all* positions match, even within a codepoint,
+    /// // because we lifted the requirement that matches
+    /// // correspond to valid UTF-8 spans.
+    /// let expected = vec![
+    ///     HalfMatch::must(0, 3),
+    ///     HalfMatch::must(0, 2),
+    ///     HalfMatch::must(0, 1),
+    ///     HalfMatch::must(1, 0),
+    ///     HalfMatch::must(0, 0),
+    /// ];
+    /// assert_eq!(expected, matches);
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     #[inline]
-    pub fn find_earliest_fwd_at(
+    pub fn try_search_overlapping_rev(
         &self,
         cache: &mut Cache,
-        pre: Option<&mut prefilter::Scanner>,
-        pattern_id: Option<PatternID>,
-        bytes: &[u8],
-        start: usize,
-        end: usize,
-    ) -> Result<Option<HalfMatch>, MatchError> {
-        search::find_earliest_fwd(
-            pre, self, cache, pattern_id, bytes, start, end,
-        )
-    }
-
-    /// Executes a reverse search and returns the start position of the first
-    /// match that is found as early as possible. If no match exists, then
-    /// `None` is returned.
-    ///
-    /// This routine stops scanning input as soon as the search observes a
-    /// match state.
-    ///
-    /// This is like [`DFA::find_earliest_rev`], except it provides some
-    /// additional control over how the search is executed. See the
-    /// documentation of [`DFA::find_earliest_fwd_at`] for more details
-    /// on the additional parameters along with examples of their usage.
-    ///
-    /// # Errors
-    ///
-    /// This routine only errors if the search could not complete. For
-    /// lazy DFAs generated by this crate, this only occurs in non-default
-    /// configurations where quit bytes are used, Unicode word boundaries are
-    /// heuristically enabled or limits are set on the number of times the lazy
-    /// DFA's cache may be cleared.
-    ///
-    /// When a search cannot complete, callers cannot know whether a match
-    /// exists or not.
-    ///
-    /// # Panics
-    ///
-    /// This routine panics if a `pattern_id` is given and the underlying
-    /// DFA does not support specific pattern searches.
-    ///
-    /// It also panics if the given haystack range is not valid.
-    #[inline]
-    pub fn find_earliest_rev_at(
-        &self,
-        cache: &mut Cache,
-        pattern_id: Option<PatternID>,
-        bytes: &[u8],
-        start: usize,
-        end: usize,
-    ) -> Result<Option<HalfMatch>, MatchError> {
-        search::find_earliest_rev(self, cache, pattern_id, bytes, start, end)
+        input: &Input<'_>,
+        state: &mut OverlappingState,
+    ) -> Result<(), MatchError> {
+        let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8();
+        search::find_overlapping_rev(self, cache, input, state)?;
+        match state.get_match() {
+            None => Ok(()),
+            Some(_) if !utf8empty => Ok(()),
+            Some(_) => skip_empty_utf8_splits_overlapping(
+                input,
+                state,
+                |input, state| {
+                    search::find_overlapping_rev(self, cache, input, state)
+                },
+            ),
+        }
     }
 
-    /// Executes a forward search and returns the end position of the leftmost
-    /// match that is found. If no match exists, then `None` is returned.
-    ///
-    /// This is like [`DFA::find_leftmost_fwd`], except it provides some
-    /// additional control over how the search is executed. See the
-    /// documentation of [`DFA::find_earliest_fwd_at`] for more details on the
-    /// additional parameters along with examples of their usage.
-    ///
-    /// # Errors
+    /// Writes the set of patterns that match anywhere in the given search
+    /// configuration to `patset`. If multiple patterns match at the same
+    /// position and the underlying DFA supports overlapping matches, then all
+    /// matching patterns are written to the given set.
     ///
-    /// This routine only errors if the search could not complete. For
-    /// lazy DFAs generated by this crate, this only occurs in non-default
-    /// configurations where quit bytes are used, Unicode word boundaries are
-    /// heuristically enabled or limits are set on the number of times the lazy
-    /// DFA's cache may be cleared.
+    /// Unless all of the patterns in this DFA are anchored, then generally
+    /// speaking, this will visit every byte in the haystack.
     ///
-    /// When a search cannot complete, callers cannot know whether a match
-    /// exists or not.
+    /// This search routine *does not* clear the pattern set. This gives some
+    /// flexibility to the caller (e.g., running multiple searches with the
+    /// same pattern set), but does make the API bug-prone if you're reusing
+    /// the same pattern set for multiple searches but intended them to be
+    /// independent.
     ///
-    /// # Panics
-    ///
-    /// This routine panics if a `pattern_id` is given and the underlying
-    /// DFA does not support specific pattern searches.
-    ///
-    /// It also panics if the given haystack range is not valid.
-    #[inline]
-    pub fn find_leftmost_fwd_at(
-        &self,
-        cache: &mut Cache,
-        pre: Option<&mut prefilter::Scanner>,
-        pattern_id: Option<PatternID>,
-        bytes: &[u8],
-        start: usize,
-        end: usize,
-    ) -> Result<Option<HalfMatch>, MatchError> {
-        search::find_leftmost_fwd(
-            pre, self, cache, pattern_id, bytes, start, end,
-        )
-    }
-
-    /// Executes a reverse search and returns the start of the position of the
-    /// leftmost match that is found. If no match exists, then `None` is
-    /// returned.
-    ///
-    /// This is like [`DFA::find_leftmost_rev`], except it provides some
-    /// additional control over how the search is executed. See the
-    /// documentation of [`DFA::find_earliest_fwd_at`] for more details on the
-    /// additional parameters along with examples of their usage.
+    /// If a pattern ID matched but the given `PatternSet` does not have
+    /// sufficient capacity to store it, then it is not inserted and silently
+    /// dropped.
     ///
     /// # Errors
     ///
-    /// This routine only errors if the search could not complete. For
-    /// lazy DFAs generated by this crate, this only occurs in non-default
-    /// configurations where quit bytes are used, Unicode word boundaries are
-    /// heuristically enabled or limits are set on the number of times the lazy
-    /// DFA's cache may be cleared.
-    ///
-    /// When a search cannot complete, callers cannot know whether a match
+    /// This routine errors if the search could not complete. This can occur
+    /// in a number of circumstances:
+    ///
+    /// * The configuration of the lazy DFA may permit it to "quit" the search.
+    /// For example, setting quit bytes or enabling heuristic support for
+    /// Unicode word boundaries. The default configuration does not enable any
+    /// option that could result in the lazy DFA quitting.
+    /// * The configuration of the lazy DFA may also permit it to "give up"
+    /// on a search if it makes ineffective use of its transition table
+    /// cache. The default configuration does not enable this by default,
+    /// although it is typically a good idea to.
+    /// * When the provided `Input` configuration is not supported. For
+    /// example, by providing an unsupported anchor mode.
+    ///
+    /// When a search returns an error, callers cannot know whether a match
     /// exists or not.
     ///
-    /// # Panics
-    ///
-    /// This routine panics if a `pattern_id` is given and the underlying
-    /// DFA does not support specific pattern searches.
-    ///
-    /// It also panics if the given haystack range is not valid.
-    #[inline]
-    pub fn find_leftmost_rev_at(
-        &self,
-        cache: &mut Cache,
-        pattern_id: Option<PatternID>,
-        bytes: &[u8],
-        start: usize,
-        end: usize,
-    ) -> Result<Option<HalfMatch>, MatchError> {
-        search::find_leftmost_rev(self, cache, pattern_id, bytes, start, end)
-    }
-
-    /// Executes an overlapping forward search and returns the end position of
-    /// matches as they are found. If no match exists, then `None` is returned.
-    ///
-    /// This routine is principally only useful when searching for multiple
-    /// patterns on inputs where multiple patterns may match the same regions
-    /// of text. In particular, callers must preserve the automaton's search
-    /// state from prior calls so that the implementation knows where the last
-    /// match occurred.
-    ///
-    /// This is like [`DFA::find_overlapping_fwd`], except it provides
-    /// some additional control over how the search is executed. See the
-    /// documentation of [`DFA::find_earliest_fwd_at`] for more details
-    /// on the additional parameters along with examples of their usage.
-    ///
-    /// When using this routine to implement an iterator of overlapping
-    /// matches, the `start` of the search should always be set to the end
-    /// of the last match. If more patterns match at the previous location,
-    /// then they will be immediately returned. (This is tracked by the given
-    /// overlapping state.) Otherwise, the search continues at the starting
-    /// position given.
-    ///
-    /// If for some reason you want the search to forget about its previous
-    /// state and restart the search at a particular position, then setting the
-    /// state to [`OverlappingState::start`] will accomplish that.
-    ///
-    /// # Errors
+    /// # Example
     ///
-    /// This routine only errors if the search could not complete. For
-    /// lazy DFAs generated by this crate, this only occurs in non-default
-    /// configurations where quit bytes are used, Unicode word boundaries are
-    /// heuristically enabled or limits are set on the number of times the lazy
-    /// DFA's cache may be cleared.
+    /// This example shows how to find all matching patterns in a haystack,
+    /// even when some patterns match at the same position as other patterns.
     ///
-    /// When a search cannot complete, callers cannot know whether a match
-    /// exists or not.
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{
+    ///     hybrid::dfa::DFA,
+    ///     Input, MatchKind, PatternSet,
+    /// };
     ///
-    /// # Panics
+    /// let patterns = &[
+    ///     r"\w+", r"\d+", r"\pL+", r"foo", r"bar", r"barfoo", r"foobar",
+    /// ];
+    /// let dfa = DFA::builder()
+    ///     .configure(DFA::config().match_kind(MatchKind::All))
+    ///     .build_many(patterns)?;
+    /// let mut cache = dfa.create_cache();
     ///
-    /// This routine panics if a `pattern_id` is given and the underlying
-    /// DFA does not support specific pattern searches.
+    /// let input = Input::new("foobar");
+    /// let mut patset = PatternSet::new(dfa.pattern_len());
+    /// dfa.try_which_overlapping_matches(&mut cache, &input, &mut patset)?;
+    /// let expected = vec![0, 2, 3, 4, 6];
+    /// let got: Vec<usize> = patset.iter().map(|p| p.as_usize()).collect();
+    /// assert_eq!(expected, got);
     ///
-    /// It also panics if the given haystack range is not valid.
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
     #[inline]
-    pub fn find_overlapping_fwd_at(
+    pub fn try_which_overlapping_matches(
         &self,
         cache: &mut Cache,
-        pre: Option<&mut prefilter::Scanner>,
-        pattern_id: Option<PatternID>,
-        bytes: &[u8],
-        start: usize,
-        end: usize,
-        state: &mut OverlappingState,
-    ) -> Result<Option<HalfMatch>, MatchError> {
-        search::find_overlapping_fwd(
-            pre, self, cache, pattern_id, bytes, start, end, state,
-        )
+        input: &Input<'_>,
+        patset: &mut PatternSet,
+    ) -> Result<(), MatchError> {
+        let mut state = OverlappingState::start();
+        while let Some(m) = {
+            self.try_search_overlapping_fwd(cache, input, &mut state)?;
+            state.get_match()
+        } {
+            let _ = patset.try_insert(m.pattern());
+            // There's nothing left to find, so we can stop. Or the caller
+            // asked us to.
+            if patset.is_full() || input.get_earliest() {
+                break;
+            }
+        }
+        Ok(())
     }
 }
 
@@ -1189,7 +1189,7 @@ impl DFA {
     /// haystack by using the `next_state` method.
     ///
     /// ```
-    /// use regex_automata::hybrid::dfa::DFA;
+    /// use regex_automata::{hybrid::dfa::DFA, Input};
     ///
     /// let dfa = DFA::new(r"[a-z]+r")?;
     /// let mut cache = dfa.create_cache();
@@ -1198,7 +1198,7 @@ impl DFA {
     /// // The start state is determined by inspecting the position and the
     /// // initial bytes of the haystack.
     /// let mut sid = dfa.start_state_forward(
-    ///     &mut cache, None, haystack, 0, haystack.len(),
+    ///     &mut cache, &Input::new(haystack),
     /// )?;
     /// // Walk all the bytes in the haystack.
     /// for &b in haystack {
@@ -1289,7 +1289,7 @@ impl DFA {
     /// haystack by using the `next_state_untagged` method where possible.
     ///
     /// ```
-    /// use regex_automata::hybrid::dfa::DFA;
+    /// use regex_automata::{hybrid::dfa::DFA, Input};
     ///
     /// let dfa = DFA::new(r"[a-z]+r")?;
     /// let mut cache = dfa.create_cache();
@@ -1298,7 +1298,7 @@ impl DFA {
     /// // The start state is determined by inspecting the position and the
     /// // initial bytes of the haystack.
     /// let mut sid = dfa.start_state_forward(
-    ///     &mut cache, None, haystack, 0, haystack.len(),
+    ///     &mut cache, &Input::new(haystack),
     /// )?;
     /// // Walk all the bytes in the haystack.
     /// let mut at = 0;
@@ -1478,7 +1478,7 @@ impl DFA {
     /// and then finishing the search with the final EOI transition.
     ///
     /// ```
-    /// use regex_automata::hybrid::dfa::DFA;
+    /// use regex_automata::{hybrid::dfa::DFA, Input};
     ///
     /// let dfa = DFA::new(r"[a-z]+r")?;
     /// let mut cache = dfa.create_cache();
@@ -1487,7 +1487,7 @@ impl DFA {
     /// // The start state is determined by inspecting the position and the
     /// // initial bytes of the haystack.
     /// let mut sid = dfa.start_state_forward(
-    ///     &mut cache, None, haystack, 0, haystack.len(),
+    ///     &mut cache, &Input::new(haystack),
     /// )?;
     /// // Walk all the bytes in the haystack.
     /// for &b in haystack {
@@ -1524,44 +1524,43 @@ impl DFA {
     /// Unlike typical DFA implementations, the start state for DFAs in this
     /// crate is dependent on a few different factors:
     ///
-    /// * The pattern ID, if present. When the underlying DFA has been
-    /// configured with multiple patterns _and_ the DFA has been configured to
-    /// build an anchored start state for each pattern, then a pattern ID may
-    /// be specified to execute an anchored search for that specific pattern.
-    /// If `pattern_id` is invalid or if the DFA isn't configured to build
-    /// start states for each pattern, then implementations must panic. DFAs in
-    /// this crate can be configured to build start states for each pattern via
-    /// [`Config::starts_for_each_pattern`].
-    /// * When `start > 0`, the byte at index `start - 1` may influence the
-    /// start state if the regex uses `^` or `\b`.
-    /// * Similarly, when `start == 0`, it may influence the start state when
-    /// the regex uses `^` or `\A`.
-    /// * Currently, `end` is unused.
+    /// * The [`Anchored`] mode of the search. Unanchored, anchored and
+    /// anchored searches for a specific [`PatternID`] all use different start
+    /// states.
+    /// * The position at which the search begins, via [`Input::start`]. This
+    /// and the byte immediately preceding the start of the search (if one
+    /// exists) influence which look-behind assertions are true at the start
+    /// of the search. This in turn influences which start state is selected.
     /// * Whether the search is a forward or reverse search. This routine can
     /// only be used for forward searches.
     ///
-    /// # Panics
+    /// # Errors
     ///
-    /// This panics if `start..end` is not a valid sub-slice of `bytes`. This
-    /// also panics if `pattern_id` is non-None and does not refer to a valid
-    /// pattern, or if the DFA was not configured to build anchored start
-    /// states for each pattern.
-    #[inline]
+    /// This may return a [`MatchError`] (not a [`CacheError`]!) if the search
+    /// needs to give up when determining the start state (for example, if
+    /// it sees a "quit" byte or if the cache has been cleared too many
+    /// times). This can also return an error if the given `Input` contains an
+    /// unsupported [`Anchored`] configuration.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
     pub fn start_state_forward(
         &self,
         cache: &mut Cache,
-        pattern_id: Option<PatternID>,
-        bytes: &[u8],
-        start: usize,
-        end: usize,
-    ) -> Result<LazyStateID, CacheError> {
-        let mut lazy = Lazy::new(self, cache);
-        let start_type = Start::from_position_fwd(bytes, start, end);
-        let sid = lazy.as_ref().get_cached_start_id(pattern_id, start_type);
-        if !sid.is_unknown() {
-            return Ok(sid);
+        input: &Input<'_>,
+    ) -> Result<LazyStateID, MatchError> {
+        if !self.quitset.is_empty() && input.start() > 0 {
+            let offset = input.start() - 1;
+            let byte = input.haystack()[offset];
+            if self.quitset.contains(byte) {
+                return Err(MatchError::quit(byte, offset));
+            }
+        }
+        let start_type = self.start_map.fwd(input);
+        let start = LazyRef::new(self, cache)
+            .get_cached_start_id(input, start_type)?;
+        if !start.is_unknown() {
+            return Ok(start);
         }
-        lazy.cache_start_group(pattern_id, start_type)
+        Lazy::new(self, cache).cache_start_group(input, start_type)
     }
 
     /// Return the ID of the start state for this lazy DFA when executing a
@@ -1570,44 +1569,43 @@ impl DFA {
     /// Unlike typical DFA implementations, the start state for DFAs in this
     /// crate is dependent on a few different factors:
     ///
-    /// * The pattern ID, if present. When the underlying DFA has been
-    /// configured with multiple patterns _and_ the DFA has been configured to
-    /// build an anchored start state for each pattern, then a pattern ID may
-    /// be specified to execute an anchored search for that specific pattern.
-    /// If `pattern_id` is invalid or if the DFA isn't configured to build
-    /// start states for each pattern, then implementations must panic. DFAs in
-    /// this crate can be configured to build start states for each pattern via
-    /// [`Config::starts_for_each_pattern`].
-    /// * When `end < bytes.len()`, the byte at index `end` may influence the
-    /// start state if the regex uses `$` or `\b`.
-    /// * Similarly, when `end == bytes.len()`, it may influence the start
-    /// state when the regex uses `$` or `\z`.
-    /// * Currently, `start` is unused.
+    /// * The [`Anchored`] mode of the search. Unanchored, anchored and
+    /// anchored searches for a specific [`PatternID`] all use different start
+    /// states.
+    /// * The position at which the search begins, via [`Input::start`]. This
+    /// and the byte immediately preceding the start of the search (if one
+    /// exists) influence which look-behind assertions are true at the start
+    /// of the search. This in turn influences which start state is selected.
     /// * Whether the search is a forward or reverse search. This routine can
     /// only be used for reverse searches.
     ///
-    /// # Panics
+    /// # Errors
     ///
-    /// This panics if `start..end` is not a valid sub-slice of `bytes`. This
-    /// also panics if `pattern_id` is non-None and does not refer to a valid
-    /// pattern, or if the DFA was not configured to build anchored start
-    /// states for each pattern.
-    #[inline]
+    /// This may return a [`MatchError`] (not a [`CacheError`]!) if the search
+    /// needs to give up when determining the start state (for example, if
+    /// it sees a "quit" byte or if the cache has been cleared too many
+    /// times). This can also return an error if the given `Input` contains an
+    /// unsupported [`Anchored`] configuration.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
     pub fn start_state_reverse(
         &self,
         cache: &mut Cache,
-        pattern_id: Option<PatternID>,
-        bytes: &[u8],
-        start: usize,
-        end: usize,
-    ) -> Result<LazyStateID, CacheError> {
-        let mut lazy = Lazy::new(self, cache);
-        let start_type = Start::from_position_rev(bytes, start, end);
-        let sid = lazy.as_ref().get_cached_start_id(pattern_id, start_type);
-        if !sid.is_unknown() {
-            return Ok(sid);
+        input: &Input<'_>,
+    ) -> Result<LazyStateID, MatchError> {
+        if !self.quitset.is_empty() && input.end() < input.haystack().len() {
+            let offset = input.end();
+            let byte = input.haystack()[offset];
+            if self.quitset.contains(byte) {
+                return Err(MatchError::quit(byte, offset));
+            }
         }
-        lazy.cache_start_group(pattern_id, start_type)
+        let start_type = self.start_map.rev(input);
+        let start = LazyRef::new(self, cache)
+            .get_cached_start_id(input, start_type)?;
+        if !start.is_unknown() {
+            return Ok(start);
+        }
+        Lazy::new(self, cache).cache_start_group(input, start_type)
     }
 
     /// Returns the total number of patterns that match in this state.
@@ -1616,9 +1614,11 @@ impl DFA {
     /// necessarily always return `1` for all match states.
     ///
     /// A lazy DFA guarantees that [`DFA::match_pattern`] can be called with
-    /// indices up to (but not including) the count returned by this routine
+    /// indices up to (but not including) the length returned by this routine
     /// without panicking.
     ///
+    /// # Panics
+    ///
     /// If the given state is not a match state, then this may either panic
     /// or return an incorrect result.
     ///
@@ -1629,12 +1629,10 @@ impl DFA {
     /// patterns have matched in a particular state, but also how to access
     /// which specific patterns have matched.
     ///
-    /// Notice that we must use [`MatchKind::All`](crate::MatchKind::All)
-    /// when building the DFA. If we used
-    /// [`MatchKind::LeftmostFirst`](crate::MatchKind::LeftmostFirst)
-    /// instead, then the DFA would not be constructed in a way that supports
-    /// overlapping matches. (It would only report a single pattern that
-    /// matches at any particular point in time.)
+    /// Notice that we must use [`MatchKind::All`] when building the DFA. If we
+    /// used [`MatchKind::LeftmostFirst`] instead, then the DFA would not be
+    /// constructed in a way that supports overlapping matches. (It would only
+    /// report a single pattern that matches at any particular point in time.)
     ///
     /// Another thing to take note of is the patterns used and the order in
     /// which the pattern IDs are reported. In the example below, pattern `3`
@@ -1645,7 +1643,8 @@ impl DFA {
     /// other.
     ///
     /// ```
-    /// use regex_automata::{hybrid::dfa::DFA, MatchKind};
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{hybrid::dfa::DFA, Input, MatchKind};
     ///
     /// let dfa = DFA::builder()
     ///     .configure(DFA::config().match_kind(MatchKind::All))
@@ -1658,7 +1657,7 @@ impl DFA {
     /// // The start state is determined by inspecting the position and the
     /// // initial bytes of the haystack.
     /// let mut sid = dfa.start_state_forward(
-    ///     &mut cache, None, haystack, 0, haystack.len(),
+    ///     &mut cache, &Input::new(haystack),
     /// )?;
     /// // Walk all the bytes in the haystack.
     /// for &b in haystack {
@@ -1667,8 +1666,8 @@ impl DFA {
     /// sid = dfa.next_eoi_state(&mut cache, sid)?;
     ///
     /// assert!(sid.is_match());
-    /// assert_eq!(dfa.match_count(&mut cache, sid), 3);
-    /// // The following calls are guaranteed to not panic since `match_count`
+    /// assert_eq!(dfa.match_len(&mut cache, sid), 3);
+    /// // The following calls are guaranteed to not panic since `match_len`
     /// // returned `3` above.
     /// assert_eq!(dfa.match_pattern(&mut cache, sid, 0).as_usize(), 3);
     /// assert_eq!(dfa.match_pattern(&mut cache, sid, 1).as_usize(), 0);
@@ -1677,22 +1676,22 @@ impl DFA {
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     #[inline]
-    pub fn match_count(&self, cache: &Cache, id: LazyStateID) -> usize {
+    pub fn match_len(&self, cache: &Cache, id: LazyStateID) -> usize {
         assert!(id.is_match());
-        LazyRef::new(self, cache).get_cached_state(id).match_count()
+        LazyRef::new(self, cache).get_cached_state(id).match_len()
     }
 
     /// Returns the pattern ID corresponding to the given match index in the
     /// given state.
     ///
-    /// See [`DFA::match_count`] for an example of how to use this method
+    /// See [`DFA::match_len`] for an example of how to use this method
     /// correctly. Note that if you know your lazy DFA is configured with a
     /// single pattern, then this routine is never necessary since it will
     /// always return a pattern ID of `0` for an index of `0` when `id`
     /// corresponds to a match state.
     ///
     /// Typically, this routine is used when implementing an overlapping
-    /// search, as the example for `DFA::match_count` does.
+    /// search, as the example for `DFA::match_len` does.
     ///
     /// # Panics
     ///
@@ -1713,7 +1712,7 @@ impl DFA {
         // that finds the pattern ID from the corresponding `State`, which
         // requires a bit of slicing/pointer-chasing. This optimization tends
         // to only matter when matches are frequent.
-        if self.pattern_count() == 1 {
+        if self.pattern_len() == 1 {
             return PatternID::ZERO;
         }
         LazyRef::new(self, cache)
@@ -1809,6 +1808,25 @@ pub struct Cache {
     /// clear count is set, then the cache will return an error instead of
     /// clearing the cache if the count has been exceeded.
     clear_count: usize,
+    /// The total number of bytes searched since the last time this cache was
+    /// cleared, not including the current search.
+    ///
+    /// This can be added to the length of the current search to get the true
+    /// total number of bytes searched.
+    ///
+    /// This is generally only non-zero when the
+    /// `Cache::search_{start,update,finish}` APIs are used to track search
+    /// progress.
+    bytes_searched: usize,
+    /// The progress of the current search.
+    ///
+    /// This is only non-`None` when callers utlize the `Cache::search_start`,
+    /// `Cache::search_update` and `Cache::search_finish` APIs.
+    ///
+    /// The purpose of recording search progress is to be able to make a
+    /// determination about the efficiency of the cache. Namely, by keeping
+    /// track of the
+    progress: Option<SearchProgress>,
 }
 
 impl Cache {
@@ -1823,14 +1841,18 @@ impl Cache {
             starts: alloc::vec![],
             states: alloc::vec![],
             states_to_id: StateMap::new(),
-            sparses: SparseSets::new(dfa.nfa.len()),
+            sparses: SparseSets::new(dfa.get_nfa().states().len()),
             stack: alloc::vec![],
             scratch_state_builder: StateBuilderEmpty::new(),
             state_saver: StateSaver::none(),
             memory_usage_state: 0,
             clear_count: 0,
+            bytes_searched: 0,
+            progress: None,
         };
+        debug!("pre-init lazy DFA cache size: {}", cache.memory_usage());
         Lazy { dfa, cache: &mut cache }.init_cache();
+        debug!("post-init lazy DFA cache size: {}", cache.memory_usage());
         cache
     }
 
@@ -1852,7 +1874,8 @@ impl Cache {
     /// This shows how to re-purpose a cache for use with a different DFA.
     ///
     /// ```
-    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch};
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input};
     ///
     /// let dfa1 = DFA::new(r"\w")?;
     /// let dfa2 = DFA::new(r"\W")?;
@@ -1860,7 +1883,7 @@ impl Cache {
     /// let mut cache = dfa1.create_cache();
     /// assert_eq!(
     ///     Some(HalfMatch::must(0, 2)),
-    ///     dfa1.find_leftmost_fwd(&mut cache, "Δ".as_bytes())?,
+    ///     dfa1.try_search_fwd(&mut cache, &Input::new("Δ"))?,
     /// );
     ///
     /// // Using 'cache' with dfa2 is not allowed. It may result in panics or
@@ -1872,7 +1895,7 @@ impl Cache {
     /// cache.reset(&dfa2);
     /// assert_eq!(
     ///     Some(HalfMatch::must(0, 3)),
-    ///     dfa2.find_leftmost_fwd(&mut cache, "☃".as_bytes())?,
+    ///     dfa2.try_search_fwd(&mut cache, &Input::new("☃"))?,
     /// );
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
@@ -1881,6 +1904,69 @@ impl Cache {
         Lazy::new(dfa, self).reset_cache()
     }
 
+    /// Initializes a new search starting at the given position.
+    ///
+    /// If a previous search was unfinished, then it is finished automatically
+    /// and a new search is begun.
+    ///
+    /// Note that keeping track of search progress is _not necessary_
+    /// for correct implementations of search using a lazy DFA. Keeping
+    /// track of search progress is only necessary if you want the
+    /// [`Config::minimum_bytes_per_state`] configuration knob to work.
+    #[inline]
+    pub fn search_start(&mut self, at: usize) {
+        // If a previous search wasn't marked as finished, then finish it
+        // now automatically.
+        if let Some(p) = self.progress.take() {
+            self.bytes_searched += p.len();
+        }
+        self.progress = Some(SearchProgress { start: at, at });
+    }
+
+    /// Updates the current search to indicate that it has search to the
+    /// current position.
+    ///
+    /// No special care needs to be taken for reverse searches. Namely, the
+    /// position given may be _less than_ the starting position of the search.
+    ///
+    /// # Panics
+    ///
+    /// This panics if no search has been started by [`Cache::search_start`].
+    #[inline]
+    pub fn search_update(&mut self, at: usize) {
+        let p =
+            self.progress.as_mut().expect("no in-progress search to update");
+        p.at = at;
+    }
+
+    /// Indicates that a search has finished at the given position.
+    ///
+    /// # Panics
+    ///
+    /// This panics if no search has been started by [`Cache::search_start`].
+    #[inline]
+    pub fn search_finish(&mut self, at: usize) {
+        let mut p =
+            self.progress.take().expect("no in-progress search to finish");
+        p.at = at;
+        self.bytes_searched += p.len();
+    }
+
+    /// Returns the total number of bytes that have been searched since this
+    /// cache was last cleared.
+    ///
+    /// This is useful for determining the efficiency of the cache. For
+    /// example, the lazy DFA uses this value in conjunction with the
+    /// [`Config::minimum_bytes_per_state`] knob to help determine whether it
+    /// should quit searching.
+    ///
+    /// This always returns `0` if search progress isn't being tracked. Note
+    /// that the lazy DFA search routines in this crate always track search
+    /// progress.
+    pub fn search_total_len(&self) -> usize {
+        self.bytes_searched + self.progress.as_ref().map_or(0, |p| p.len())
+    }
+
     /// Returns the total number of times this cache has been cleared since it
     /// was either created or last reset.
     ///
@@ -1899,6 +1985,9 @@ impl Cache {
         const ID_SIZE: usize = size_of::<LazyStateID>();
         const STATE_SIZE: usize = size_of::<State>();
 
+        // NOTE: If you make changes to the below, then
+        // 'minimum_cache_capacity' should be updated correspondingly.
+
         self.trans.len() * ID_SIZE
         + self.starts.len() * ID_SIZE
         + self.states.len() * STATE_SIZE
@@ -1912,6 +2001,32 @@ impl Cache {
     }
 }
 
+/// Keeps track of the progress of the current search.
+///
+/// This is updated via the `Cache::search_{start,update,finish}` APIs to
+/// record how many bytes have been searched. This permits computing a
+/// heuristic that represents the efficiency of a cache, and thus helps inform
+/// whether the lazy DFA should give up or not.
+#[derive(Clone, Debug)]
+struct SearchProgress {
+    start: usize,
+    at: usize,
+}
+
+impl SearchProgress {
+    /// Returns the length, in bytes, of this search so far.
+    ///
+    /// This automatically handles the case of a reverse search, where `at`
+    /// is likely to be less than `start`.
+    fn len(&self) -> usize {
+        if self.start <= self.at {
+            self.at - self.start
+        } else {
+            self.start - self.at
+        }
+    }
+}
+
 /// A map from states to state identifiers. When using std, we use a standard
 /// hashmap, since it's a bit faster for this use case. (Other maps, like
 /// one's based on FNV, have not yet been benchmarked.)
@@ -1960,6 +2075,7 @@ impl<'i, 'c> Lazy<'i, 'c> {
     ///
     /// With 'inline(never)' hyperfine reports 1.1s per run. With
     /// 'inline(always)', hyperfine reports 1.23s. So that's a 10% improvement.
+    #[cold]
     #[inline(never)]
     fn cache_next_state(
         &mut self,
@@ -1969,8 +2085,8 @@ impl<'i, 'c> Lazy<'i, 'c> {
         let stride2 = self.dfa.stride2();
         let empty_builder = self.get_state_builder();
         let builder = determinize::next(
-            &self.dfa.nfa,
-            self.dfa.match_kind,
+            self.dfa.get_nfa(),
+            self.dfa.get_config().get_match_kind(),
             &mut self.cache.sparses,
             &mut self.cache.stack,
             &self.cache.states[current.as_usize_untagged() >> stride2],
@@ -2002,26 +2118,32 @@ impl<'i, 'c> Lazy<'i, 'c> {
     ///
     /// If caching this state would otherwise result in a cache that has been
     /// cleared too many times, then an error is returned.
+    #[cold]
+    #[inline(never)]
     fn cache_start_group(
         &mut self,
-        pattern_id: Option<PatternID>,
+        input: &Input<'_>,
         start: Start,
-    ) -> Result<LazyStateID, CacheError> {
-        let nfa_start_id = match pattern_id {
-            Some(pid) => {
-                assert!(
-                    self.dfa.starts_for_each_pattern,
-                    "attempted to search for a specific pattern \
-                     without enabling starts_for_each_pattern",
-                );
-                self.dfa.nfa.start_pattern(pid)
+    ) -> Result<LazyStateID, MatchError> {
+        let mode = input.get_anchored();
+        let nfa_start_id = match mode {
+            Anchored::No => self.dfa.get_nfa().start_unanchored(),
+            Anchored::Yes => self.dfa.get_nfa().start_anchored(),
+            Anchored::Pattern(pid) => {
+                if !self.dfa.get_config().get_starts_for_each_pattern() {
+                    return Err(MatchError::unsupported_anchored(mode));
+                }
+                match self.dfa.get_nfa().start_pattern(pid) {
+                    None => return Ok(self.as_ref().dead_id()),
+                    Some(sid) => sid,
+                }
             }
-            None if self.dfa.anchored => self.dfa.nfa.start_anchored(),
-            None => self.dfa.nfa.start_unanchored(),
         };
 
-        let id = self.cache_start_one(nfa_start_id, start)?;
-        self.set_start_state(pattern_id, start, id);
+        let id = self
+            .cache_start_one(nfa_start_id, start)
+            .map_err(|_| MatchError::gave_up(input.start()))?;
+        self.set_start_state(input, start, id);
         Ok(id)
     }
 
@@ -2042,22 +2164,33 @@ impl<'i, 'c> Lazy<'i, 'c> {
         start: Start,
     ) -> Result<LazyStateID, CacheError> {
         let mut builder_matches = self.get_state_builder().into_matches();
-        determinize::set_lookbehind_from_start(&start, &mut builder_matches);
+        determinize::set_lookbehind_from_start(
+            self.dfa.get_nfa(),
+            &start,
+            &mut builder_matches,
+        );
         self.cache.sparses.set1.clear();
         determinize::epsilon_closure(
-            self.dfa.nfa.borrow(),
+            self.dfa.get_nfa(),
             nfa_start_id,
-            *builder_matches.look_have(),
+            builder_matches.look_have(),
             &mut self.cache.stack,
             &mut self.cache.sparses.set1,
         );
         let mut builder = builder_matches.into_nfa();
         determinize::add_nfa_states(
-            self.dfa.nfa.borrow(),
+            &self.dfa.get_nfa(),
             &self.cache.sparses.set1,
             &mut builder,
         );
-        self.add_builder_state(builder, |id| id.to_start())
+        let tag_starts = self.dfa.get_config().get_specialize_start_states();
+        self.add_builder_state(builder, |id| {
+            if tag_starts {
+                id.to_start()
+            } else {
+                id
+            }
+        })
     }
 
     /// Either add the given builder state to this cache, or return an ID to an
@@ -2164,7 +2297,7 @@ impl<'i, 'c> Lazy<'i, 'c> {
     /// clearings, then this will return a cache error. In this case,
     /// callers should bubble this up as the cache can't be used until it is
     /// reset. Implementations of search should convert this error into a
-    /// `MatchError::GaveUp`.
+    /// [`MatchError::gave_up`].
     ///
     /// If 'self.state_saver' is set to save a state, then this state is
     /// persisted through cache clearing. Otherwise, the cache is returned to
@@ -2175,21 +2308,68 @@ impl<'i, 'c> Lazy<'i, 'c> {
     /// Otherwise, any lazy state ID generated by the cache prior to resetting
     /// it is invalid after the reset.
     fn try_clear_cache(&mut self) -> Result<(), CacheError> {
-        // Currently, the only heuristic we use is the minimum cache clear
-        // count. If we pass that minimum, then we give up.
-        //
-        // It would be good to also add a heuristic based on "bytes searched
-        // per generated state," but this requires API design work. Namely,
-        // we really do not want to add a counter increment to the transition
-        // function, which implies we need to expose APIs to update the number
-        // of bytes searched by implementers of the search routines. And that
-        // doesn't seem great... But we should do it if this heuristic isn't
-        // enough. (The original lazy DFA implementation in the 'regex' crate
-        // had this heuristic, since the lazy DFA was coupled with the search
-        // routines.)
-        if let Some(min_count) = self.dfa.minimum_cache_clear_count {
+        let c = self.dfa.get_config();
+        if let Some(min_count) = c.get_minimum_cache_clear_count() {
             if self.cache.clear_count >= min_count {
-                return Err(CacheError::too_many_cache_clears());
+                if let Some(min_bytes_per) = c.get_minimum_bytes_per_state() {
+                    let len = self.cache.search_total_len();
+                    let min_bytes =
+                        min_bytes_per.saturating_mul(self.cache.states.len());
+                    // If we've searched 0 bytes then probably something has
+                    // gone wrong and the lazy DFA search implementation isn't
+                    // correctly updating the search progress state.
+                    if len == 0 {
+                        trace!(
+                            "number of bytes searched is 0, but \
+                             a minimum bytes per state searched ({}) is \
+                             enabled, maybe Cache::search_update \
+                             is not being used?",
+                            min_bytes_per,
+                        );
+                    }
+                    if len < min_bytes {
+                        trace!(
+                            "lazy DFA cache has been cleared {} times, \
+                             which exceeds the limit of {}, \
+                             AND its bytes searched per state is less \
+                             than the configured minimum of {}, \
+                             therefore lazy DFA is giving up \
+                             (bytes searched since cache clear = {}, \
+                              number of states = {})",
+                            self.cache.clear_count,
+                            min_count,
+                            min_bytes_per,
+                            len,
+                            self.cache.states.len(),
+                        );
+                        return Err(CacheError::bad_efficiency());
+                    } else {
+                        trace!(
+                            "lazy DFA cache has been cleared {} times, \
+                             which exceeds the limit of {}, \
+                             AND its bytes searched per state is greater \
+                             than the configured minimum of {}, \
+                             therefore lazy DFA is continuing! \
+                             (bytes searched since cache clear = {}, \
+                              number of states = {})",
+                            self.cache.clear_count,
+                            min_count,
+                            min_bytes_per,
+                            len,
+                            self.cache.states.len(),
+                        );
+                    }
+                } else {
+                    trace!(
+                        "lazy DFA cache has been cleared {} times, \
+                         which exceeds the limit of {}, \
+                         since there is no configured bytes per state \
+                         minimum, lazy DFA is giving up",
+                        self.cache.clear_count,
+                        min_count,
+                    );
+                    return Err(CacheError::too_many_cache_clears());
+                }
             }
         }
         self.clear_cache();
@@ -2209,18 +2389,13 @@ impl<'i, 'c> Lazy<'i, 'c> {
         // If a new DFA is used, it might have a different number of NFA
         // states, so we need to make sure our sparse sets have the appropriate
         // size.
-        self.cache.sparses.resize(self.dfa.nfa.len());
+        self.cache.sparses.resize(self.dfa.get_nfa().states().len());
         self.cache.clear_count = 0;
+        self.cache.progress = None;
     }
 
     /// Clear the cache used by this lazy DFA.
     ///
-    /// If clearing the cache exceeds the minimum number of required cache
-    /// clearings, then this will return a cache error. In this case,
-    /// callers should bubble this up as the cache can't be used until it is
-    /// reset. Implementations of search should convert this error into a
-    /// `MatchError::GaveUp`.
-    ///
     /// If 'self.state_saver' is set to save a state, then this state is
     /// persisted through cache clearing. Otherwise, the cache is returned to
     /// its state after initialization with two exceptions: its clear count
@@ -2236,6 +2411,10 @@ impl<'i, 'c> Lazy<'i, 'c> {
         self.cache.states_to_id.clear();
         self.cache.memory_usage_state = 0;
         self.cache.clear_count += 1;
+        self.cache.bytes_searched = 0;
+        if let Some(ref mut progress) = self.cache.progress {
+            progress.start = progress.at;
+        }
         trace!(
             "lazy DFA cache has been cleared (count: {})",
             self.cache.clear_count
@@ -2260,6 +2439,10 @@ impl<'i, 'c> Lazy<'i, 'c> {
             let new_id = self
                 .add_state(state, |id| {
                     if old_id.is_start() {
+                        // We don't need to consult the
+                        // 'specialize_start_states' config knob here, because
+                        // if it's disabled, old_id.is_start() will never
+                        // return true.
                         id.to_start()
                     } else {
                         id
@@ -2282,9 +2465,13 @@ impl<'i, 'c> Lazy<'i, 'c> {
     /// Primarily, this adds the three sentinel states and allocates some
     /// initial memory.
     fn init_cache(&mut self) {
-        let mut starts_len = Start::count();
-        if self.dfa.starts_for_each_pattern {
-            starts_len += Start::count() * self.dfa.pattern_count();
+        // Why multiply by 2 here? Because we make room for both the unanchored
+        // and anchored start states. Unanchored is first and then anchored.
+        let mut starts_len = Start::len().checked_mul(2).unwrap();
+        // ... but if we also want start states for every pattern, we make room
+        // for that too.
+        if self.dfa.get_config().get_starts_for_each_pattern() {
+            starts_len += Start::len() * self.dfa.pattern_len();
         }
         self.cache
             .starts
@@ -2357,7 +2544,7 @@ impl<'i, 'c> Lazy<'i, 'c> {
 
     /// Set all transitions on the state 'from' to 'to'.
     fn set_all_transitions(&mut self, from: LazyStateID, to: LazyStateID) {
-        for unit in self.dfa.classes.representatives() {
+        for unit in self.dfa.classes.representatives(..) {
             self.set_transition(from, unit, to);
         }
     }
@@ -2387,22 +2574,23 @@ impl<'i, 'c> Lazy<'i, 'c> {
     /// 'starts_for_each_pattern' is not enabled.
     fn set_start_state(
         &mut self,
-        pattern_id: Option<PatternID>,
+        input: &Input<'_>,
         start: Start,
         id: LazyStateID,
     ) {
         assert!(self.as_ref().is_valid(id));
         let start_index = start.as_usize();
-        let index = match pattern_id {
-            None => start_index,
-            Some(pid) => {
+        let index = match input.get_anchored() {
+            Anchored::No => start_index,
+            Anchored::Yes => Start::len() + start_index,
+            Anchored::Pattern(pid) => {
                 assert!(
-                    self.dfa.starts_for_each_pattern,
+                    self.dfa.get_config().get_starts_for_each_pattern(),
                     "attempted to search for a specific pattern \
                      without enabling starts_for_each_pattern",
                 );
                 let pid = pid.as_usize();
-                Start::count() + (Start::count() * pid) + start_index
+                (2 * Start::len()) + (Start::len() * pid) + start_index
             }
         };
         self.cache.starts[index] = id;
@@ -2451,25 +2639,30 @@ impl<'i, 'c> LazyRef<'i, 'c> {
     ///
     /// If the start state has not yet been computed, then this returns an
     /// unknown lazy state ID.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
     fn get_cached_start_id(
         &self,
-        pattern_id: Option<PatternID>,
+        input: &Input<'_>,
         start: Start,
-    ) -> LazyStateID {
+    ) -> Result<LazyStateID, MatchError> {
         let start_index = start.as_usize();
-        let index = match pattern_id {
-            None => start_index,
-            Some(pid) => {
-                let pid = pid.as_usize();
-                assert!(
-                    pid < self.dfa.pattern_count(),
-                    "invalid pattern ID: {:?}",
-                    pid
-                );
-                Start::count() + (Start::count() * pid) + start_index
+        let mode = input.get_anchored();
+        let index = match mode {
+            Anchored::No => start_index,
+            Anchored::Yes => Start::len() + start_index,
+            Anchored::Pattern(pid) => {
+                if !self.dfa.get_config().get_starts_for_each_pattern() {
+                    return Err(MatchError::unsupported_anchored(mode));
+                }
+                if pid.as_usize() >= self.dfa.pattern_len() {
+                    return Ok(self.dead_id());
+                }
+                (2 * Start::len())
+                    + (Start::len() * pid.as_usize())
+                    + start_index
             }
         };
-        self.cache.starts[index]
+        Ok(self.cache.starts[index])
     }
 
     /// Return the cached NFA/DFA powerset state for the given ID.
@@ -2530,6 +2723,11 @@ impl<'i, 'c> LazyRef<'i, 'c> {
     fn state_fits_in_cache(&self, state: &State) -> bool {
         let needed = self.cache.memory_usage()
             + self.memory_usage_for_one_more_state(state.memory_usage());
+        trace!(
+            "lazy DFA cache capacity check: {:?} ?<=? {:?}",
+            needed,
+            self.dfa.cache_capacity
+        );
         needed <= self.dfa.cache_capacity
     }
 
@@ -2573,7 +2771,7 @@ enum StateSaver {
     /// is stored in 'Saved' since it may have changed.
     ToSave { id: LazyStateID, state: State },
     /// An ID that of a state that has been persisted through a lazy DFA
-    /// cache clearing. The ID recorded here corresonds to an ID that was
+    /// cache clearing. The ID recorded here corresponds to an ID that was
     /// once marked as ToSave. The IDs are likely not equivalent even though
     /// the states they point to are.
     Saved(LazyStateID),
@@ -2620,14 +2818,11 @@ impl StateSaver {
 /// A lazy DFA configuration is a simple data object that is typically used
 /// with [`Builder::configure`].
 ///
-/// The default configuration guarantees that a search will _never_ return
-/// a [`MatchError`] for any haystack or pattern. Setting a quit byte with
-/// [`Config::quit`], enabling heuristic support for Unicode word boundaries
-/// with [`Config::unicode_word_boundary`], or setting a minimum cache clear
-/// count with [`Config::minimum_cache_clear_count`] can in turn cause a search
-/// to return an error. See the corresponding configuration options for more
-/// details on when those error conditions arise.
-#[derive(Clone, Copy, Debug, Default)]
+/// The default configuration guarantees that a search will never return a
+/// "gave up" or "quit" error, although it is possible for a search to fail
+/// if [`Config::starts_for_each_pattern`] wasn't enabled (which it is not by
+/// default) and an [`Anchored::Pattern`] mode is requested via [`Input`].
+#[derive(Clone, Debug, Default)]
 pub struct Config {
     // As with other configuration types in this crate, we put all our knobs
     // in options so that we can distinguish between "default" and "not set."
@@ -2636,15 +2831,17 @@ pub struct Config {
     // 'overwrite' method.
     //
     // For docs on the fields below, see the corresponding method setters.
-    anchored: Option<bool>,
     match_kind: Option<MatchKind>,
+    pre: Option<Option<Prefilter>>,
     starts_for_each_pattern: Option<bool>,
     byte_classes: Option<bool>,
     unicode_word_boundary: Option<bool>,
     quitset: Option<ByteSet>,
+    specialize_start_states: Option<bool>,
     cache_capacity: Option<usize>,
     skip_cache_capacity_check: Option<bool>,
     minimum_cache_clear_count: Option<Option<usize>>,
+    minimum_bytes_per_state: Option<Option<usize>>,
 }
 
 impl Config {
@@ -2653,116 +2850,6 @@ impl Config {
         Config::default()
     }
 
-    /// Set whether matching must be anchored at the beginning of the input.
-    ///
-    /// When enabled, a match must begin at the start of a search. When
-    /// disabled (the default), the lazy DFA will act as if the pattern started
-    /// with a `(?s:.)*?`, which enables a match to appear anywhere.
-    ///
-    /// Note that if you want to run both anchored and unanchored
-    /// searches without building multiple automatons, you can enable the
-    /// [`Config::starts_for_each_pattern`] configuration instead. This will
-    /// permit unanchored any-pattern searches and pattern-specific anchored
-    /// searches. See the documentation for that configuration for an example.
-    ///
-    /// By default this is disabled.
-    ///
-    /// **WARNING:** this is subtly different than using a `^` at the start of
-    /// your regex. A `^` forces a regex to match exclusively at the start of
-    /// input, regardless of where you begin your search. In contrast, enabling
-    /// this option will allow your regex to match anywhere in your input,
-    /// but the match must start at the beginning of a search. (Most of the
-    /// higher level convenience search routines make "start of input" and
-    /// "start of search" equivalent, but some routines allow treating these as
-    /// orthogonal.)
-    ///
-    /// For example, consider the haystack `aba` and the following searches:
-    ///
-    /// 1. The regex `^a` is compiled with `anchored=false` and searches
-    ///    `aba` starting at position `2`. Since `^` requires the match to
-    ///    start at the beginning of the input and `2 > 0`, no match is found.
-    /// 2. The regex `a` is compiled with `anchored=true` and searches `aba`
-    ///    starting at position `2`. This reports a match at `[2, 3]` since
-    ///    the match starts where the search started. Since there is no `^`,
-    ///    there is no requirement for the match to start at the beginning of
-    ///    the input.
-    /// 3. The regex `a` is compiled with `anchored=true` and searches `aba`
-    ///    starting at position `1`. Since `b` corresponds to position `1` and
-    ///    since the regex is anchored, it finds no match.
-    /// 4. The regex `a` is compiled with `anchored=false` and searches `aba`
-    ///    startting at position `1`. Since the regex is neither anchored nor
-    ///    starts with `^`, the regex is compiled with an implicit `(?s:.)*?`
-    ///    prefix that permits it to match anywhere. Thus, it reports a match
-    ///    at `[2, 3]`.
-    ///
-    /// # Example
-    ///
-    /// This demonstrates the differences between an anchored search and
-    /// a pattern that begins with `^` (as described in the above warning
-    /// message).
-    ///
-    /// ```
-    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch};
-    ///
-    /// let haystack = "aba".as_bytes();
-    ///
-    /// let dfa = DFA::builder()
-    ///     .configure(DFA::config().anchored(false)) // default
-    ///     .build(r"^a")?;
-    /// let mut cache = dfa.create_cache();
-    /// let got = dfa.find_leftmost_fwd_at(
-    ///     &mut cache, None, None, haystack, 2, 3,
-    /// )?;
-    /// // No match is found because 2 is not the beginning of the haystack,
-    /// // which is what ^ requires.
-    /// let expected = None;
-    /// assert_eq!(expected, got);
-    ///
-    /// let dfa = DFA::builder()
-    ///     .configure(DFA::config().anchored(true))
-    ///     .build(r"a")?;
-    /// let mut cache = dfa.create_cache();
-    /// let got = dfa.find_leftmost_fwd_at(
-    ///     &mut cache, None, None, haystack, 2, 3,
-    /// )?;
-    /// // An anchored search can still match anywhere in the haystack, it just
-    /// // must begin at the start of the search which is '2' in this case.
-    /// let expected = Some(HalfMatch::must(0, 3));
-    /// assert_eq!(expected, got);
-    ///
-    /// let dfa = DFA::builder()
-    ///     .configure(DFA::config().anchored(true))
-    ///     .build(r"a")?;
-    /// let mut cache = dfa.create_cache();
-    /// let got = dfa.find_leftmost_fwd_at(
-    ///     &mut cache, None, None, haystack, 1, 3,
-    /// )?;
-    /// // No match is found since we start searching at offset 1 which
-    /// // corresponds to 'b'. Since there is no '(?s:.)*?' prefix, no match
-    /// // is found.
-    /// let expected = None;
-    /// assert_eq!(expected, got);
-    ///
-    /// let dfa = DFA::builder()
-    ///     .configure(DFA::config().anchored(false))
-    ///     .build(r"a")?;
-    /// let mut cache = dfa.create_cache();
-    /// let got = dfa.find_leftmost_fwd_at(
-    ///     &mut cache, None, None, haystack, 1, 3,
-    /// )?;
-    /// // Since anchored=false, an implicit '(?s:.)*?' prefix was added to the
-    /// // pattern. Even though the search starts at 'b', the 'match anything'
-    /// // prefix allows the search to match 'a'.
-    /// let expected = Some(HalfMatch::must(0, 3));
-    /// assert_eq!(expected, got);
-    ///
-    /// # Ok::<(), Box<dyn std::error::Error>>(())
-    /// ```
-    pub fn anchored(mut self, yes: bool) -> Config {
-        self.anchored = Some(yes);
-        self
-    }
-
     /// Set the desired match semantics.
     ///
     /// The default is [`MatchKind::LeftmostFirst`], which corresponds to the
@@ -2789,21 +2876,24 @@ impl Config {
     /// report overlapping matches.
     ///
     /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
     /// use regex_automata::{
-    ///     hybrid::{dfa::DFA, OverlappingState},
-    ///     HalfMatch, MatchKind,
+    ///     hybrid::dfa::{DFA, OverlappingState},
+    ///     HalfMatch, Input, MatchKind,
     /// };
     ///
     /// let dfa = DFA::builder()
     ///     .configure(DFA::config().match_kind(MatchKind::All))
     ///     .build_many(&[r"\w+$", r"\S+$"])?;
     /// let mut cache = dfa.create_cache();
-    /// let haystack = "@foo".as_bytes();
+    /// let haystack = "@foo";
     /// let mut state = OverlappingState::start();
     ///
     /// let expected = Some(HalfMatch::must(1, 4));
-    /// let got = dfa.find_overlapping_fwd(&mut cache, haystack, &mut state)?;
-    /// assert_eq!(expected, got);
+    /// dfa.try_search_overlapping_fwd(
+    ///     &mut cache, &Input::new(haystack), &mut state,
+    /// )?;
+    /// assert_eq!(expected, state.get_match());
     ///
     /// // The first pattern also matches at the same position, so re-running
     /// // the search will yield another match. Notice also that the first
@@ -2811,8 +2901,10 @@ impl Config {
     /// // pattern begins its match before the first, is therefore an earlier
     /// // match and is thus reported first.
     /// let expected = Some(HalfMatch::must(0, 4));
-    /// let got = dfa.find_overlapping_fwd(&mut cache, haystack, &mut state)?;
-    /// assert_eq!(expected, got);
+    /// dfa.try_search_overlapping_fwd(
+    ///     &mut cache, &Input::new(haystack), &mut state,
+    /// )?;
+    /// assert_eq!(expected, state.get_match());
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
@@ -2829,36 +2921,38 @@ impl Config {
     /// for you, so it's usually not necessary to do this yourself.
     ///
     /// ```
-    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, MatchKind};
+    /// use regex_automata::{
+    ///     hybrid::dfa::DFA,
+    ///     nfa::thompson::NFA,
+    ///     Anchored, HalfMatch, Input, MatchKind,
+    /// };
     ///
-    /// let haystack = "123foobar456".as_bytes();
-    /// let pattern = r"[a-z]+";
+    /// let input = Input::new("123foobar456");
+    /// let pattern = r"[a-z]+r";
     ///
     /// let dfa_fwd = DFA::new(pattern)?;
     /// let dfa_rev = DFA::builder()
-    ///     .configure(DFA::config()
-    ///         .anchored(true)
-    ///         .match_kind(MatchKind::All)
-    ///     )
+    ///     .thompson(NFA::config().reverse(true))
+    ///     .configure(DFA::config().match_kind(MatchKind::All))
     ///     .build(pattern)?;
     /// let mut cache_fwd = dfa_fwd.create_cache();
     /// let mut cache_rev = dfa_rev.create_cache();
     ///
     /// let expected_fwd = HalfMatch::must(0, 9);
     /// let expected_rev = HalfMatch::must(0, 3);
-    /// let got_fwd = dfa_fwd.find_leftmost_fwd(
-    ///     &mut cache_fwd, haystack,
-    /// )?.unwrap();
+    /// let got_fwd = dfa_fwd.try_search_fwd(&mut cache_fwd, &input)?.unwrap();
     /// // Here we don't specify the pattern to search for since there's only
     /// // one pattern and we're doing a leftmost search. But if this were an
     /// // overlapping search, you'd need to specify the pattern that matched
     /// // in the forward direction. (Otherwise, you might wind up finding the
     /// // starting position of a match of some other pattern.) That in turn
     /// // requires building the reverse automaton with starts_for_each_pattern
-    /// // enabled. Indeed, this is what Regex does internally.
-    /// let got_rev = dfa_rev.find_leftmost_rev_at(
-    ///     &mut cache_rev, None, haystack, 0, got_fwd.offset(),
-    /// )?.unwrap();
+    /// // enabled.
+    /// let input = input
+    ///     .clone()
+    ///     .range(..got_fwd.offset())
+    ///     .anchored(Anchored::Yes);
+    /// let got_rev = dfa_rev.try_search_rev(&mut cache_rev, &input)?.unwrap();
     /// assert_eq!(expected_fwd, got_fwd);
     /// assert_eq!(expected_rev, got_rev);
     ///
@@ -2869,6 +2963,86 @@ impl Config {
         self
     }
 
+    /// Set a prefilter to be used whenever a start state is entered.
+    ///
+    /// A [`Prefilter`] in this context is meant to accelerate searches by
+    /// looking for literal prefixes that every match for the corresponding
+    /// pattern (or patterns) must start with. Once a prefilter produces a
+    /// match, the underlying search routine continues on to try and confirm
+    /// the match.
+    ///
+    /// Be warned that setting a prefilter does not guarantee that the search
+    /// will be faster. While it's usually a good bet, if the prefilter
+    /// produces a lot of false positive candidates (i.e., positions matched
+    /// by the prefilter but not by the regex), then the overall result can
+    /// be slower than if you had just executed the regex engine without any
+    /// prefilters.
+    ///
+    /// Note that unless [`Config::specialize_start_states`] has been
+    /// explicitly set, then setting this will also enable (when `pre` is
+    /// `Some`) or disable (when `pre` is `None`) start state specialization.
+    /// This occurs because without start state specialization, a prefilter
+    /// is likely to be less effective. And without a prefilter, start state
+    /// specialization is usually pointless.
+    ///
+    /// By default no prefilter is set.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     hybrid::dfa::DFA,
+    ///     util::prefilter::Prefilter,
+    ///     Input, HalfMatch, MatchKind,
+    /// };
+    ///
+    /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["foo", "bar"]);
+    /// let re = DFA::builder()
+    ///     .configure(DFA::config().prefilter(pre))
+    ///     .build(r"(foo|bar)[a-z]+")?;
+    /// let mut cache = re.create_cache();
+    /// let input = Input::new("foo1 barfox bar");
+    /// assert_eq!(
+    ///     Some(HalfMatch::must(0, 11)),
+    ///     re.try_search_fwd(&mut cache, &input)?,
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Be warned though that an incorrect prefilter can lead to incorrect
+    /// results!
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     hybrid::dfa::DFA,
+    ///     util::prefilter::Prefilter,
+    ///     Input, HalfMatch, MatchKind,
+    /// };
+    ///
+    /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["foo", "car"]);
+    /// let re = DFA::builder()
+    ///     .configure(DFA::config().prefilter(pre))
+    ///     .build(r"(foo|bar)[a-z]+")?;
+    /// let mut cache = re.create_cache();
+    /// let input = Input::new("foo1 barfox bar");
+    /// assert_eq!(
+    ///     // No match reported even though there clearly is one!
+    ///     None,
+    ///     re.try_search_fwd(&mut cache, &input)?,
+    /// );
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn prefilter(mut self, pre: Option<Prefilter>) -> Config {
+        self.pre = Some(pre);
+        if self.specialize_start_states.is_none() {
+            self.specialize_start_states =
+                Some(self.get_prefilter().is_some());
+        }
+        self
+    }
+
     /// Whether to compile a separate start state for each pattern in the
     /// lazy DFA.
     ///
@@ -2897,50 +3071,45 @@ impl Config {
     /// for matches of any pattern or to search for anchored matches of one
     /// particular pattern while using the same DFA. (Otherwise, you would need
     /// to compile a new DFA for each pattern.)
-    /// 3. Since the start states added for each pattern are anchored, if you
-    /// compile an unanchored DFA with one pattern while also enabling this
-    /// option, then you can use the same DFA to perform anchored or unanchored
-    /// searches. The latter you get with the standard search APIs. The former
-    /// you get from the various `_at` search methods that allow you specify a
-    /// pattern ID to search for.
     ///
     /// By default this is disabled.
     ///
     /// # Example
     ///
     /// This example shows how to use this option to permit the same lazy DFA
-    /// to run both anchored and unanchored searches for a single pattern.
+    /// to run both general searches for any pattern and anchored searches for
+    /// a specific pattern.
     ///
     /// ```
-    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, PatternID};
+    /// use regex_automata::{
+    ///     hybrid::dfa::DFA,
+    ///     Anchored, HalfMatch, Input, PatternID,
+    /// };
     ///
     /// let dfa = DFA::builder()
     ///     .configure(DFA::config().starts_for_each_pattern(true))
-    ///     .build(r"foo[0-9]+")?;
+    ///     .build_many(&[r"[a-z0-9]{6}", r"[a-z][a-z0-9]{5}"])?;
     /// let mut cache = dfa.create_cache();
-    /// let haystack = b"quux foo123";
-    ///
-    /// // Here's a normal unanchored search. Notice that we use 'None' for the
-    /// // pattern ID. Since the DFA was built as an unanchored machine, it
-    /// // uses its default unanchored starting state.
-    /// let expected = HalfMatch::must(0, 11);
-    /// assert_eq!(Some(expected), dfa.find_leftmost_fwd_at(
-    ///     &mut cache, None, None, haystack, 0, haystack.len(),
-    /// )?);
-    /// // But now if we explicitly specify the pattern to search ('0' being
-    /// // the only pattern in the DFA), then it will use the starting state
-    /// // for that specific pattern which is always anchored. Since the
-    /// // pattern doesn't have a match at the beginning of the haystack, we
-    /// // find nothing.
-    /// assert_eq!(None, dfa.find_leftmost_fwd_at(
-    ///     &mut cache, None, Some(PatternID::must(0)), haystack, 0, haystack.len(),
-    /// )?);
-    /// // And finally, an anchored search is not the same as putting a '^' at
-    /// // beginning of the pattern. An anchored search can only match at the
-    /// // beginning of the *search*, which we can change:
-    /// assert_eq!(Some(expected), dfa.find_leftmost_fwd_at(
-    ///     &mut cache, None, Some(PatternID::must(0)), haystack, 5, haystack.len(),
-    /// )?);
+    /// let haystack = "bar foo123";
+    ///
+    /// // Here's a normal unanchored search that looks for any pattern.
+    /// let expected = HalfMatch::must(0, 10);
+    /// let input = Input::new(haystack);
+    /// assert_eq!(Some(expected), dfa.try_search_fwd(&mut cache, &input)?);
+    /// // We can also do a normal anchored search for any pattern. Since it's
+    /// // an anchored search, we position the start of the search where we
+    /// // know the match will begin.
+    /// let expected = HalfMatch::must(0, 10);
+    /// let input = Input::new(haystack).range(4..);
+    /// assert_eq!(Some(expected), dfa.try_search_fwd(&mut cache, &input)?);
+    /// // Since we compiled anchored start states for each pattern, we can
+    /// // also look for matches of other patterns explicitly, even if a
+    /// // different pattern would have normally matched.
+    /// let expected = HalfMatch::must(1, 10);
+    /// let input = Input::new(haystack)
+    ///     .range(4..)
+    ///     .anchored(Anchored::Pattern(PatternID::must(1)));
+    /// assert_eq!(Some(expected), dfa.try_search_fwd(&mut cache, &input)?);
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
@@ -2990,7 +3159,7 @@ impl Config {
     /// When set, this will attempt to implement Unicode word boundaries as if
     /// they were ASCII word boundaries. This only works when the search input
     /// is ASCII only. If a non-ASCII byte is observed while searching, then a
-    /// [`MatchError::Quit`](crate::MatchError::Quit) error is returned.
+    /// [`MatchError::quit`] error is returned.
     ///
     /// A possible alternative to enabling this option is to simply use an
     /// ASCII word boundary, e.g., via `(?-u:\b)`. The main reason to use this
@@ -3014,8 +3183,7 @@ impl Config {
     /// When using a [`Regex`](crate::hybrid::regex::Regex), this
     /// corresponds to using the `try_` suite of methods. Alternatively,
     /// if callers can guarantee that their input is ASCII only, then a
-    /// [`MatchError::Quit`](crate::MatchError::Quit) error will never be
-    /// returned while searching.
+    /// [`MatchError::quit`] error will never be returned while searching.
     ///
     /// This is disabled by default.
     ///
@@ -3028,7 +3196,7 @@ impl Config {
     /// ```
     /// use regex_automata::{
     ///     hybrid::dfa::DFA,
-    ///     HalfMatch, MatchError, MatchKind,
+    ///     HalfMatch, Input, MatchError,
     /// };
     ///
     /// let dfa = DFA::builder()
@@ -3038,9 +3206,9 @@ impl Config {
     ///
     /// // The match occurs before the search ever observes the snowman
     /// // character, so no error occurs.
-    /// let haystack = "foo 123 ☃".as_bytes();
+    /// let haystack = "foo 123  ☃";
     /// let expected = Some(HalfMatch::must(0, 7));
-    /// let got = dfa.find_leftmost_fwd(&mut cache, haystack)?;
+    /// let got = dfa.try_search_fwd(&mut cache, &Input::new(haystack))?;
     /// assert_eq!(expected, got);
     ///
     /// // Notice that this search fails, even though the snowman character
@@ -3048,9 +3216,23 @@ impl Config {
     /// // routines read one byte past the end of the search to account for
     /// // look-around, and indeed, this is required here to determine whether
     /// // the trailing \b matches.
-    /// let haystack = "foo 123☃".as_bytes();
-    /// let expected = MatchError::Quit { byte: 0xE2, offset: 7 };
-    /// let got = dfa.find_leftmost_fwd(&mut cache, haystack);
+    /// let haystack = "foo 123 ☃";
+    /// let expected = MatchError::quit(0xE2, 8);
+    /// let got = dfa.try_search_fwd(&mut cache, &Input::new(haystack));
+    /// assert_eq!(Err(expected), got);
+    ///
+    /// // Another example is executing a search where the span of the haystack
+    /// // we specify is all ASCII, but there is non-ASCII just before it. This
+    /// // correctly also reports an error.
+    /// let input = Input::new("β123").range(2..);
+    /// let expected = MatchError::quit(0xB2, 1);
+    /// let got = dfa.try_search_fwd(&mut cache, &input);
+    /// assert_eq!(Err(expected), got);
+    ///
+    /// // And similarly for the trailing word boundary.
+    /// let input = Input::new("123β").range(..3);
+    /// let expected = MatchError::quit(0xCE, 3);
+    /// let got = dfa.try_search_fwd(&mut cache, &input);
     /// assert_eq!(Err(expected), got);
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
@@ -3066,9 +3248,9 @@ impl Config {
 
     /// Add a "quit" byte to the lazy DFA.
     ///
-    /// When a quit byte is seen during search time, then search will return
-    /// a [`MatchError::Quit`](crate::MatchError::Quit) error indicating the
-    /// offset at which the search stopped.
+    /// When a quit byte is seen during search time, then search will return a
+    /// [`MatchError::quit`] error indicating the offset at which the search
+    /// stopped.
     ///
     /// A quit byte will always overrule any other aspects of a regex. For
     /// example, if the `x` byte is added as a quit byte and the regex `\w` is
@@ -3109,19 +3291,23 @@ impl Config {
     /// a user supplied pattern from matching across a line boundary.
     ///
     /// ```
-    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, MatchError};
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{hybrid::dfa::DFA, MatchError, Input};
     ///
     /// let dfa = DFA::builder()
     ///     .configure(DFA::config().quit(b'\n', true))
     ///     .build(r"foo\p{any}+bar")?;
     /// let mut cache = dfa.create_cache();
     ///
-    /// let haystack = "foo\nbar".as_bytes();
+    /// let haystack = "foo\nbar";
     /// // Normally this would produce a match, since \p{any} contains '\n'.
     /// // But since we instructed the automaton to enter a quit state if a
     /// // '\n' is observed, this produces a match error instead.
-    /// let expected = MatchError::Quit { byte: 0x0A, offset: 3 };
-    /// let got = dfa.find_leftmost_fwd(&mut cache, haystack).unwrap_err();
+    /// let expected = MatchError::quit(b'\n', 3);
+    /// let got = dfa.try_search_fwd(
+    ///     &mut cache,
+    ///     &Input::new(haystack),
+    /// ).unwrap_err();
     /// assert_eq!(expected, got);
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
@@ -3144,6 +3330,89 @@ impl Config {
         self
     }
 
+    /// Enable specializing start states in the lazy DFA.
+    ///
+    /// When start states are specialized, an implementor of a search routine
+    /// using a lazy DFA can tell when the search has entered a starting state.
+    /// When start states aren't specialized, then it is impossible to know
+    /// whether the search has entered a start state.
+    ///
+    /// Ideally, this option wouldn't need to exist and we could always
+    /// specialize start states. The problem is that start states can be quite
+    /// active. This in turn means that an efficient search routine is likely
+    /// to ping-pong between a heavily optimized hot loop that handles most
+    /// states and to a less optimized specialized handling of start states.
+    /// This causes branches to get heavily mispredicted and overall can
+    /// materially decrease throughput. Therefore, specializing start states
+    /// should only be enabled when it is needed.
+    ///
+    /// Knowing whether a search is in a start state is typically useful when a
+    /// prefilter is active for the search. A prefilter is typically only run
+    /// when in a start state and a prefilter can greatly accelerate a search.
+    /// Therefore, the possible cost of specializing start states is worth it
+    /// in this case. Otherwise, if you have no prefilter, there is likely no
+    /// reason to specialize start states.
+    ///
+    /// This is disabled by default, but note that it is automatically
+    /// enabled (or disabled) if [`Config::prefilter`] is set. Namely, unless
+    /// `specialize_start_states` has already been set, [`Config::prefilter`]
+    /// will automatically enable or disable it based on whether a prefilter
+    /// is present or not, respectively. This is done because a prefilter's
+    /// effectiveness is rooted in being executed whenever the DFA is in a
+    /// start state, and that's only possible to do when they are specialized.
+    ///
+    /// Note that it is plausibly reasonable to _disable_ this option
+    /// explicitly while _enabling_ a prefilter. In that case, a prefilter
+    /// will still be run at the beginning of a search, but never again. This
+    /// in theory could strike a good balance if you're in a situation where a
+    /// prefilter is likely to produce many false positive candidates.
+    ///
+    /// # Example
+    ///
+    /// This example shows how to enable start state specialization and then
+    /// shows how to check whether a state is a start state or not.
+    ///
+    /// ```
+    /// use regex_automata::{hybrid::dfa::DFA, MatchError, Input};
+    ///
+    /// let dfa = DFA::builder()
+    ///     .configure(DFA::config().specialize_start_states(true))
+    ///     .build(r"[a-z]+")?;
+    /// let mut cache = dfa.create_cache();
+    ///
+    /// let haystack = "123 foobar 4567".as_bytes();
+    /// let sid = dfa.start_state_forward(&mut cache, &Input::new(haystack))?;
+    /// // The ID returned by 'start_state_forward' will always be tagged as
+    /// // a start state when start state specialization is enabled.
+    /// assert!(sid.is_tagged());
+    /// assert!(sid.is_start());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Compare the above with the default lazy DFA configuration where
+    /// start states are _not_ specialized. In this case, the start state
+    /// is not tagged and `sid.is_start()` returns false.
+    ///
+    /// ```
+    /// use regex_automata::{hybrid::dfa::DFA, MatchError, Input};
+    ///
+    /// let dfa = DFA::new(r"[a-z]+")?;
+    /// let mut cache = dfa.create_cache();
+    ///
+    /// let haystack = "123 foobar 4567".as_bytes();
+    /// let sid = dfa.start_state_forward(&mut cache, &Input::new(haystack))?;
+    /// // Start states are not tagged in the default configuration!
+    /// assert!(!sid.is_tagged());
+    /// assert!(!sid.is_start());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn specialize_start_states(mut self, yes: bool) -> Config {
+        self.specialize_start_states = Some(yes);
+        self
+    }
+
     /// Sets the maximum amount of heap memory, in bytes, to allocate to the
     /// cache for use during a lazy DFA search. If the lazy DFA would otherwise
     /// use more heap memory, then, depending on other configuration knobs,
@@ -3157,7 +3426,7 @@ impl Config {
     ///
     /// Note that while building a lazy DFA will do a "minimum" check to ensure
     /// the capacity is big enough, this is more or less about correctness.
-    /// If the cache is bigger than the minimum but still too small, then the
+    /// If the cache is bigger than the minimum but still "too small," then the
     /// lazy DFA could wind up spending a lot of time clearing the cache and
     /// recomputing transitions, thus negating the performance benefits of a
     /// lazy DFA. Thus, setting the cache capacity is mostly an experimental
@@ -3175,7 +3444,8 @@ impl Config {
     /// a smaller cache capacity.
     ///
     /// ```
-    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, MatchError};
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input};
     ///
     /// let pattern = r"\p{L}{1000}";
     ///
@@ -3191,7 +3461,7 @@ impl Config {
     ///
     /// let haystack = "ͰͲͶͿΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙ".repeat(50);
     /// let expected = Some(HalfMatch::must(0, 2000));
-    /// let got = dfa.find_leftmost_fwd(&mut cache, haystack.as_bytes())?;
+    /// let got = dfa.try_search_fwd(&mut cache, &Input::new(&haystack))?;
     /// assert_eq!(expected, got);
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
@@ -3211,7 +3481,11 @@ impl Config {
     /// while a minimum cache capacity does permit the lazy DFA to function
     /// where it otherwise couldn't, it's plausible that it may not function
     /// well if it's constantly running out of room. In that case, the speed
-    /// advantages of the lazy DFA may be negated.
+    /// advantages of the lazy DFA may be negated. On the other hand, the
+    /// "minimum" cache capacity computed may not be completely accurate and
+    /// could actually be bigger than what is really necessary. Therefore, it
+    /// is plausible that using the minimum cache capacity could still result
+    /// in very good performance.
     ///
     /// This is disabled by default.
     ///
@@ -3224,7 +3498,8 @@ impl Config {
     /// too small.
     ///
     /// ```
-    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, MatchError};
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input};
     ///
     /// let pattern = r"\p{L}{1000}";
     ///
@@ -3241,7 +3516,7 @@ impl Config {
     ///
     /// let haystack = "ͰͲͶͿΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙ".repeat(50);
     /// let expected = Some(HalfMatch::must(0, 2000));
-    /// let got = dfa.find_leftmost_fwd(&mut cache, haystack.as_bytes())?;
+    /// let got = dfa.try_search_fwd(&mut cache, &Input::new(&haystack))?;
     /// assert_eq!(expected, got);
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
@@ -3254,23 +3529,27 @@ impl Config {
     /// Configure a lazy DFA search to quit after a certain number of cache
     /// clearings.
     ///
-    /// When a minimum is set, then a lazy DFA search will "give up" after
-    /// the minimum number of cache clearings has occurred. This is typically
-    /// useful in scenarios where callers want to detect whether the lazy DFA
-    /// search is "efficient" or not. If the cache is cleared too many times,
-    /// this is a good indicator that it is not efficient, and thus, the caller
-    /// may wish to use some other regex engine.
+    /// When a minimum is set, then a lazy DFA search will *possibly* "give
+    /// up" after the minimum number of cache clearings has occurred. This is
+    /// typically useful in scenarios where callers want to detect whether the
+    /// lazy DFA search is "efficient" or not. If the cache is cleared too many
+    /// times, this is a good indicator that it is not efficient, and thus, the
+    /// caller may wish to use some other regex engine.
     ///
     /// Note that the number of times a cache is cleared is a property of
     /// the cache itself. Thus, if a cache is used in a subsequent search
-    /// with a similarly configured lazy DFA, then it would cause the
-    /// search to "give up" if the cache needed to be cleared. The cache
-    /// clear count can only be reset to `0` via [`DFA::reset_cache`] (or
+    /// with a similarly configured lazy DFA, then it could cause the
+    /// search to "give up" if the cache needed to be cleared, depending
+    /// on its internal count and configured minimum. The cache clear
+    /// count can only be reset to `0` via [`DFA::reset_cache`] (or
     /// [`Regex::reset_cache`](crate::hybrid::regex::Regex::reset_cache) if
     /// you're using the `Regex` API).
     ///
     /// By default, no minimum is configured. Thus, a lazy DFA search will
-    /// never give up due to cache clearings.
+    /// never give up due to cache clearings. If you do set this option, you
+    /// might consider also setting [`Config::minimum_bytes_per_state`] in
+    /// order for the lazy DFA to take efficiency into account before giving
+    /// up.
     ///
     /// # Example
     ///
@@ -3279,13 +3558,11 @@ impl Config {
     /// in a search that returns an error.
     ///
     /// It is important to note that the precise mechanics of how and when
-    /// a cache gets cleared is an implementation detail. Thus, the asserts
-    /// in the tests below with respect to the particular offsets at which a
-    /// search gave up should be viewed strictly as a demonstration. They are
-    /// not part of any API guarantees offered by this crate.
+    /// a cache gets cleared is an implementation detail.
     ///
     /// ```
-    /// use regex_automata::{hybrid::dfa::DFA, MatchError};
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{hybrid::dfa::DFA, Input, MatchError, MatchErrorKind};
     ///
     /// // This is a carefully chosen regex. The idea is to pick one
     /// // that requires some decent number of states (hence the bounded
@@ -3309,37 +3586,42 @@ impl Config {
     ///     .build(pattern)?;
     /// let mut cache = dfa.create_cache();
     ///
+    /// // Our search will give up before reaching the end!
     /// let haystack = "a".repeat(101).into_bytes();
-    /// assert_eq!(
-    ///     dfa.find_leftmost_fwd(&mut cache, &haystack),
-    ///     Err(MatchError::GaveUp { offset: 25 }),
-    /// );
+    /// let result = dfa.try_search_fwd(&mut cache, &Input::new(&haystack));
+    /// assert!(matches!(
+    ///     *result.unwrap_err().kind(),
+    ///     MatchErrorKind::GaveUp { .. },
+    /// ));
     ///
     /// // Now that we know the cache is full, if we search a haystack that we
     /// // know will require creating at least one new state, it should not
-    /// // be able to make any progress.
+    /// // be able to make much progress.
     /// let haystack = "β".repeat(101).into_bytes();
-    /// assert_eq!(
-    ///     dfa.find_leftmost_fwd(&mut cache, &haystack),
-    ///     Err(MatchError::GaveUp { offset: 0 }),
-    /// );
+    /// let result = dfa.try_search_fwd(&mut cache, &Input::new(&haystack));
+    /// assert!(matches!(
+    ///     *result.unwrap_err().kind(),
+    ///     MatchErrorKind::GaveUp { .. },
+    /// ));
     ///
     /// // If we reset the cache, then we should be able to create more states
     /// // and make more progress with searching for betas.
     /// cache.reset(&dfa);
     /// let haystack = "β".repeat(101).into_bytes();
-    /// assert_eq!(
-    ///     dfa.find_earliest_fwd(&mut cache, &haystack),
-    ///     Err(MatchError::GaveUp { offset: 26 }),
-    /// );
+    /// let result = dfa.try_search_fwd(&mut cache, &Input::new(&haystack));
+    /// assert!(matches!(
+    ///     *result.unwrap_err().kind(),
+    ///     MatchErrorKind::GaveUp { .. },
+    /// ));
     ///
     /// // ... switching back to ASCII still makes progress since it just needs
     /// // to set transitions on existing states!
     /// let haystack = "a".repeat(101).into_bytes();
-    /// assert_eq!(
-    ///     dfa.find_earliest_fwd(&mut cache, &haystack),
-    ///     Err(MatchError::GaveUp { offset: 13 }),
-    /// );
+    /// let result = dfa.try_search_fwd(&mut cache, &Input::new(&haystack));
+    /// assert!(matches!(
+    ///     *result.unwrap_err().kind(),
+    ///     MatchErrorKind::GaveUp { .. },
+    /// ));
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
@@ -3348,9 +3630,48 @@ impl Config {
         self
     }
 
-    /// Returns whether this configuration has enabled anchored searches.
-    pub fn get_anchored(&self) -> bool {
-        self.anchored.unwrap_or(false)
+    /// Configure a lazy DFA search to quit only when its efficiency drops
+    /// below the given minimum.
+    ///
+    /// The efficiency of the cache is determined by the number of DFA states
+    /// compiled per byte of haystack searched. For example, if the efficiency
+    /// is 2, then it means the lazy DFA is creating a new DFA state after
+    /// searching approximately 2 bytes in a haystack. Generally speaking, 2
+    /// is quite bad and it's likely that even a slower regex engine like the
+    /// [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM) would be faster.
+    ///
+    /// This has no effect if [`Config::minimum_cache_clear_count`] is not set.
+    /// Namely, this option only kicks in when the cache has been cleared more
+    /// than the minimum number. If no minimum is set, then the cache is simply
+    /// cleared whenever it fills up and it is impossible for the lazy DFA to
+    /// quit due to ineffective use of the cache.
+    ///
+    /// In general, if one is setting [`Config::minimum_cache_clear_count`],
+    /// then one should probably also set this knob as well. The reason is
+    /// that the absolute number of times the cache is cleared is generally
+    /// not a great predictor of efficiency. For example, if a new DFA state
+    /// is created for every 1,000 bytes searched, then it wouldn't be hard
+    /// for the cache to get cleared more than `N` times and then cause the
+    /// lazy DFA to quit. But a new DFA state every 1,000 bytes is likely quite
+    /// good from a performance perspective, and it's likely that the lazy
+    /// DFA should continue searching, even if it requires clearing the cache
+    /// occasionally.
+    ///
+    /// Finally, note that if you're implementing your own lazy DFA search
+    /// routine and also want this efficiency check to work correctly, then
+    /// you'll need to use the following routines to record search progress:
+    ///
+    /// * Call [`Cache::search_start`] at the beginning of every search.
+    /// * Call [`Cache::search_update`] whenever [`DFA::next_state`] is
+    /// called.
+    /// * Call [`Cache::search_finish`] before completing a search. (It is
+    /// not strictly necessary to call this when an error is returned, as
+    /// `Cache::search_start` will automatically finish the previous search
+    /// for you. But calling it where possible before returning helps improve
+    /// the accuracy of how many bytes have actually been searched.)
+    pub fn minimum_bytes_per_state(mut self, min: Option<usize>) -> Config {
+        self.minimum_bytes_per_state = Some(min);
+        self
     }
 
     /// Returns the match semantics set in this configuration.
@@ -3358,6 +3679,11 @@ impl Config {
         self.match_kind.unwrap_or(MatchKind::LeftmostFirst)
     }
 
+    /// Returns the prefilter set in this configuration, if one at all.
+    pub fn get_prefilter(&self) -> Option<&Prefilter> {
+        self.pre.as_ref().unwrap_or(&None).as_ref()
+    }
+
     /// Returns whether this configuration has enabled anchored starting states
     /// for every pattern in the DFA.
     pub fn get_starts_for_each_pattern(&self) -> bool {
@@ -3378,14 +3704,23 @@ impl Config {
         self.unicode_word_boundary.unwrap_or(false)
     }
 
-    /// Returns whether this configuration will instruct the DFA to enter a
-    /// quit state whenever the given byte is seen during a search. When at
+    /// Returns whether this configuration will instruct the lazy DFA to enter
+    /// a quit state whenever the given byte is seen during a search. When at
     /// least one byte has this enabled, it is possible for a search to return
     /// an error.
     pub fn get_quit(&self, byte: u8) -> bool {
         self.quitset.map_or(false, |q| q.contains(byte))
     }
 
+    /// Returns whether this configuration will instruct the lazy DFA to
+    /// "specialize" start states. When enabled, the lazy DFA will tag start
+    /// states so that search routines using the lazy DFA can detect when
+    /// it's in a start state and do some kind of optimization (like run a
+    /// prefilter).
+    pub fn get_specialize_start_states(&self) -> bool {
+        self.specialize_start_states.unwrap_or(false)
+    }
+
     /// Returns the cache capacity set on this configuration.
     pub fn get_cache_capacity(&self) -> usize {
         self.cache_capacity.unwrap_or(2 * (1 << 20))
@@ -3404,6 +3739,14 @@ impl Config {
         self.minimum_cache_clear_count.unwrap_or(None)
     }
 
+    /// Returns, if set, the minimum number of bytes per state that need to be
+    /// processed in order for the lazy DFA to keep going. If the minimum falls
+    /// below this number (and the cache has been cleared a minimum number of
+    /// times), then the lazy DFA will return a "gave up" error.
+    pub fn get_minimum_bytes_per_state(&self) -> Option<usize> {
+        self.minimum_bytes_per_state.unwrap_or(None)
+    }
+
     /// Returns the minimum lazy DFA cache capacity required for the given NFA.
     ///
     /// The cache capacity required for a particular NFA may change without
@@ -3449,6 +3792,11 @@ impl Config {
             // It is important to distinguish any "quit" bytes from all other
             // bytes. Otherwise, a non-quit byte may end up in the same class
             // as a quit byte, and thus cause the DFA stop when it shouldn't.
+            //
+            // Test case:
+            //
+            //   regex-cli find hybrid regex -w @conn.json.1000x.log \
+            //     '^#' '\b10\.55\.182\.100\b'
             if !quit.is_empty() {
                 set.add_set(&quit);
             }
@@ -3466,7 +3814,7 @@ impl Config {
         nfa: &thompson::NFA,
     ) -> Result<ByteSet, BuildError> {
         let mut quit = self.quitset.unwrap_or(ByteSet::empty());
-        if nfa.has_word_boundary_unicode() {
+        if nfa.look_set_any().contains_word_unicode() {
             if self.get_unicode_word_boundary() {
                 for b in 0x80..=0xFF {
                     quit.add(b);
@@ -3491,10 +3839,10 @@ impl Config {
     /// always used. If an option in `o` is not set, then the corresponding
     /// option in `self` is used. If it's not set in `self` either, then it
     /// remains not set.
-    fn overwrite(self, o: Config) -> Config {
+    fn overwrite(&self, o: Config) -> Config {
         Config {
-            anchored: o.anchored.or(self.anchored),
             match_kind: o.match_kind.or(self.match_kind),
+            pre: o.pre.or_else(|| self.pre.clone()),
             starts_for_each_pattern: o
                 .starts_for_each_pattern
                 .or(self.starts_for_each_pattern),
@@ -3503,6 +3851,9 @@ impl Config {
                 .unicode_word_boundary
                 .or(self.unicode_word_boundary),
             quitset: o.quitset.or(self.quitset),
+            specialize_start_states: o
+                .specialize_start_states
+                .or(self.specialize_start_states),
             cache_capacity: o.cache_capacity.or(self.cache_capacity),
             skip_cache_capacity_check: o
                 .skip_cache_capacity_check
@@ -3510,6 +3861,9 @@ impl Config {
             minimum_cache_clear_count: o
                 .minimum_cache_clear_count
                 .or(self.minimum_cache_clear_count),
+            minimum_bytes_per_state: o
+                .minimum_bytes_per_state
+                .or(self.minimum_bytes_per_state),
         }
     }
 }
@@ -3556,27 +3910,25 @@ impl Config {
 ///   `\n`). Things that are Unicode only, such as `\pL`, are not allowed.
 /// * The pattern itself is permitted to match invalid UTF-8. For example,
 ///   things like `[^a]` that match any byte except for `a` are permitted.
-/// * Unanchored patterns can search through invalid UTF-8. That is, for
-///   unanchored patterns, the implicit prefix is `(?s-u:.)*?` instead of
-///   `(?s:.)*?`.
 ///
 /// ```
 /// use regex_automata::{
 ///     hybrid::dfa::DFA,
 ///     nfa::thompson,
-///     HalfMatch, SyntaxConfig,
+///     util::syntax,
+///     HalfMatch, Input,
 /// };
 ///
 /// let dfa = DFA::builder()
 ///     .configure(DFA::config().cache_capacity(5_000))
-///     .syntax(SyntaxConfig::new().unicode(false).utf8(false))
 ///     .thompson(thompson::Config::new().utf8(false))
+///     .syntax(syntax::Config::new().unicode(false).utf8(false))
 ///     .build(r"foo[^b]ar.*")?;
 /// let mut cache = dfa.create_cache();
 ///
 /// let haystack = b"\xFEfoo\xFFar\xE2\x98\xFF\n";
 /// let expected = Some(HalfMatch::must(0, 10));
-/// let got = dfa.find_leftmost_fwd(&mut cache, haystack)?;
+/// let got = dfa.try_search_fwd(&mut cache, &Input::new(haystack))?;
 /// assert_eq!(expected, got);
 ///
 /// # Ok::<(), Box<dyn std::error::Error>>(())
@@ -3584,7 +3936,8 @@ impl Config {
 #[derive(Clone, Debug)]
 pub struct Builder {
     config: Config,
-    thompson: thompson::Builder,
+    #[cfg(feature = "syntax")]
+    thompson: thompson::Compiler,
 }
 
 impl Builder {
@@ -3592,7 +3945,8 @@ impl Builder {
     pub fn new() -> Builder {
         Builder {
             config: Config::default(),
-            thompson: thompson::Builder::new(),
+            #[cfg(feature = "syntax")]
+            thompson: thompson::Compiler::new(),
         }
     }
 
@@ -3600,6 +3954,7 @@ impl Builder {
     ///
     /// If there was a problem parsing or compiling the pattern, then an error
     /// is returned.
+    #[cfg(feature = "syntax")]
     pub fn build(&self, pattern: &str) -> Result<DFA, BuildError> {
         self.build_many(&[pattern])
     }
@@ -3608,22 +3963,31 @@ impl Builder {
     ///
     /// When matches are returned, the pattern ID corresponds to the index of
     /// the pattern in the slice given.
+    #[cfg(feature = "syntax")]
     pub fn build_many<P: AsRef<str>>(
         &self,
         patterns: &[P],
     ) -> Result<DFA, BuildError> {
-        let nfa =
-            self.thompson.build_many(patterns).map_err(BuildError::nfa)?;
-        self.build_from_nfa(Arc::new(nfa))
+        let nfa = self
+            .thompson
+            .clone()
+            // We can always forcefully disable captures because DFAs do not
+            // support them.
+            .configure(
+                thompson::Config::new()
+                    .which_captures(thompson::WhichCaptures::None),
+            )
+            .build_many(patterns)
+            .map_err(BuildError::nfa)?;
+        self.build_from_nfa(nfa)
     }
 
     /// Build a DFA from the given NFA.
     ///
-    /// Note that this requires an `Arc<thompson::NFA>` instead of a
-    /// `&thompson::NFA` because the lazy DFA builds itself from the NFA at
-    /// search time. This means that the lazy DFA must hold on to its source
-    /// NFA for the entirety of its lifetime. An `Arc` is used so that callers
-    /// aren't forced to clone the NFA if it is needed elsewhere.
+    /// Note that this requires owning a `thompson::NFA`. While this may force
+    /// you to clone the NFA, such a clone is not a deep clone. Namely, NFAs
+    /// are defined internally to support shared ownership such that cloning is
+    /// very cheap.
     ///
     /// # Example
     ///
@@ -3631,26 +3995,29 @@ impl Builder {
     /// in hand.
     ///
     /// ```
-    /// use std::sync::Arc;
-    /// use regex_automata::{hybrid::dfa::DFA, nfa::thompson, HalfMatch};
+    /// use regex_automata::{
+    ///     hybrid::dfa::DFA,
+    ///     nfa::thompson,
+    ///     HalfMatch, Input,
+    /// };
     ///
-    /// let haystack = "foo123bar".as_bytes();
+    /// let haystack = "foo123bar";
     ///
     /// // This shows how to set non-default options for building an NFA.
-    /// let nfa = thompson::Builder::new()
-    ///     .configure(thompson::Config::new().shrink(false))
+    /// let nfa = thompson::Compiler::new()
+    ///     .configure(thompson::Config::new().shrink(true))
     ///     .build(r"[0-9]+")?;
-    /// let dfa = DFA::builder().build_from_nfa(Arc::new(nfa))?;
+    /// let dfa = DFA::builder().build_from_nfa(nfa)?;
     /// let mut cache = dfa.create_cache();
     /// let expected = Some(HalfMatch::must(0, 6));
-    /// let got = dfa.find_leftmost_fwd(&mut cache, haystack)?;
+    /// let got = dfa.try_search_fwd(&mut cache, &Input::new(haystack))?;
     /// assert_eq!(expected, got);
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     pub fn build_from_nfa(
         &self,
-        nfa: Arc<thompson::NFA>,
+        nfa: thompson::NFA,
     ) -> Result<DFA, BuildError> {
         let quitset = self.config.quit_set_from_nfa(&nfa)?;
         let classes = self.config.byte_classes_from_nfa(&nfa, &quitset);
@@ -3675,12 +4042,11 @@ impl Builder {
             // then we simply force the cache capacity to its minimum amount
             // and mush on.
             if self.config.get_skip_cache_capacity_check() {
-                trace!(
+                debug!(
                     "given capacity ({}) is too small, \
                      since skip_cache_capacity_check is enabled, \
                      setting cache capacity to minimum ({})",
-                    cache_capacity,
-                    min_cache,
+                    cache_capacity, min_cache,
                 );
                 cache_capacity = min_cache;
             } else {
@@ -3694,22 +4060,19 @@ impl Builder {
         // of states in our state ID space. This is unlikely to trigger in
         // >=32-bit systems, but 16-bit systems have a pretty small state ID
         // space since a number of bits are used up as sentinels.
-        if let Err(err) = minimum_lazy_state_id(&nfa, &classes) {
+        if let Err(err) = minimum_lazy_state_id(&classes) {
             return Err(BuildError::insufficient_state_id_capacity(err));
         }
         let stride2 = classes.stride2();
+        let start_map = StartByteMap::new(nfa.look_matcher());
         Ok(DFA {
+            config: self.config.clone(),
             nfa,
             stride2,
+            start_map,
             classes,
             quitset,
-            anchored: self.config.get_anchored(),
-            match_kind: self.config.get_match_kind(),
-            starts_for_each_pattern: self.config.get_starts_for_each_pattern(),
             cache_capacity,
-            minimum_cache_clear_count: self
-                .config
-                .get_minimum_cache_clear_count(),
         })
     }
 
@@ -3720,16 +4083,17 @@ impl Builder {
     }
 
     /// Set the syntax configuration for this builder using
-    /// [`SyntaxConfig`](crate::SyntaxConfig).
+    /// [`syntax::Config`](crate::util::syntax::Config).
     ///
     /// This permits setting things like case insensitivity, Unicode and multi
     /// line mode.
     ///
     /// These settings only apply when constructing a lazy DFA directly from a
     /// pattern.
+    #[cfg(feature = "syntax")]
     pub fn syntax(
         &mut self,
-        config: crate::util::syntax::SyntaxConfig,
+        config: crate::util::syntax::Config,
     ) -> &mut Builder {
         self.thompson.syntax(config);
         self
@@ -3744,20 +4108,144 @@ impl Builder {
     ///
     /// These settings only apply when constructing a DFA directly from a
     /// pattern.
+    #[cfg(feature = "syntax")]
     pub fn thompson(&mut self, config: thompson::Config) -> &mut Builder {
         self.thompson.configure(config);
         self
     }
 }
 
+/// Represents the current state of an overlapping search.
+///
+/// This is used for overlapping searches since they need to know something
+/// about the previous search. For example, when multiple patterns match at the
+/// same position, this state tracks the last reported pattern so that the next
+/// search knows whether to report another matching pattern or continue with
+/// the search at the next position. Additionally, it also tracks which state
+/// the last search call terminated in.
+///
+/// This type provides little introspection capabilities. The only thing a
+/// caller can do is construct it and pass it around to permit search routines
+/// to use it to track state, and also ask whether a match has been found.
+///
+/// Callers should always provide a fresh state constructed via
+/// [`OverlappingState::start`] when starting a new search. Reusing state from
+/// a previous search may result in incorrect results.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct OverlappingState {
+    /// The match reported by the most recent overlapping search to use this
+    /// state.
+    ///
+    /// If a search does not find any matches, then it is expected to clear
+    /// this value.
+    pub(crate) mat: Option<HalfMatch>,
+    /// The state ID of the state at which the search was in when the call
+    /// terminated. When this is a match state, `last_match` must be set to a
+    /// non-None value.
+    ///
+    /// A `None` value indicates the start state of the corresponding
+    /// automaton. We cannot use the actual ID, since any one automaton may
+    /// have many start states, and which one is in use depends on several
+    /// search-time factors.
+    pub(crate) id: Option<LazyStateID>,
+    /// The position of the search.
+    ///
+    /// When `id` is None (i.e., we are starting a search), this is set to
+    /// the beginning of the search as given by the caller regardless of its
+    /// current value. Subsequent calls to an overlapping search pick up at
+    /// this offset.
+    pub(crate) at: usize,
+    /// The index into the matching patterns of the next match to report if the
+    /// current state is a match state. Note that this may be 1 greater than
+    /// the total number of matches to report for the current match state. (In
+    /// which case, no more matches should be reported at the current position
+    /// and the search should advance to the next position.)
+    pub(crate) next_match_index: Option<usize>,
+    /// This is set to true when a reverse overlapping search has entered its
+    /// EOI transitions.
+    ///
+    /// This isn't used in a forward search because it knows to stop once the
+    /// position exceeds the end of the search range. In a reverse search,
+    /// since we use unsigned offsets, we don't "know" once we've gone past
+    /// `0`. So the only way to detect it is with this extra flag. The reverse
+    /// overlapping search knows to terminate specifically after it has
+    /// reported all matches after following the EOI transition.
+    pub(crate) rev_eoi: bool,
+}
+
+impl OverlappingState {
+    /// Create a new overlapping state that begins at the start state of any
+    /// automaton.
+    pub fn start() -> OverlappingState {
+        OverlappingState {
+            mat: None,
+            id: None,
+            at: 0,
+            next_match_index: None,
+            rev_eoi: false,
+        }
+    }
+
+    /// Return the match result of the most recent search to execute with this
+    /// state.
+    ///
+    /// A searches will clear this result automatically, such that if no
+    /// match is found, this will correctly report `None`.
+    pub fn get_match(&self) -> Option<HalfMatch> {
+        self.mat
+    }
+}
+
+/// Runs the given overlapping `search` function (forwards or backwards) until
+/// a match is found whose offset does not split a codepoint.
+///
+/// This is *not* always correct to call. It should only be called when the
+/// underlying NFA has UTF-8 mode enabled *and* it can produce zero-width
+/// matches. Calling this when both of those things aren't true might result
+/// in legitimate matches getting skipped.
+#[cold]
+#[inline(never)]
+fn skip_empty_utf8_splits_overlapping<F>(
+    input: &Input<'_>,
+    state: &mut OverlappingState,
+    mut search: F,
+) -> Result<(), MatchError>
+where
+    F: FnMut(&Input<'_>, &mut OverlappingState) -> Result<(), MatchError>,
+{
+    // Note that this routine works for forwards and reverse searches
+    // even though there's no code here to handle those cases. That's
+    // because overlapping searches drive themselves to completion via
+    // `OverlappingState`. So all we have to do is push it until no matches are
+    // found.
+
+    let mut hm = match state.get_match() {
+        None => return Ok(()),
+        Some(hm) => hm,
+    };
+    if input.get_anchored().is_anchored() {
+        if !input.is_char_boundary(hm.offset()) {
+            state.mat = None;
+        }
+        return Ok(());
+    }
+    while !input.is_char_boundary(hm.offset()) {
+        search(input, state)?;
+        hm = match state.get_match() {
+            None => return Ok(()),
+            Some(hm) => hm,
+        };
+    }
+    Ok(())
+}
+
 /// Based on the minimum number of states required for a useful lazy DFA cache,
 /// this returns the minimum lazy state ID that must be representable.
 ///
-/// It's likely not plausible for this to impose constraints on 32-bit systems
-/// (or higher), but on 16-bit systems, the lazy state ID space is quite
-/// constrained and thus may be insufficient for bigger regexes.
+/// It's not likely for this to have any impact 32-bit systems (or higher), but
+/// on 16-bit systems, the lazy state ID space is quite constrained and thus
+/// may be insufficient if our MIN_STATES value is (for some reason) too high.
 fn minimum_lazy_state_id(
-    nfa: &thompson::NFA,
     classes: &ByteClasses,
 ) -> Result<LazyStateID, LazyStateIDError> {
     let stride = 1 << classes.stride2();
@@ -3774,37 +4262,71 @@ fn minimum_lazy_state_id(
 /// than what is required in practice. Computing the true minimum effectively
 /// requires determinization, which is probably too much work to do for a
 /// simple check like this.
+///
+/// One of the issues with this approach IMO is that it requires that this
+/// be in sync with the calculation above for computing how much heap memory
+/// the DFA cache uses. If we get it wrong, it's possible for example for the
+/// minimum to be smaller than the computed heap memory, and thus, it may be
+/// the case that we can't add the required minimum number of states. That in
+/// turn will make lazy DFA panic because we assume that we can add at least a
+/// minimum number of states.
+///
+/// Another approach would be to always allow the minimum number of states to
+/// be added to the lazy DFA cache, even if it exceeds the configured cache
+/// limit. This does mean that the limit isn't really a limit in all cases,
+/// which is unfortunate. But it does at least guarantee that the lazy DFA can
+/// always make progress, even if it is slow. (This approach is very similar to
+/// enabling the 'skip_cache_capacity_check' config knob, except it wouldn't
+/// rely on cache size calculation. Instead, it would just always permit a
+/// minimum number of states to be added.)
 fn minimum_cache_capacity(
     nfa: &thompson::NFA,
     classes: &ByteClasses,
     starts_for_each_pattern: bool,
 ) -> usize {
     const ID_SIZE: usize = size_of::<LazyStateID>();
-    let stride = 1 << classes.stride2();
+    const STATE_SIZE: usize = size_of::<State>();
 
-    let sparses = 2 * nfa.len() * NFAStateID::SIZE;
+    let stride = 1 << classes.stride2();
+    let states_len = nfa.states().len();
+    let sparses = 2 * states_len * NFAStateID::SIZE;
     let trans = MIN_STATES * stride * ID_SIZE;
 
-    let mut starts = Start::count() * ID_SIZE;
+    let mut starts = Start::len() * ID_SIZE;
     if starts_for_each_pattern {
-        starts += (Start::count() * nfa.pattern_len()) * ID_SIZE;
+        starts += (Start::len() * nfa.pattern_len()) * ID_SIZE;
     }
 
-    // Every `State` has three bytes for flags, 4 bytes (max) for the number
-    // of patterns, followed by 32-bit encodings of patterns and then delta
+    // The min number of states HAS to be at least 4: we have 3 sentinel states
+    // and then we need space for one more when we save a state after clearing
+    // the cache. We also need space for one more, otherwise we get stuck in a
+    // loop where we try to add a 5th state, which gets rejected, which clears
+    // the cache, which adds back a saved state (4th total state) which then
+    // tries to add the 5th state again.
+    assert!(MIN_STATES >= 5, "minimum number of states has to be at least 5");
+    // The minimum number of non-sentinel states. We consider this separately
+    // because sentinel states are much smaller in that they contain no NFA
+    // states. Given our aggressive calculation here, it's worth being more
+    // precise with the number of states we need.
+    let non_sentinel = MIN_STATES.checked_sub(SENTINEL_STATES).unwrap();
+
+    // Every `State` has 5 bytes for flags, 4 bytes (max) for the number of
+    // patterns, followed by 32-bit encodings of patterns and then delta
     // varint encodings of NFA state IDs. We use the worst case (which isn't
     // technically possible) of 5 bytes for each NFA state ID.
     //
     // HOWEVER, three of the states needed by a lazy DFA are just the sentinel
     // unknown, dead and quit states. Those states have a known size and it is
     // small.
-    assert!(MIN_STATES >= 3, "minimum number of states has to be at least 3");
     let dead_state_size = State::dead().memory_usage();
-    let max_state_size = 3 + 4 + (nfa.pattern_len() * 4) + (nfa.len() * 5);
-    let states = (3 * (size_of::<State>() + dead_state_size))
-        + ((MIN_STATES - 3) * (size_of::<State>() + max_state_size));
-    let states_to_sid = states + (MIN_STATES * ID_SIZE);
-    let stack = nfa.len() * NFAStateID::SIZE;
+    let max_state_size = 5 + 4 + (nfa.pattern_len() * 4) + (states_len * 5);
+    let states = (SENTINEL_STATES * (STATE_SIZE + dead_state_size))
+        + (non_sentinel * (STATE_SIZE + max_state_size));
+    // NOTE: We don't double count heap memory used by State for this map since
+    // we use reference counting to avoid doubling memory usage. (This tends to
+    // be where most memory is allocated in the cache.)
+    let states_to_sid = (MIN_STATES * STATE_SIZE) + (MIN_STATES * ID_SIZE);
+    let stack = states_len * NFAStateID::SIZE;
     let scratch_state_builder = max_state_size;
 
     trans
@@ -3815,3 +4337,45 @@ fn minimum_cache_capacity(
         + stack
         + scratch_state_builder
 }
+
+#[cfg(all(test, feature = "syntax"))]
+mod tests {
+    use super::*;
+
+    // Tests that we handle heuristic Unicode word boundary support in reverse
+    // DFAs in the specific case of contextual searches.
+    //
+    // I wrote this test when I discovered a bug in how heuristic word
+    // boundaries were handled. Namely, that the starting state selection
+    // didn't consider the DFA's quit byte set when looking at the byte
+    // immediately before the start of the search (or immediately after the
+    // end of the search in the case of a reverse search). As a result, it was
+    // possible for '\bfoo\b' to match 'β123' because the trailing \xB2 byte
+    // in the 'β' codepoint would be treated as a non-word character. But of
+    // course, this search should trigger the DFA to quit, since there is a
+    // non-ASCII byte in consideration.
+    //
+    // Thus, I fixed 'start_state_{forward,reverse}' to check the quit byte set
+    // if it wasn't empty. The forward case is tested in the doc test for the
+    // Config::unicode_word_boundary API. We test the reverse case here, which
+    // is sufficiently niche that it doesn't really belong in a doc test.
+    #[test]
+    fn heuristic_unicode_reverse() {
+        let dfa = DFA::builder()
+            .configure(DFA::config().unicode_word_boundary(true))
+            .thompson(thompson::Config::new().reverse(true))
+            .build(r"\b[0-9]+\b")
+            .unwrap();
+        let mut cache = dfa.create_cache();
+
+        let input = Input::new("β123").range(2..);
+        let expected = MatchError::quit(0xB2, 1);
+        let got = dfa.try_search_rev(&mut cache, &input);
+        assert_eq!(Err(expected), got);
+
+        let input = Input::new("123β").range(..3);
+        let expected = MatchError::quit(0xCE, 3);
+        let got = dfa.try_search_rev(&mut cache, &input);
+        assert_eq!(Err(expected), got);
+    }
+}