From c23a457e72abe608715ac76f076f47dc42af07a5 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 30 May 2024 20:31:44 +0200 Subject: Merging upstream version 1.74.1+dfsg1. Signed-off-by: Daniel Baumann --- vendor/regex-automata/src/dfa/sparse.rs | 1279 ++++++++++++++++++++----------- 1 file changed, 826 insertions(+), 453 deletions(-) (limited to 'vendor/regex-automata/src/dfa/sparse.rs') diff --git a/vendor/regex-automata/src/dfa/sparse.rs b/vendor/regex-automata/src/dfa/sparse.rs index 346606987..5d8ec2340 100644 --- a/vendor/regex-automata/src/dfa/sparse.rs +++ b/vendor/regex-automata/src/dfa/sparse.rs @@ -14,7 +14,7 @@ example, this configures a sparse DFA to do an overlapping search: ``` use regex_automata::{ dfa::{Automaton, OverlappingState, dense}, - HalfMatch, MatchKind, + HalfMatch, Input, MatchKind, }; let dense_re = dense::Builder::new() @@ -23,25 +23,21 @@ let dense_re = dense::Builder::new() let sparse_re = dense_re.to_sparse()?; // Setup our haystack and initial start state. -let haystack = b"Samwise"; +let input = Input::new("Samwise"); let mut state = OverlappingState::start(); // First, 'Sam' will match. -let end1 = sparse_re.find_overlapping_fwd_at( - None, None, haystack, 0, haystack.len(), &mut state, -)?; -assert_eq!(end1, Some(HalfMatch::must(0, 3))); +sparse_re.try_search_overlapping_fwd(&input, &mut state)?; +assert_eq!(Some(HalfMatch::must(0, 3)), state.get_match()); // And now 'Samwise' will match. -let end2 = sparse_re.find_overlapping_fwd_at( - None, None, haystack, 3, haystack.len(), &mut state, -)?; -assert_eq!(end2, Some(HalfMatch::must(0, 7))); +sparse_re.try_search_overlapping_fwd(&input, &mut state)?; +assert_eq!(Some(HalfMatch::must(0, 7)), state.get_match()); # Ok::<(), Box>(()) ``` */ -#[cfg(feature = "alloc")] +#[cfg(feature = "dfa-build")] use core::iter; use core::{ convert::{TryFrom, TryInto}, @@ -49,23 +45,27 @@ use core::{ mem::size_of, }; -#[cfg(feature = "alloc")] -use alloc::{collections::BTreeSet, vec, vec::Vec}; +#[cfg(feature = "dfa-build")] +use alloc::{vec, vec::Vec}; -#[cfg(feature = "alloc")] -use crate::dfa::{dense, error::Error}; +#[cfg(feature = "dfa-build")] +use crate::dfa::dense::{self, BuildError}; use crate::{ dfa::{ automaton::{fmt_state_indicator, Automaton}, + dense::Flags, special::Special, - DEAD, + StartKind, DEAD, }, util::{ - alphabet::ByteClasses, - bytes::{self, DeserializeError, Endian, SerializeError}, - id::{PatternID, StateID}, - start::Start, - DebugByte, + alphabet::{ByteClasses, ByteSet}, + escape::DebugByte, + int::{Pointer, Usize, U16, U32}, + prefilter::Prefilter, + primitives::{PatternID, StateID}, + search::{Anchored, Input, MatchError}, + start::{Start, StartByteMap}, + wire::{self, DeserializeError, Endian, SerializeError}, }, }; @@ -107,14 +107,11 @@ const VERSION: u32 = 2; /// for searching. For example: /// /// ``` -/// use regex_automata::{ -/// dfa::{Automaton, sparse::DFA}, -/// HalfMatch, -/// }; +/// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; /// /// let dfa = DFA::new("foo[0-9]+")?; -/// let expected = HalfMatch::must(0, 8); -/// assert_eq!(Some(expected), dfa.find_leftmost_fwd(b"foo12345")?); +/// let expected = Some(HalfMatch::must(0, 8)); +/// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); /// # Ok::<(), Box>(()) /// ``` #[derive(Clone)] @@ -130,12 +127,15 @@ pub struct DFA { // // That is, a lot of the complexity is pushed down into how each state // itself is represented. 
- trans: Transitions, - starts: StartTable, + tt: Transitions, + st: StartTable, special: Special, + pre: Option, + quitset: ByteSet, + flags: Flags, } -#[cfg(feature = "alloc")] +#[cfg(feature = "dfa-build")] impl DFA> { /// Parse the given regular expression using a default configuration and /// return the corresponding sparse DFA. @@ -149,18 +149,16 @@ impl DFA> { /// # Example /// /// ``` - /// use regex_automata::{ - /// dfa::{Automaton, sparse}, - /// HalfMatch, - /// }; + /// use regex_automata::{dfa::{Automaton, sparse}, HalfMatch, Input}; /// /// let dfa = sparse::DFA::new("foo[0-9]+bar")?; /// - /// let expected = HalfMatch::must(0, 11); - /// assert_eq!(Some(expected), dfa.find_leftmost_fwd(b"foo12345bar")?); + /// let expected = Some(HalfMatch::must(0, 11)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345bar"))?); /// # Ok::<(), Box>(()) /// ``` - pub fn new(pattern: &str) -> Result>, Error> { + #[cfg(feature = "syntax")] + pub fn new(pattern: &str) -> Result>, BuildError> { dense::Builder::new() .build(pattern) .and_then(|dense| dense.to_sparse()) @@ -178,26 +176,24 @@ impl DFA> { /// # Example /// /// ``` - /// use regex_automata::{ - /// dfa::{Automaton, sparse}, - /// HalfMatch, - /// }; + /// use regex_automata::{dfa::{Automaton, sparse}, HalfMatch, Input}; /// /// let dfa = sparse::DFA::new_many(&["[0-9]+", "[a-z]+"])?; - /// let expected = HalfMatch::must(1, 3); - /// assert_eq!(Some(expected), dfa.find_leftmost_fwd(b"foo12345bar")?); + /// let expected = Some(HalfMatch::must(1, 3)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345bar"))?); /// # Ok::<(), Box>(()) /// ``` + #[cfg(feature = "syntax")] pub fn new_many>( patterns: &[P], - ) -> Result>, Error> { + ) -> Result>, BuildError> { dense::Builder::new() .build_many(patterns) .and_then(|dense| dense.to_sparse()) } } -#[cfg(feature = "alloc")] +#[cfg(feature = "dfa-build")] impl DFA> { /// Create a new DFA that matches every input. /// @@ -206,17 +202,17 @@ impl DFA> { /// ``` /// use regex_automata::{ /// dfa::{Automaton, sparse}, - /// HalfMatch, + /// HalfMatch, Input, /// }; /// /// let dfa = sparse::DFA::always_match()?; /// - /// let expected = HalfMatch::must(0, 0); - /// assert_eq!(Some(expected), dfa.find_leftmost_fwd(b"")?); - /// assert_eq!(Some(expected), dfa.find_leftmost_fwd(b"foo")?); + /// let expected = Some(HalfMatch::must(0, 0)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new(""))?); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo"))?); /// # Ok::<(), Box>(()) /// ``` - pub fn always_match() -> Result>, Error> { + pub fn always_match() -> Result>, BuildError> { dense::DFA::always_match()?.to_sparse() } @@ -225,21 +221,21 @@ impl DFA> { /// # Example /// /// ``` - /// use regex_automata::dfa::{Automaton, sparse}; + /// use regex_automata::{dfa::{Automaton, sparse}, Input}; /// /// let dfa = sparse::DFA::never_match()?; - /// assert_eq!(None, dfa.find_leftmost_fwd(b"")?); - /// assert_eq!(None, dfa.find_leftmost_fwd(b"foo")?); + /// assert_eq!(None, dfa.try_search_fwd(&Input::new(""))?); + /// assert_eq!(None, dfa.try_search_fwd(&Input::new("foo"))?); /// # Ok::<(), Box>(()) /// ``` - pub fn never_match() -> Result>, Error> { + pub fn never_match() -> Result>, BuildError> { dense::DFA::never_match()?.to_sparse() } /// The implementation for constructing a sparse DFA from a dense DFA. 
pub(crate) fn from_dense>( dfa: &dense::DFA, - ) -> Result>, Error> { + ) -> Result>, BuildError> { // In order to build the transition table, we need to be able to write // state identifiers for each of the "next" transitions in each state. // Our state identifiers correspond to the byte offset in the @@ -249,35 +245,35 @@ impl DFA> { // of the transition table happens in two passes. // // In the first pass, we fill out the shell of each state, which - // includes the transition count, the input byte ranges and zero-filled - // space for the transitions and accelerators, if present. In this - // first pass, we also build up a map from the state identifier index - // of the dense DFA to the state identifier in this sparse DFA. + // includes the transition length, the input byte ranges and + // zero-filled space for the transitions and accelerators, if present. + // In this first pass, we also build up a map from the state identifier + // index of the dense DFA to the state identifier in this sparse DFA. // // In the second pass, we fill in the transitions based on the map // built in the first pass. // The capacity given here reflects a minimum. (Well, the true minimum // is likely even bigger, but hopefully this saves a few reallocs.) - let mut sparse = Vec::with_capacity(StateID::SIZE * dfa.state_count()); + let mut sparse = Vec::with_capacity(StateID::SIZE * dfa.state_len()); // This maps state indices from the dense DFA to StateIDs in the sparse // DFA. We build out this map on the first pass, and then use it in the // second pass to back-fill our transitions. - let mut remap: Vec = vec![DEAD; dfa.state_count()]; + let mut remap: Vec = vec![DEAD; dfa.state_len()]; for state in dfa.states() { let pos = sparse.len(); - remap[dfa.to_index(state.id())] = - StateID::new(pos).map_err(|_| Error::too_many_states())?; - // zero-filled space for the transition count + remap[dfa.to_index(state.id())] = StateID::new(pos) + .map_err(|_| BuildError::too_many_states())?; + // zero-filled space for the transition length sparse.push(0); sparse.push(0); - let mut transition_count = 0; + let mut transition_len = 0; for (unit1, unit2, _) in state.sparse_transitions() { match (unit1.as_u8(), unit2.as_u8()) { (Some(b1), Some(b2)) => { - transition_count += 1; + transition_len += 1; sparse.push(b1); sparse.push(b2); } @@ -298,40 +294,40 @@ impl DFA> { // N.B. The loop above is not guaranteed to yield the EOI // transition, since it may point to a DEAD state. By putting // it here, we always write the EOI transition, and thus - // guarantee that our transition count is >0. Why do we always + // guarantee that our transition length is >0. Why do we always // need the EOI transition? Because in order to implement // Automaton::next_eoi_state, this lets us just ask for the last // transition. There are probably other/better ways to do this. - transition_count += 1; + transition_len += 1; sparse.push(0); sparse.push(0); - // Check some assumptions about transition count. + // Check some assumptions about transition length. assert_ne!( - transition_count, 0, - "transition count should be non-zero", + transition_len, 0, + "transition length should be non-zero", ); assert!( - transition_count <= 257, - "expected transition count {} to be <= 257", - transition_count, + transition_len <= 257, + "expected transition length {} to be <= 257", + transition_len, ); - // Fill in the transition count. - // Since transition count is always <= 257, we use the most + // Fill in the transition length. 
+ // Since transition length is always <= 257, we use the most // significant bit to indicate whether this is a match state or // not. let ntrans = if dfa.is_match_state(state.id()) { - transition_count | (1 << 15) + transition_len | (1 << 15) } else { - transition_count + transition_len }; - bytes::NE::write_u16(ntrans, &mut sparse[pos..]); + wire::NE::write_u16(ntrans, &mut sparse[pos..]); // zero-fill the actual transitions. - // Unwraps are OK since transition_count <= 257 and our minimum + // Unwraps are OK since transition_length <= 257 and our minimum // support usize size is 16-bits. - let zeros = usize::try_from(transition_count) + let zeros = usize::try_from(transition_len) .unwrap() .checked_mul(StateID::SIZE) .unwrap(); @@ -355,18 +351,18 @@ impl DFA> { sparse.extend(iter::repeat(0).take(zeros)); // Now write the length prefix. - bytes::NE::write_u32( + wire::NE::write_u32( // Will never fail since u32::MAX is invalid pattern ID. // Thus, the number of pattern IDs is representable by a // u32. - plen.try_into().expect("pattern ID count fits in u32"), + plen.try_into().expect("pattern ID length fits in u32"), &mut sparse[pos..], ); pos += size_of::(); // Now write the pattern IDs. for &pid in dfa.pattern_id_slice(state.id()) { - pos += bytes::write_pattern_id::( + pos += wire::write_pattern_id::( pid, &mut sparse[pos..], ); @@ -384,28 +380,31 @@ impl DFA> { } let mut new = DFA { - trans: Transitions { + tt: Transitions { sparse, classes: dfa.byte_classes().clone(), - count: dfa.state_count(), - patterns: dfa.pattern_count(), + state_len: dfa.state_len(), + pattern_len: dfa.pattern_len(), }, - starts: StartTable::from_dense_dfa(dfa, &remap)?, + st: StartTable::from_dense_dfa(dfa, &remap)?, special: dfa.special().remap(|id| remap[dfa.to_index(id)]), + pre: dfa.get_prefilter().map(|p| p.clone()), + quitset: dfa.quitset().clone(), + flags: dfa.flags().clone(), }; // And here's our second pass. Iterate over all of the dense states // again, and update the transitions in each of the states in the // sparse DFA. for old_state in dfa.states() { let new_id = remap[dfa.to_index(old_state.id())]; - let mut new_state = new.trans.state_mut(new_id); + let mut new_state = new.tt.state_mut(new_id); let sparse = old_state.sparse_transitions(); for (i, (_, _, next)) in sparse.enumerate() { let next = remap[dfa.to_index(next)]; new_state.set_next_at(i, next); } } - trace!( + debug!( "created sparse DFA, memory usage: {} (dense memory usage: {})", new.memory_usage(), dfa.memory_usage(), @@ -419,9 +418,12 @@ impl> DFA { /// DFA returned always uses `&[u8]` for its transitions. pub fn as_ref<'a>(&'a self) -> DFA<&'a [u8]> { DFA { - trans: self.trans.as_ref(), - starts: self.starts.as_ref(), + tt: self.tt.as_ref(), + st: self.st.as_ref(), special: self.special, + pre: self.pre.clone(), + quitset: self.quitset, + flags: self.flags, } } @@ -431,36 +433,67 @@ impl> DFA { /// Effectively, this returns a sparse DFA whose transitions live on the /// heap. #[cfg(feature = "alloc")] - pub fn to_owned(&self) -> DFA> { + pub fn to_owned(&self) -> DFA> { DFA { - trans: self.trans.to_owned(), - starts: self.starts.to_owned(), + tt: self.tt.to_owned(), + st: self.st.to_owned(), special: self.special, + pre: self.pre.clone(), + quitset: self.quitset, + flags: self.flags, } } - /// Returns the memory usage, in bytes, of this DFA. + /// Returns the starting state configuration for this DFA. /// - /// The memory usage is computed based on the number of bytes used to - /// represent this DFA. 
- /// - /// This does **not** include the stack size used up by this DFA. To - /// compute that, use `std::mem::size_of::()`. - pub fn memory_usage(&self) -> usize { - self.trans.memory_usage() + self.starts.memory_usage() + /// The default is [`StartKind::Both`], which means the DFA supports both + /// unanchored and anchored searches. However, this can generally lead to + /// bigger DFAs. Therefore, a DFA might be compiled with support for just + /// unanchored or anchored searches. In that case, running a search with + /// an unsupported configuration will panic. + pub fn start_kind(&self) -> StartKind { + self.st.kind } /// Returns true only if this DFA has starting states for each pattern. /// /// When a DFA has starting states for each pattern, then a search with the /// DFA can be configured to only look for anchored matches of a specific - /// pattern. Specifically, APIs like [`Automaton::find_earliest_fwd_at`] - /// can accept a non-None `pattern_id` if and only if this method returns - /// true. Otherwise, calling `find_earliest_fwd_at` will panic. + /// pattern. Specifically, APIs like [`Automaton::try_search_fwd`] can + /// accept a [`Anchored::Pattern`] if and only if this method returns true. + /// Otherwise, an error will be returned. /// /// Note that if the DFA is empty, this always returns false. - pub fn has_starts_for_each_pattern(&self) -> bool { - self.starts.patterns > 0 + pub fn starts_for_each_pattern(&self) -> bool { + self.st.pattern_len.is_some() + } + + /// Returns the equivalence classes that make up the alphabet for this DFA. + /// + /// Unless [`dense::Config::byte_classes`] was disabled, it is possible + /// that multiple distinct bytes are grouped into the same equivalence + /// class if it is impossible for them to discriminate between a match and + /// a non-match. This has the effect of reducing the overall alphabet size + /// and in turn potentially substantially reducing the size of the DFA's + /// transition table. + /// + /// The downside of using equivalence classes like this is that every state + /// transition will automatically use this map to convert an arbitrary + /// byte to its corresponding equivalence class. In practice this has a + /// negligible impact on performance. + pub fn byte_classes(&self) -> &ByteClasses { + &self.tt.classes + } + + /// Returns the memory usage, in bytes, of this DFA. + /// + /// The memory usage is computed based on the number of bytes used to + /// represent this DFA. + /// + /// This does **not** include the stack size used up by this DFA. To + /// compute that, use `std::mem::size_of::()`. + pub fn memory_usage(&self) -> usize { + self.tt.memory_usage() + self.st.memory_usage() } } @@ -488,10 +521,7 @@ impl> DFA { /// This example shows how to serialize and deserialize a DFA: /// /// ``` - /// use regex_automata::{ - /// dfa::{Automaton, sparse::DFA}, - /// HalfMatch, - /// }; + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; /// /// // Compile our original DFA. /// let original_dfa = DFA::new("foo[0-9]+")?; @@ -503,13 +533,13 @@ impl> DFA { /// // ignore it. 
/// let dfa: DFA<&[u8]> = DFA::from_bytes(&buf)?.0; /// - /// let expected = HalfMatch::must(0, 8); - /// assert_eq!(Some(expected), dfa.find_leftmost_fwd(b"foo12345")?); + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); /// # Ok::<(), Box>(()) /// ``` - #[cfg(feature = "alloc")] + #[cfg(feature = "dfa-build")] pub fn to_bytes_little_endian(&self) -> Vec { - self.to_bytes::() + self.to_bytes::() } /// Serialize this DFA as raw bytes to a `Vec` in big endian @@ -533,10 +563,7 @@ impl> DFA { /// This example shows how to serialize and deserialize a DFA: /// /// ``` - /// use regex_automata::{ - /// dfa::{Automaton, sparse::DFA}, - /// HalfMatch, - /// }; + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; /// /// // Compile our original DFA. /// let original_dfa = DFA::new("foo[0-9]+")?; @@ -548,13 +575,13 @@ impl> DFA { /// // ignore it. /// let dfa: DFA<&[u8]> = DFA::from_bytes(&buf)?.0; /// - /// let expected = HalfMatch::must(0, 8); - /// assert_eq!(Some(expected), dfa.find_leftmost_fwd(b"foo12345")?); + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); /// # Ok::<(), Box>(()) /// ``` - #[cfg(feature = "alloc")] + #[cfg(feature = "dfa-build")] pub fn to_bytes_big_endian(&self) -> Vec { - self.to_bytes::() + self.to_bytes::() } /// Serialize this DFA as raw bytes to a `Vec` in native endian @@ -587,10 +614,7 @@ impl> DFA { /// This example shows how to serialize and deserialize a DFA: /// /// ``` - /// use regex_automata::{ - /// dfa::{Automaton, sparse::DFA}, - /// HalfMatch, - /// }; + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; /// /// // Compile our original DFA. /// let original_dfa = DFA::new("foo[0-9]+")?; @@ -600,18 +624,18 @@ impl> DFA { /// // ignore it. /// let dfa: DFA<&[u8]> = DFA::from_bytes(&buf)?.0; /// - /// let expected = HalfMatch::must(0, 8); - /// assert_eq!(Some(expected), dfa.find_leftmost_fwd(b"foo12345")?); + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); /// # Ok::<(), Box>(()) /// ``` - #[cfg(feature = "alloc")] + #[cfg(feature = "dfa-build")] pub fn to_bytes_native_endian(&self) -> Vec { - self.to_bytes::() + self.to_bytes::() } /// The implementation of the public `to_bytes` serialization methods, /// which is generic over endianness. - #[cfg(feature = "alloc")] + #[cfg(feature = "dfa-build")] fn to_bytes(&self) -> Vec { let mut buf = vec![0; self.write_to_len()]; // This should always succeed since the only possible serialization @@ -645,10 +669,7 @@ impl> DFA { /// dynamic memory allocation. /// /// ``` - /// use regex_automata::{ - /// dfa::{Automaton, sparse::DFA}, - /// HalfMatch, - /// }; + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; /// /// // Compile our original DFA. 
/// let original_dfa = DFA::new("foo[0-9]+")?; @@ -660,15 +681,15 @@ impl> DFA { /// let written = original_dfa.write_to_native_endian(&mut buf)?; /// let dfa: DFA<&[u8]> = DFA::from_bytes(&buf[..written])?.0; /// - /// let expected = HalfMatch::must(0, 8); - /// assert_eq!(Some(expected), dfa.find_leftmost_fwd(b"foo12345")?); + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); /// # Ok::<(), Box>(()) /// ``` pub fn write_to_little_endian( &self, dst: &mut [u8], ) -> Result { - self.write_to::(dst) + self.write_to::(dst) } /// Serialize this DFA as raw bytes to the given slice, in big endian @@ -695,10 +716,7 @@ impl> DFA { /// dynamic memory allocation. /// /// ``` - /// use regex_automata::{ - /// dfa::{Automaton, sparse::DFA}, - /// HalfMatch, - /// }; + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; /// /// // Compile our original DFA. /// let original_dfa = DFA::new("foo[0-9]+")?; @@ -710,15 +728,15 @@ impl> DFA { /// let written = original_dfa.write_to_native_endian(&mut buf)?; /// let dfa: DFA<&[u8]> = DFA::from_bytes(&buf[..written])?.0; /// - /// let expected = HalfMatch::must(0, 8); - /// assert_eq!(Some(expected), dfa.find_leftmost_fwd(b"foo12345")?); + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); /// # Ok::<(), Box>(()) /// ``` pub fn write_to_big_endian( &self, dst: &mut [u8], ) -> Result { - self.write_to::(dst) + self.write_to::(dst) } /// Serialize this DFA as raw bytes to the given slice, in native endian @@ -754,10 +772,7 @@ impl> DFA { /// dynamic memory allocation. /// /// ``` - /// use regex_automata::{ - /// dfa::{Automaton, sparse::DFA}, - /// HalfMatch, - /// }; + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; /// /// // Compile our original DFA. /// let original_dfa = DFA::new("foo[0-9]+")?; @@ -767,15 +782,15 @@ impl> DFA { /// let written = original_dfa.write_to_native_endian(&mut buf)?; /// let dfa: DFA<&[u8]> = DFA::from_bytes(&buf[..written])?.0; /// - /// let expected = HalfMatch::must(0, 8); - /// assert_eq!(Some(expected), dfa.find_leftmost_fwd(b"foo12345")?); + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); /// # Ok::<(), Box>(()) /// ``` pub fn write_to_native_endian( &self, dst: &mut [u8], ) -> Result { - self.write_to::(dst) + self.write_to::(dst) } /// The implementation of the public `write_to` serialization methods, @@ -785,17 +800,19 @@ impl> DFA { dst: &mut [u8], ) -> Result { let mut nw = 0; - nw += bytes::write_label(LABEL, &mut dst[nw..])?; - nw += bytes::write_endianness_check::(&mut dst[nw..])?; - nw += bytes::write_version::(VERSION, &mut dst[nw..])?; + nw += wire::write_label(LABEL, &mut dst[nw..])?; + nw += wire::write_endianness_check::(&mut dst[nw..])?; + nw += wire::write_version::(VERSION, &mut dst[nw..])?; nw += { // Currently unused, intended for future flexibility E::write_u32(0, &mut dst[nw..]); size_of::() }; - nw += self.trans.write_to::(&mut dst[nw..])?; - nw += self.starts.write_to::(&mut dst[nw..])?; + nw += self.flags.write_to::(&mut dst[nw..])?; + nw += self.tt.write_to::(&mut dst[nw..])?; + nw += self.st.write_to::(&mut dst[nw..])?; nw += self.special.write_to::(&mut dst[nw..])?; + nw += self.quitset.write_to::(&mut dst[nw..])?; Ok(nw) } @@ -817,10 +834,7 @@ impl> DFA { /// a sparse DFA. 
/// /// ``` - /// use regex_automata::{ - /// dfa::{Automaton, sparse::DFA}, - /// HalfMatch, - /// }; + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; /// /// // Compile our original DFA. /// let original_dfa = DFA::new("foo[0-9]+")?; @@ -829,18 +843,20 @@ impl> DFA { /// let written = original_dfa.write_to_native_endian(&mut buf)?; /// let dfa: DFA<&[u8]> = DFA::from_bytes(&buf[..written])?.0; /// - /// let expected = HalfMatch::must(0, 8); - /// assert_eq!(Some(expected), dfa.find_leftmost_fwd(b"foo12345")?); + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); /// # Ok::<(), Box>(()) /// ``` pub fn write_to_len(&self) -> usize { - bytes::write_label_len(LABEL) - + bytes::write_endianness_check_len() - + bytes::write_version_len() + wire::write_label_len(LABEL) + + wire::write_endianness_check_len() + + wire::write_version_len() + size_of::() // unused, intended for future flexibility - + self.trans.write_to_len() - + self.starts.write_to_len() + + self.flags.write_to_len() + + self.tt.write_to_len() + + self.st.write_to_len() + self.special.write_to_len() + + self.quitset.write_to_len() } } @@ -901,17 +917,14 @@ impl<'a> DFA<&'a [u8]> { /// and then use it for searching. /// /// ``` - /// use regex_automata::{ - /// dfa::{Automaton, sparse::DFA}, - /// HalfMatch, - /// }; + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; /// /// let initial = DFA::new("foo[0-9]+")?; /// let bytes = initial.to_bytes_native_endian(); /// let dfa: DFA<&[u8]> = DFA::from_bytes(&bytes)?.0; /// - /// let expected = HalfMatch::must(0, 8); - /// assert_eq!(Some(expected), dfa.find_leftmost_fwd(b"foo12345")?); + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); /// # Ok::<(), Box>(()) /// ``` /// @@ -927,7 +940,7 @@ impl<'a> DFA<&'a [u8]> { /// a file: /// /// ```no_run - /// use regex_automata::dfa::{Automaton, sparse::DFA}; + /// use regex_automata::dfa::sparse::DFA; /// /// let dfa = DFA::new("foo[0-9]+")?; /// @@ -943,23 +956,22 @@ impl<'a> DFA<&'a [u8]> { /// /// And now the second part is embedding the DFA into the compiled program /// and deserializing it at runtime on first use. We use conditional - /// compilation to choose the correct endianness. As mentioned above, we - /// do not need to employ any special tricks to ensure a proper alignment, - /// since a sparse DFA has no alignment requirements. + /// compilation to choose the correct endianness. We do not need to employ + /// any special tricks to ensure a proper alignment, since a sparse DFA has + /// no alignment requirements. /// /// ```no_run /// use regex_automata::{ - /// dfa::{Automaton, sparse}, - /// HalfMatch, + /// dfa::{Automaton, sparse::DFA}, + /// util::lazy::Lazy, + /// HalfMatch, Input, /// }; /// - /// type DFA = sparse::DFA<&'static [u8]>; - /// - /// fn get_foo() -> &'static DFA { - /// use std::cell::Cell; - /// use std::mem::MaybeUninit; - /// use std::sync::Once; - /// + /// // This crate provides its own "lazy" type, kind of like + /// // lazy_static! or once_cell::sync::Lazy. But it works in no-alloc + /// // no-std environments and let's us write this using completely + /// // safe code. + /// static RE: Lazy> = Lazy::new(|| { /// # const _: &str = stringify! 
{ /// #[cfg(target_endian = "big")] /// static BYTES: &[u8] = include_bytes!("foo.bigendian.dfa"); @@ -968,33 +980,13 @@ impl<'a> DFA<&'a [u8]> { /// # }; /// # static BYTES: &[u8] = b""; /// - /// struct Lazy(Cell>); - /// // SAFETY: This is safe because DFA impls Sync. - /// unsafe impl Sync for Lazy {} - /// - /// static INIT: Once = Once::new(); - /// static DFA: Lazy = Lazy(Cell::new(MaybeUninit::uninit())); - /// - /// INIT.call_once(|| { - /// let (dfa, _) = DFA::from_bytes(BYTES) - /// .expect("serialized DFA should be valid"); - /// // SAFETY: This is guaranteed to only execute once, and all - /// // we do with the pointer is write the DFA to it. - /// unsafe { - /// (*DFA.0.as_ptr()).as_mut_ptr().write(dfa); - /// } - /// }); - /// // SAFETY: DFA is guaranteed to by initialized via INIT and is - /// // stored in static memory. - /// unsafe { - /// let dfa = (*DFA.0.as_ptr()).as_ptr(); - /// std::mem::transmute::<*const DFA, &'static DFA>(dfa) - /// } - /// } - /// - /// let dfa = get_foo(); - /// let expected = HalfMatch::must(0, 8); - /// assert_eq!(Ok(Some(expected)), dfa.find_leftmost_fwd(b"foo12345")); + /// let (dfa, _) = DFA::from_bytes(BYTES) + /// .expect("serialized DFA should be valid"); + /// dfa + /// }); + /// + /// let expected = Ok(Some(HalfMatch::must(0, 8))); + /// assert_eq!(expected, RE.try_search_fwd(&Input::new("foo12345"))); /// ``` /// /// Alternatively, consider using @@ -1009,8 +1001,8 @@ impl<'a> DFA<&'a [u8]> { // (by trying to decode every state) and start state ID list below. If // either validation fails, then we return an error. let (dfa, nread) = unsafe { DFA::from_bytes_unchecked(slice)? }; - dfa.trans.validate()?; - dfa.starts.validate(&dfa.trans)?; + dfa.tt.validate(&dfa.special)?; + dfa.st.validate(&dfa.special, &dfa.tt)?; // N.B. dfa.special doesn't have a way to do unchecked deserialization, // so it has already been validated. Ok((dfa, nread)) @@ -1029,23 +1021,20 @@ impl<'a> DFA<&'a [u8]> { /// /// # Safety /// - /// This routine is unsafe because it permits callers to provide + /// This routine is not safe because it permits callers to provide /// arbitrary transitions with possibly incorrect state identifiers. While /// the various serialization routines will never return an incorrect - /// DFA, there is no guarantee that the bytes provided here - /// are correct. While `from_bytes_unchecked` will still do several forms - /// of basic validation, this routine does not check that the transitions - /// themselves are correct. Given an incorrect transition table, it is - /// possible for the search routines to access out-of-bounds memory because - /// of explicit bounds check elision. + /// DFA, there is no guarantee that the bytes provided here are correct. + /// While `from_bytes_unchecked` will still do several forms of basic + /// validation, this routine does not check that the transitions themselves + /// are correct. Given an incorrect transition table, it is possible for + /// the search routines to access out-of-bounds memory because of explicit + /// bounds check elision. /// /// # Example /// /// ``` - /// use regex_automata::{ - /// dfa::{Automaton, sparse::DFA}, - /// HalfMatch, - /// }; + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; /// /// let initial = DFA::new("foo[0-9]+")?; /// let bytes = initial.to_bytes_native_endian(); @@ -1053,8 +1042,8 @@ impl<'a> DFA<&'a [u8]> { /// // directly from a compatible serialization routine. 
/// let dfa: DFA<&[u8]> = unsafe { DFA::from_bytes_unchecked(&bytes)?.0 }; /// - /// let expected = HalfMatch::must(0, 8); - /// assert_eq!(Some(expected), dfa.find_leftmost_fwd(b"foo12345")?); + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); /// # Ok::<(), Box>(()) /// ``` pub unsafe fn from_bytes_unchecked( @@ -1062,56 +1051,70 @@ impl<'a> DFA<&'a [u8]> { ) -> Result<(DFA<&'a [u8]>, usize), DeserializeError> { let mut nr = 0; - nr += bytes::read_label(&slice[nr..], LABEL)?; - nr += bytes::read_endianness_check(&slice[nr..])?; - nr += bytes::read_version(&slice[nr..], VERSION)?; + nr += wire::read_label(&slice[nr..], LABEL)?; + nr += wire::read_endianness_check(&slice[nr..])?; + nr += wire::read_version(&slice[nr..], VERSION)?; - let _unused = bytes::try_read_u32(&slice[nr..], "unused space")?; + let _unused = wire::try_read_u32(&slice[nr..], "unused space")?; nr += size_of::(); - let (trans, nread) = Transitions::from_bytes_unchecked(&slice[nr..])?; + let (flags, nread) = Flags::from_bytes(&slice[nr..])?; + nr += nread; + + let (tt, nread) = Transitions::from_bytes_unchecked(&slice[nr..])?; nr += nread; - let (starts, nread) = StartTable::from_bytes_unchecked(&slice[nr..])?; + let (st, nread) = StartTable::from_bytes_unchecked(&slice[nr..])?; nr += nread; let (special, nread) = Special::from_bytes(&slice[nr..])?; nr += nread; - if special.max.as_usize() >= trans.sparse().len() { + if special.max.as_usize() >= tt.sparse().len() { return Err(DeserializeError::generic( "max should not be greater than or equal to sparse bytes", )); } - Ok((DFA { trans, starts, special }, nr)) + let (quitset, nread) = ByteSet::from_bytes(&slice[nr..])?; + nr += nread; + + // Prefilters don't support serialization, so they're always absent. + let pre = None; + Ok((DFA { tt, st, special, pre, quitset, flags }, nr)) } } impl> fmt::Debug for DFA { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { writeln!(f, "sparse::DFA(")?; - for state in self.trans.states() { + for state in self.tt.states() { fmt_state_indicator(f, self, state.id())?; - writeln!(f, "{:06?}: {:?}", state.id(), state)?; + writeln!(f, "{:06?}: {:?}", state.id().as_usize(), state)?; } writeln!(f, "")?; - for (i, (start_id, sty, pid)) in self.starts.iter().enumerate() { - if i % self.starts.stride == 0 { - match pid { - None => writeln!(f, "START-GROUP(ALL)")?, - Some(pid) => { - writeln!(f, "START_GROUP(pattern: {:?})", pid)? - } + for (i, (start_id, anchored, sty)) in self.st.iter().enumerate() { + if i % self.st.stride == 0 { + match anchored { + Anchored::No => writeln!(f, "START-GROUP(unanchored)")?, + Anchored::Yes => writeln!(f, "START-GROUP(anchored)")?, + Anchored::Pattern(pid) => writeln!( + f, + "START_GROUP(pattern: {:?})", + pid.as_usize() + )?, } } writeln!(f, " {:?} => {:06?}", sty, start_id.as_usize())?; } - writeln!(f, "state count: {:?}", self.trans.count)?; + writeln!(f, "state length: {:?}", self.tt.state_len)?; + writeln!(f, "pattern length: {:?}", self.pattern_len())?; + writeln!(f, "flags: {:?}", self.flags)?; writeln!(f, ")")?; Ok(()) } } +// SAFETY: We assert that our implementation of each method is correct. unsafe impl> Automaton for DFA { #[inline] fn is_special_state(&self, id: StateID) -> bool { @@ -1145,10 +1148,10 @@ unsafe impl> Automaton for DFA { // This is marked as inline to help dramatically boost sparse searching, // which decodes each state it enters to follow the next transition. 
- #[inline(always)] + #[cfg_attr(feature = "perf-inline", inline(always))] fn next_state(&self, current: StateID, input: u8) -> StateID { - let input = self.trans.classes.get(input); - self.trans.state(current).next(input) + let input = self.tt.classes.get(input); + self.tt.state(current).next(input) } #[inline] @@ -1162,17 +1165,17 @@ unsafe impl> Automaton for DFA { #[inline] fn next_eoi_state(&self, current: StateID) -> StateID { - self.trans.state(current).next_eoi() + self.tt.state(current).next_eoi() } #[inline] - fn pattern_count(&self) -> usize { - self.trans.patterns + fn pattern_len(&self) -> usize { + self.tt.pattern_len } #[inline] - fn match_count(&self, id: StateID) -> usize { - self.trans.state(id).pattern_count() + fn match_len(&self, id: StateID) -> usize { + self.tt.state(id).pattern_len() } #[inline] @@ -1182,39 +1185,76 @@ unsafe impl> Automaton for DFA { // that finds the pattern ID from the state machine, which requires // a bit of slicing/pointer-chasing. This optimization tends to only // matter when matches are frequent. - if self.trans.patterns == 1 { + if self.tt.pattern_len == 1 { return PatternID::ZERO; } - self.trans.state(id).pattern_id(match_index) + self.tt.state(id).pattern_id(match_index) + } + + #[inline] + fn has_empty(&self) -> bool { + self.flags.has_empty + } + + #[inline] + fn is_utf8(&self) -> bool { + self.flags.is_utf8 + } + + #[inline] + fn is_always_start_anchored(&self) -> bool { + self.flags.is_always_start_anchored } #[inline] fn start_state_forward( &self, - pattern_id: Option, - bytes: &[u8], - start: usize, - end: usize, - ) -> StateID { - let index = Start::from_position_fwd(bytes, start, end); - self.starts.start(index, pattern_id) + input: &Input<'_>, + ) -> Result { + if !self.quitset.is_empty() && input.start() > 0 { + let offset = input.start() - 1; + let byte = input.haystack()[offset]; + if self.quitset.contains(byte) { + return Err(MatchError::quit(byte, offset)); + } + } + let start = self.st.start_map.fwd(&input); + self.st.start(input, start) } #[inline] fn start_state_reverse( &self, - pattern_id: Option, - bytes: &[u8], - start: usize, - end: usize, - ) -> StateID { - let index = Start::from_position_rev(bytes, start, end); - self.starts.start(index, pattern_id) + input: &Input<'_>, + ) -> Result { + if !self.quitset.is_empty() && input.end() < input.haystack().len() { + let offset = input.end(); + let byte = input.haystack()[offset]; + if self.quitset.contains(byte) { + return Err(MatchError::quit(byte, offset)); + } + } + let start = self.st.start_map.rev(&input); + self.st.start(input, start) + } + + #[inline] + fn universal_start_state(&self, mode: Anchored) -> Option { + match mode { + Anchored::No => self.st.universal_start_unanchored, + Anchored::Yes => self.st.universal_start_anchored, + Anchored::Pattern(_) => None, + } } #[inline] fn accelerator(&self, id: StateID) -> &[u8] { - self.trans.state(id).accelerator() + self.tt.state(id).accelerator() + } + + #[inline] + fn get_prefilter(&self) -> Option<&Prefilter> { + self.pre.as_ref() } } @@ -1278,43 +1318,38 @@ struct Transitions { /// least one state---the dead state---even the empty DFA. In particular, /// the dead state always has ID 0 and is correspondingly always the first /// state. The dead state is never a match state. - count: usize, + state_len: usize, /// The total number of unique patterns represented by these match states. 
- patterns: usize, + pattern_len: usize, } impl<'a> Transitions<&'a [u8]> { unsafe fn from_bytes_unchecked( mut slice: &'a [u8], ) -> Result<(Transitions<&'a [u8]>, usize), DeserializeError> { - let slice_start = slice.as_ptr() as usize; + let slice_start = slice.as_ptr().as_usize(); - let (state_count, nr) = - bytes::try_read_u32_as_usize(&slice, "state count")?; + let (state_len, nr) = + wire::try_read_u32_as_usize(&slice, "state length")?; slice = &slice[nr..]; - let (pattern_count, nr) = - bytes::try_read_u32_as_usize(&slice, "pattern count")?; + let (pattern_len, nr) = + wire::try_read_u32_as_usize(&slice, "pattern length")?; slice = &slice[nr..]; let (classes, nr) = ByteClasses::from_bytes(&slice)?; slice = &slice[nr..]; let (len, nr) = - bytes::try_read_u32_as_usize(&slice, "sparse transitions length")?; + wire::try_read_u32_as_usize(&slice, "sparse transitions length")?; slice = &slice[nr..]; - bytes::check_slice_len(slice, len, "sparse states byte length")?; + wire::check_slice_len(slice, len, "sparse states byte length")?; let sparse = &slice[..len]; slice = &slice[len..]; - let trans = Transitions { - sparse, - classes, - count: state_count, - patterns: pattern_count, - }; - Ok((trans, slice.as_ptr() as usize - slice_start)) + let trans = Transitions { sparse, classes, state_len, pattern_len }; + Ok((trans, slice.as_ptr().as_usize() - slice_start)) } } @@ -1334,12 +1369,12 @@ impl> Transitions { } dst = &mut dst[..nwrite]; - // write state count - E::write_u32(u32::try_from(self.count).unwrap(), dst); + // write state length + E::write_u32(u32::try_from(self.state_len).unwrap(), dst); dst = &mut dst[size_of::()..]; - // write pattern count - E::write_u32(u32::try_from(self.patterns).unwrap(), dst); + // write pattern length + E::write_u32(u32::try_from(self.pattern_len).unwrap(), dst); dst = &mut dst[size_of::()..]; // write byte class map @@ -1351,15 +1386,22 @@ impl> Transitions { dst = &mut dst[size_of::()..]; // write actual transitions - dst.copy_from_slice(self.sparse()); + let mut id = DEAD; + while id.as_usize() < self.sparse().len() { + let state = self.state(id); + let n = state.write_to::(&mut dst)?; + dst = &mut dst[n..]; + // The next ID is the offset immediately following `state`. + id = StateID::new(id.as_usize() + state.write_to_len()).unwrap(); + } Ok(nwrite) } /// Returns the number of bytes the serialized form of this transition /// table will use. fn write_to_len(&self) -> usize { - size_of::() // state count - + size_of::() // pattern count + size_of::() // state length + + size_of::() // pattern length + self.classes.write_to_len() + size_of::() // sparse transitions length + self.sparse().len() @@ -1369,7 +1411,7 @@ impl> Transitions { /// /// That is, every state ID can be used to correctly index a state in this /// table. - fn validate(&self) -> Result<(), DeserializeError> { + fn validate(&self, sp: &Special) -> Result<(), DeserializeError> { // In order to validate everything, we not only need to make sure we // can decode every state, but that every transition in every state // points to a valid state. There are many duplicative transitions, so @@ -1381,10 +1423,22 @@ impl> Transitions { // whether doing something more clever is worth it just yet. If you're // profiling this code and need it to run faster, please file an issue. // + // OK, so we also use this to record the set of valid state IDs. Since + // it is possible for a transition to point to an invalid state ID that + // still (somehow) deserializes to a valid state. 
So we need to make + // sure our transitions are limited to actually correct state IDs. + // The problem is, I'm not sure how to do this verification step in + // no-std no-alloc mode. I think we'd *have* to store the set of valid + // state IDs in the DFA itself. For now, we don't do this verification + // in no-std no-alloc mode. The worst thing that can happen is an + // incorrect result. But no panics or memory safety problems should + // result. Because we still do validate that the state itself is + // "valid" in the sense that everything it points to actually exists. + // // ---AG struct Seen { #[cfg(feature = "alloc")] - set: BTreeSet, + set: alloc::collections::BTreeSet, #[cfg(not(feature = "alloc"))] set: core::marker::PhantomData, } @@ -1392,7 +1446,7 @@ impl> Transitions { #[cfg(feature = "alloc")] impl Seen { fn new() -> Seen { - Seen { set: BTreeSet::new() } + Seen { set: alloc::collections::BTreeSet::new() } } fn insert(&mut self, id: StateID) { self.set.insert(id); @@ -1416,38 +1470,78 @@ impl> Transitions { let mut verified: Seen = Seen::new(); // We need to make sure that we decode the correct number of states. // Otherwise, an empty set of transitions would validate even if the - // recorded state count is non-empty. - let mut count = 0; + // recorded state length is non-empty. + let mut len = 0; // We can't use the self.states() iterator because it assumes the state // encodings are valid. It could panic if they aren't. let mut id = DEAD; while id.as_usize() < self.sparse().len() { - let state = self.try_state(id)?; + // Before we even decode the state, we check that the ID itself + // is well formed. That is, if it's a special state then it must + // actually be a quit, dead, accel, match or start state. + if sp.is_special_state(id) { + let is_actually_special = sp.is_dead_state(id) + || sp.is_quit_state(id) + || sp.is_match_state(id) + || sp.is_start_state(id) + || sp.is_accel_state(id); + if !is_actually_special { + // This is kind of a cryptic error message... + return Err(DeserializeError::generic( + "found sparse state tagged as special but \ + wasn't actually special", + )); + } + } + let state = self.try_state(sp, id)?; verified.insert(id); // The next ID should be the offset immediately following `state`. - id = StateID::new(bytes::add( + id = StateID::new(wire::add( id.as_usize(), - state.bytes_len(), + state.write_to_len(), "next state ID offset", )?) .map_err(|err| { DeserializeError::state_id_error(err, "next state ID offset") })?; - count += 1; - - // Now check that all transitions in this state are correct. + len += 1; + } + // Now that we've checked that all top-level states are correct and + // importantly, collected a set of valid state IDs, we have all the + // information we need to check that all transitions are correct too. + // + // Note that we can't use `valid_ids` to iterate because it will + // be empty in no-std no-alloc contexts. (And yes, that means our + // verification isn't quite as good.) We can use `self.states()` + // though at least, since we know that all states can at least be + // decoded and traversed correctly. + for state in self.states() { + // Check that all transitions in this state are correct. for i in 0..state.ntrans { let to = state.next_at(i); - if verified.contains(&to) { - continue; + // For no-alloc, we just check that the state can decode. 
It is + // technically possible that the state ID could still point to + // a non-existent state even if it decodes (fuzzing proved this + // to be true), but it shouldn't result in any memory unsafety + // or panics in non-debug mode. + #[cfg(not(feature = "alloc"))] + { + let _ = self.try_state(sp, to)?; + } + #[cfg(feature = "alloc")] + { + if !verified.contains(&to) { + return Err(DeserializeError::generic( + "found transition that points to a \ + non-existent state", + )); + } } - let _ = self.try_state(to)?; - verified.insert(id); } } - if count != self.count { + if len != self.state_len { return Err(DeserializeError::generic( - "mismatching sparse state count", + "mismatching sparse state length", )); } Ok(()) @@ -1458,19 +1552,19 @@ impl> Transitions { Transitions { sparse: self.sparse(), classes: self.classes.clone(), - count: self.count, - patterns: self.patterns, + state_len: self.state_len, + pattern_len: self.pattern_len, } } /// Converts these transitions to an owned value. #[cfg(feature = "alloc")] - fn to_owned(&self) -> Transitions> { + fn to_owned(&self) -> Transitions> { Transitions { sparse: self.sparse().to_vec(), classes: self.classes.clone(), - count: self.count, - patterns: self.patterns, + state_len: self.state_len, + pattern_len: self.pattern_len, } } @@ -1483,10 +1577,10 @@ impl> Transitions { /// functions involved are also inlined, which should hopefully eliminate /// a lot of the extraneous decoding that is never needed just to follow /// the next transition. - #[inline(always)] + #[cfg_attr(feature = "perf-inline", inline(always))] fn state(&self, id: StateID) -> State<'_> { let mut state = &self.sparse()[id.as_usize()..]; - let mut ntrans = bytes::read_u16(&state) as usize; + let mut ntrans = wire::read_u16(&state).as_usize(); let is_match = (1 << 15) & ntrans != 0; ntrans &= !(1 << 15); state = &state[2..]; @@ -1494,13 +1588,13 @@ impl> Transitions { let (input_ranges, state) = state.split_at(ntrans * 2); let (next, state) = state.split_at(ntrans * StateID::SIZE); let (pattern_ids, state) = if is_match { - let npats = bytes::read_u32(&state) as usize; + let npats = wire::read_u32(&state).as_usize(); state[4..].split_at(npats * 4) } else { (&[][..], state) }; - let accel_len = state[0] as usize; + let accel_len = usize::from(state[0]); let accel = &state[1..accel_len + 1]; State { id, is_match, ntrans, input_ranges, next, pattern_ids, accel } } @@ -1513,27 +1607,44 @@ impl> Transitions { /// all of its data is consistent. It does not verify that its state ID /// transitions point to valid states themselves, nor does it verify that /// every pattern ID is valid. - fn try_state(&self, id: StateID) -> Result, DeserializeError> { + fn try_state( + &self, + sp: &Special, + id: StateID, + ) -> Result, DeserializeError> { if id.as_usize() > self.sparse().len() { - return Err(DeserializeError::generic("invalid sparse state ID")); + return Err(DeserializeError::generic( + "invalid caller provided sparse state ID", + )); } let mut state = &self.sparse()[id.as_usize()..]; // Encoding format starts with a u16 that stores the total number of // transitions in this state. 
let (mut ntrans, _) = - bytes::try_read_u16_as_usize(state, "state transition count")?; + wire::try_read_u16_as_usize(state, "state transition length")?; let is_match = ((1 << 15) & ntrans) != 0; ntrans &= !(1 << 15); state = &state[2..]; if ntrans > 257 || ntrans == 0 { - return Err(DeserializeError::generic("invalid transition count")); + return Err(DeserializeError::generic( + "invalid transition length", + )); + } + if is_match && !sp.is_match_state(id) { + return Err(DeserializeError::generic( + "state marked as match but not in match ID range", + )); + } else if !is_match && sp.is_match_state(id) { + return Err(DeserializeError::generic( + "state in match ID range but not marked as match state", + )); } // Each transition has two pieces: an inclusive range of bytes on which // it is defined, and the state ID that those bytes transition to. The // pairs come first, followed by a corresponding sequence of state IDs. let input_ranges_len = ntrans.checked_mul(2).unwrap(); - bytes::check_slice_len(state, input_ranges_len, "sparse byte pairs")?; + wire::check_slice_len(state, input_ranges_len, "sparse byte pairs")?; let (input_ranges, state) = state.split_at(input_ranges_len); // Every range should be of the form A-B, where A<=B. for pair in input_ranges.chunks(2) { @@ -1549,13 +1660,13 @@ impl> Transitions { let next_len = ntrans .checked_mul(self.id_len()) .expect("state size * #trans should always fit in a usize"); - bytes::check_slice_len(state, next_len, "sparse trans state IDs")?; + wire::check_slice_len(state, next_len, "sparse trans state IDs")?; let (next, state) = state.split_at(next_len); // We can at least verify that every state ID is in bounds. for idbytes in next.chunks(self.id_len()) { let (id, _) = - bytes::read_state_id(idbytes, "sparse state ID in try_state")?; - bytes::check_slice_len( + wire::read_state_id(idbytes, "sparse state ID in try_state")?; + wire::check_slice_len( self.sparse(), id.as_usize(), "invalid sparse state ID", @@ -1567,19 +1678,24 @@ impl> Transitions { // encoded 32-bit integers. 
let (pattern_ids, state) = if is_match { let (npats, nr) = - bytes::try_read_u32_as_usize(state, "pattern ID count")?; + wire::try_read_u32_as_usize(state, "pattern ID length")?; let state = &state[nr..]; + if npats == 0 { + return Err(DeserializeError::generic( + "state marked as a match, but has no pattern IDs", + )); + } let pattern_ids_len = - bytes::mul(npats, 4, "sparse pattern ID byte length")?; - bytes::check_slice_len( + wire::mul(npats, 4, "sparse pattern ID byte length")?; + wire::check_slice_len( state, pattern_ids_len, "sparse pattern IDs", )?; let (pattern_ids, state) = state.split_at(pattern_ids_len); for patbytes in pattern_ids.chunks(PatternID::SIZE) { - bytes::read_pattern_id( + wire::read_pattern_id( patbytes, "sparse pattern ID in try_state", )?; @@ -1597,21 +1713,30 @@ impl> Transitions { if state.is_empty() { return Err(DeserializeError::generic("no accelerator length")); } - let (accel_len, state) = (state[0] as usize, &state[1..]); + let (accel_len, state) = (usize::from(state[0]), &state[1..]); if accel_len > 3 { return Err(DeserializeError::generic( "sparse invalid accelerator length", )); + } else if accel_len == 0 && sp.is_accel_state(id) { + return Err(DeserializeError::generic( + "got no accelerators in state, but in accelerator ID range", + )); + } else if accel_len > 0 && !sp.is_accel_state(id) { + return Err(DeserializeError::generic( + "state in accelerator ID range, but has no accelerators", + )); } - bytes::check_slice_len( + + wire::check_slice_len( state, accel_len, "sparse corrupt accelerator length", )?; let (accel, _) = (&state[..accel_len], &state[accel_len..]); - Ok(State { + let state = State { id, is_match, ntrans, @@ -1619,7 +1744,13 @@ impl> Transitions { next, pattern_ids, accel, - }) + }; + if sp.is_quit_state(state.next_at(state.ntrans - 1)) { + return Err(DeserializeError::generic( + "state with EOI transition to quit state is illegal", + )); + } + Ok(state) } /// Return an iterator over all of the states in this DFA. @@ -1648,13 +1779,13 @@ impl> Transitions { } } -#[cfg(feature = "alloc")] +#[cfg(feature = "dfa-build")] impl> Transitions { /// Return a convenient mutable representation of the given state. /// This panics if the state is invalid. fn state_mut(&mut self, id: StateID) -> StateMut<'_> { let mut state = &mut self.sparse_mut()[id.as_usize()..]; - let mut ntrans = bytes::read_u16(&state) as usize; + let mut ntrans = wire::read_u16(&state).as_usize(); let is_match = (1 << 15) & ntrans != 0; ntrans &= !(1 << 15); state = &mut state[2..]; @@ -1662,13 +1793,13 @@ impl> Transitions { let (input_ranges, state) = state.split_at_mut(ntrans * 2); let (next, state) = state.split_at_mut(ntrans * StateID::SIZE); let (pattern_ids, state) = if is_match { - let npats = bytes::read_u32(&state) as usize; + let npats = wire::read_u32(&state).as_usize(); state[4..].split_at_mut(npats * 4) } else { (&mut [][..], state) }; - let accel_len = state[0] as usize; + let accel_len = usize::from(state[0]); let accel = &mut state[1..accel_len + 1]; StateMut { id, @@ -1702,53 +1833,85 @@ struct StartTable { /// In practice, T is either Vec or &[u8] and has no alignment /// requirements. /// - /// The first `stride` (currently always 4) entries always correspond to - /// the start states for the entire DFA. After that, there are - /// `stride * patterns` state IDs, where `patterns` may be zero in the - /// case of a DFA with no patterns or in the case where the DFA was built - /// without enabling starting states for each pattern. 
+ /// The first `2 * stride` (currently always 8) entries always correspond + /// to the starts states for the entire DFA, with the first 4 entries being + /// for unanchored searches and the second 4 entries being for anchored + /// searches. To keep things simple, we always use 8 entries even if the + /// `StartKind` is not both. + /// + /// After that, there are `stride * patterns` state IDs, where `patterns` + /// may be zero in the case of a DFA with no patterns or in the case where + /// the DFA was built without enabling starting states for each pattern. table: T, + /// The starting state configuration supported. When 'both', both + /// unanchored and anchored searches work. When 'unanchored', anchored + /// searches panic. When 'anchored', unanchored searches panic. + kind: StartKind, + /// The start state configuration for every possible byte. + start_map: StartByteMap, /// The number of starting state IDs per pattern. stride: usize, /// The total number of patterns for which starting states are encoded. - /// This may be zero for non-empty DFAs when the DFA was built without - /// start states for each pattern. - patterns: usize, + /// This is `None` for DFAs that were built without start states for each + /// pattern. Thus, one cannot use this field to say how many patterns + /// are in the DFA in all cases. It is specific to how many patterns are + /// represented in this start table. + pattern_len: Option, + /// The universal starting state for unanchored searches. This is only + /// present when the DFA supports unanchored searches and when all starting + /// state IDs for an unanchored search are equivalent. + universal_start_unanchored: Option, + /// The universal starting state for anchored searches. This is only + /// present when the DFA supports anchored searches and when all starting + /// state IDs for an anchored search are equivalent. + universal_start_anchored: Option, } -#[cfg(feature = "alloc")] +#[cfg(feature = "dfa-build")] impl StartTable> { - fn new(patterns: usize) -> StartTable> { - let stride = Start::count(); + fn new>( + dfa: &dense::DFA, + pattern_len: Option, + ) -> StartTable> { + let stride = Start::len(); // This is OK since the only way we're here is if a dense DFA could be // constructed successfully, which uses the same space. let len = stride - .checked_mul(patterns) + .checked_mul(pattern_len.unwrap_or(0)) .unwrap() - .checked_add(stride) + .checked_add(stride.checked_mul(2).unwrap()) .unwrap() .checked_mul(StateID::SIZE) .unwrap(); - StartTable { table: vec![0; len], stride, patterns } + StartTable { + table: vec![0; len], + kind: dfa.start_kind(), + start_map: dfa.start_map().clone(), + stride, + pattern_len, + universal_start_unanchored: dfa + .universal_start_state(Anchored::No), + universal_start_anchored: dfa.universal_start_state(Anchored::Yes), + } } fn from_dense_dfa>( dfa: &dense::DFA, remap: &[StateID], - ) -> Result>, Error> { + ) -> Result>, BuildError> { // Unless the DFA has start states compiled for each pattern, then // as far as the starting state table is concerned, there are zero // patterns to account for. It will instead only store starting states // for the entire DFA. 
- let start_pattern_count = if dfa.has_starts_for_each_pattern() { - dfa.pattern_count() + let start_pattern_len = if dfa.starts_for_each_pattern() { + Some(dfa.pattern_len()) } else { - 0 + None }; - let mut sl = StartTable::new(start_pattern_count); - for (old_start_id, sty, pid) in dfa.starts() { + let mut sl = StartTable::new(dfa, start_pattern_len); + for (old_start_id, anchored, sty) in dfa.starts() { let new_start_id = remap[dfa.to_index(old_start_id)]; - sl.set_start(sty, pid, new_start_id); + sl.set_start(anchored, sty, new_start_id); } Ok(sl) } @@ -1758,53 +1921,98 @@ impl<'a> StartTable<&'a [u8]> { unsafe fn from_bytes_unchecked( mut slice: &'a [u8], ) -> Result<(StartTable<&'a [u8]>, usize), DeserializeError> { - let slice_start = slice.as_ptr() as usize; + let slice_start = slice.as_ptr().as_usize(); - let (stride, nr) = - bytes::try_read_u32_as_usize(slice, "sparse start table stride")?; + let (kind, nr) = StartKind::from_bytes(slice)?; slice = &slice[nr..]; - let (patterns, nr) = bytes::try_read_u32_as_usize( - slice, - "sparse start table patterns", - )?; + let (start_map, nr) = StartByteMap::from_bytes(slice)?; slice = &slice[nr..]; - if stride != Start::count() { + let (stride, nr) = + wire::try_read_u32_as_usize(slice, "sparse start table stride")?; + slice = &slice[nr..]; + if stride != Start::len() { return Err(DeserializeError::generic( "invalid sparse starting table stride", )); } - if patterns > PatternID::LIMIT { + + let (maybe_pattern_len, nr) = + wire::try_read_u32_as_usize(slice, "sparse start table patterns")?; + slice = &slice[nr..]; + let pattern_len = if maybe_pattern_len.as_u32() == u32::MAX { + None + } else { + Some(maybe_pattern_len) + }; + if pattern_len.map_or(false, |len| len > PatternID::LIMIT) { return Err(DeserializeError::generic( "sparse invalid number of patterns", )); } - let pattern_table_size = - bytes::mul(stride, patterns, "sparse invalid pattern count")?; + + let (universal_unanchored, nr) = + wire::try_read_u32(slice, "universal unanchored start")?; + slice = &slice[nr..]; + let universal_start_unanchored = if universal_unanchored == u32::MAX { + None + } else { + Some(StateID::try_from(universal_unanchored).map_err(|e| { + DeserializeError::state_id_error( + e, + "universal unanchored start", + ) + })?) + }; + + let (universal_anchored, nr) = + wire::try_read_u32(slice, "universal anchored start")?; + slice = &slice[nr..]; + let universal_start_anchored = if universal_anchored == u32::MAX { + None + } else { + Some(StateID::try_from(universal_anchored).map_err(|e| { + DeserializeError::state_id_error(e, "universal anchored start") + })?) + }; + + let pattern_table_size = wire::mul( + stride, + pattern_len.unwrap_or(0), + "sparse invalid pattern length", + )?; // Our start states always start with a single stride of start states // for the entire automaton which permit it to match any pattern. What // follows it are an optional set of start states for each pattern. 
- let start_state_count = bytes::add( - stride, + let start_state_len = wire::add( + wire::mul(2, stride, "start state stride too big")?, pattern_table_size, "sparse invalid 'any' pattern starts size", )?; - let table_bytes_len = bytes::mul( - start_state_count, + let table_bytes_len = wire::mul( + start_state_len, StateID::SIZE, "sparse pattern table bytes length", )?; - bytes::check_slice_len( + wire::check_slice_len( slice, table_bytes_len, "sparse start ID table", )?; - let table_bytes = &slice[..table_bytes_len]; + let table = &slice[..table_bytes_len]; slice = &slice[table_bytes_len..]; - let sl = StartTable { table: table_bytes, stride, patterns }; - Ok((sl, slice.as_ptr() as usize - slice_start)) + let sl = StartTable { + table, + kind, + start_map, + stride, + pattern_len, + universal_start_unanchored, + universal_start_anchored, + }; + Ok((sl, slice.as_ptr().as_usize() - slice_start)) } } @@ -1821,22 +2029,51 @@ impl> StartTable { } dst = &mut dst[..nwrite]; + // write start kind + let nw = self.kind.write_to::(dst)?; + dst = &mut dst[nw..]; + // write start byte map + let nw = self.start_map.write_to(dst)?; + dst = &mut dst[nw..]; // write stride E::write_u32(u32::try_from(self.stride).unwrap(), dst); dst = &mut dst[size_of::()..]; - // write pattern count - E::write_u32(u32::try_from(self.patterns).unwrap(), dst); + // write pattern length + E::write_u32( + u32::try_from(self.pattern_len.unwrap_or(0xFFFF_FFFF)).unwrap(), + dst, + ); + dst = &mut dst[size_of::()..]; + // write universal start unanchored state id, u32::MAX if absent + E::write_u32( + self.universal_start_unanchored + .map_or(u32::MAX, |sid| sid.as_u32()), + dst, + ); + dst = &mut dst[size_of::()..]; + // write universal start anchored state id, u32::MAX if absent + E::write_u32( + self.universal_start_anchored.map_or(u32::MAX, |sid| sid.as_u32()), + dst, + ); dst = &mut dst[size_of::()..]; // write start IDs - dst.copy_from_slice(self.table()); + for (sid, _, _) in self.iter() { + E::write_u32(sid.as_u32(), dst); + dst = &mut dst[StateID::SIZE..]; + } Ok(nwrite) } /// Returns the number of bytes the serialized form of this transition /// table will use. fn write_to_len(&self) -> usize { - size_of::() // stride + self.kind.write_to_len() + + self.start_map.write_to_len() + + size_of::() // stride + size_of::() // # patterns + + size_of::() // universal unanchored start + + size_of::() // universal anchored start + self.table().len() } @@ -1846,10 +2083,29 @@ impl> StartTable { /// state in the DFA's sparse transitions. fn validate( &self, + sp: &Special, trans: &Transitions, ) -> Result<(), DeserializeError> { for (id, _, _) in self.iter() { - let _ = trans.try_state(id)?; + if sp.is_match_state(id) { + return Err(DeserializeError::generic( + "start states cannot be match states", + )); + } + // Confirm that the start state points to a valid state. + let state = trans.try_state(sp, id)?; + // And like for the transition table, confirm that the transitions + // on all start states themselves point to a valid state. + // + // It'd probably be better to integrate this validation with the + // transition table, or otherwise store a sorted sequence of all + // valid state IDs in the sparse DFA itself. That way, we could + // check that every pointer to a state corresponds precisely to a + // correct and valid state. 
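// A standalone sketch (illustrative only, not taken from the upstream source)
// of how a slot in the start table deserialized above is addressed: one
// stride of unanchored starts, then one stride of anchored starts, then one
// stride per pattern. `Mode` is a stand-in for the crate's `Anchored` type,
// and bounds checks are omitted for brevity.
enum Mode {
    Unanchored,
    Anchored,
    Pattern(usize),
}

fn start_slot(stride: usize, start_index: usize, mode: Mode) -> usize {
    match mode {
        Mode::Unanchored => start_index,
        Mode::Anchored => stride + start_index,
        Mode::Pattern(pid) => (2 * stride) + (stride * pid) + start_index,
    }
}
// For example, with a stride of 4, start type 1 and pattern 2:
// start_slot(4, 1, Mode::Pattern(2)) == 8 + 8 + 1 == 17, and the byte offset
// of that slot is 17 * StateID::SIZE.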
+ for i in 0..state.ntrans { + let to = state.next_at(i); + let _ = trans.try_state(sp, to)?; + } } Ok(()) } @@ -1858,18 +2114,26 @@ impl> StartTable { fn as_ref(&self) -> StartTable<&'_ [u8]> { StartTable { table: self.table(), + kind: self.kind, + start_map: self.start_map.clone(), stride: self.stride, - patterns: self.patterns, + pattern_len: self.pattern_len, + universal_start_unanchored: self.universal_start_unanchored, + universal_start_anchored: self.universal_start_anchored, } } /// Converts this start list to an owned value. #[cfg(feature = "alloc")] - fn to_owned(&self) -> StartTable> { + fn to_owned(&self) -> StartTable> { StartTable { table: self.table().to_vec(), + kind: self.kind, + start_map: self.start_map.clone(), stride: self.stride, - patterns: self.patterns, + pattern_len: self.pattern_len, + universal_start_unanchored: self.universal_start_unanchored, + universal_start_anchored: self.universal_start_anchored, } } @@ -1879,26 +2143,45 @@ impl> StartTable { /// starting state for the given pattern is returned. If this start table /// does not have individual starting states for each pattern, then this /// panics. - fn start(&self, index: Start, pattern_id: Option) -> StateID { - let start_index = index.as_usize(); - let index = match pattern_id { - None => start_index, - Some(pid) => { - let pid = pid.as_usize(); - assert!(pid < self.patterns, "invalid pattern ID {:?}", pid); - self.stride - .checked_mul(pid) - .unwrap() - .checked_add(self.stride) - .unwrap() - .checked_add(start_index) - .unwrap() + fn start( + &self, + input: &Input<'_>, + start: Start, + ) -> Result { + let start_index = start.as_usize(); + let mode = input.get_anchored(); + let index = match mode { + Anchored::No => { + if !self.kind.has_unanchored() { + return Err(MatchError::unsupported_anchored(mode)); + } + start_index + } + Anchored::Yes => { + if !self.kind.has_anchored() { + return Err(MatchError::unsupported_anchored(mode)); + } + self.stride + start_index + } + Anchored::Pattern(pid) => { + let len = match self.pattern_len { + None => { + return Err(MatchError::unsupported_anchored(mode)) + } + Some(len) => len, + }; + if pid.as_usize() >= len { + return Ok(DEAD); + } + (2 * self.stride) + + (self.stride * pid.as_usize()) + + start_index } }; let start = index * StateID::SIZE; // This OK since we're allowed to assume that the start table contains // valid StateIDs. - bytes::read_state_id_unchecked(&self.table()[start..]).0 + Ok(wire::read_state_id_unchecked(&self.table()[start..]).0) } /// Return an iterator over all start IDs in this table. @@ -1924,27 +2207,26 @@ impl> StartTable { } } -#[cfg(feature = "alloc")] +#[cfg(feature = "dfa-build")] impl> StartTable { /// Set the start state for the given index and pattern. /// /// If the pattern ID or state ID are not valid, then this will panic. 
- fn set_start( - &mut self, - index: Start, - pattern_id: Option, - id: StateID, - ) { - let start_index = index.as_usize(); - let index = match pattern_id { - None => start_index, - Some(pid) => { + fn set_start(&mut self, anchored: Anchored, start: Start, id: StateID) { + let start_index = start.as_usize(); + let index = match anchored { + Anchored::No => start_index, + Anchored::Yes => self.stride + start_index, + Anchored::Pattern(pid) => { let pid = pid.as_usize(); - assert!(pid < self.patterns, "invalid pattern ID {:?}", pid); + let len = self + .pattern_len + .expect("start states for each pattern enabled"); + assert!(pid < len, "invalid pattern ID {:?}", pid); self.stride .checked_mul(pid) .unwrap() - .checked_add(self.stride) + .checked_add(self.stride.checked_mul(2).unwrap()) .unwrap() .checked_add(start_index) .unwrap() @@ -1952,7 +2234,7 @@ impl> StartTable { }; let start = index * StateID::SIZE; let end = start + StateID::SIZE; - bytes::write_state_id::( + wire::write_state_id::( id, &mut self.table.as_mut()[start..end], ); @@ -1966,9 +2248,9 @@ struct StartStateIter<'a, T> { } impl<'a, T: AsRef<[u8]>> Iterator for StartStateIter<'a, T> { - type Item = (StateID, Start, Option); + type Item = (StateID, Anchored, Start); - fn next(&mut self) -> Option<(StateID, Start, Option)> { + fn next(&mut self) -> Option<(StateID, Anchored, Start)> { let i = self.i; if i >= self.st.len() { return None; @@ -1978,18 +2260,13 @@ impl<'a, T: AsRef<[u8]>> Iterator for StartStateIter<'a, T> { // This unwrap is okay since the stride of any DFA must always match // the number of start state types. let start_type = Start::from_usize(i % self.st.stride).unwrap(); - let pid = if i < self.st.stride { - // This means we don't have start states for each pattern. - None + let anchored = if i < self.st.stride { + Anchored::No + } else if i < (2 * self.st.stride) { + Anchored::Yes } else { - // These unwraps are OK since we may assume our table and stride - // is correct. - let pid = i - .checked_sub(self.st.stride) - .unwrap() - .checked_div(self.st.stride) - .unwrap(); - Some(PatternID::new(pid).unwrap()) + let pid = (i - (2 * self.st.stride)) / self.st.stride; + Anchored::Pattern(PatternID::new(pid).unwrap()) }; let start = i * StateID::SIZE; let end = start + StateID::SIZE; @@ -1997,7 +2274,7 @@ impl<'a, T: AsRef<[u8]>> Iterator for StartStateIter<'a, T> { // This is OK since we're allowed to assume that any IDs in this start // table are correct and valid for this DFA. let id = StateID::from_ne_bytes_unchecked(bytes); - Some((id, start_type, pid)) + Some((id, anchored, start_type)) } } @@ -2024,7 +2301,7 @@ impl<'a, T: AsRef<[u8]>> Iterator for StateIter<'a, T> { return None; } let state = self.trans.state(StateID::new_unchecked(self.id)); - self.id = self.id + state.bytes_len(); + self.id = self.id + state.write_to_len(); Some(state) } } @@ -2071,7 +2348,7 @@ impl<'a> State<'a> { /// /// This is marked as inline to help dramatically boost sparse searching, /// which decodes each state it enters to follow the next transition. - #[inline(always)] + #[cfg_attr(feature = "perf-inline", inline(always))] fn next(&self, input: u8) -> StateID { // This straight linear search was observed to be much better than // binary search on ASCII haystacks, likely because a binary search @@ -2120,19 +2397,66 @@ impl<'a> State<'a> { /// is invalid, then this panics. 
fn pattern_id(&self, match_index: usize) -> PatternID { let start = match_index * PatternID::SIZE; - bytes::read_pattern_id_unchecked(&self.pattern_ids[start..]).0 + wire::read_pattern_id_unchecked(&self.pattern_ids[start..]).0 } /// Returns the total number of pattern IDs for this state. This is always /// zero when `is_match` is false. - fn pattern_count(&self) -> usize { + fn pattern_len(&self) -> usize { assert_eq!(0, self.pattern_ids.len() % 4); self.pattern_ids.len() / 4 } + /// Return an accelerator for this state. + fn accelerator(&self) -> &'a [u8] { + self.accel + } + + /// Write the raw representation of this state to the given buffer using + /// the given endianness. + fn write_to( + &self, + mut dst: &mut [u8], + ) -> Result { + let nwrite = self.write_to_len(); + if dst.len() < nwrite { + return Err(SerializeError::buffer_too_small( + "sparse state transitions", + )); + } + + let ntrans = + if self.is_match { self.ntrans | (1 << 15) } else { self.ntrans }; + E::write_u16(u16::try_from(ntrans).unwrap(), dst); + dst = &mut dst[size_of::()..]; + + dst[..self.input_ranges.len()].copy_from_slice(self.input_ranges); + dst = &mut dst[self.input_ranges.len()..]; + + for i in 0..self.ntrans { + E::write_u32(self.next_at(i).as_u32(), dst); + dst = &mut dst[StateID::SIZE..]; + } + + if self.is_match { + E::write_u32(u32::try_from(self.pattern_len()).unwrap(), dst); + dst = &mut dst[size_of::()..]; + for i in 0..self.pattern_len() { + let pid = self.pattern_id(i); + E::write_u32(pid.as_u32(), dst); + dst = &mut dst[PatternID::SIZE..]; + } + } + + dst[0] = u8::try_from(self.accel.len()).unwrap(); + dst[1..][..self.accel.len()].copy_from_slice(self.accel); + + Ok(nwrite) + } + /// Return the total number of bytes that this state consumes in its /// encoded form. - fn bytes_len(&self) -> usize { + fn write_to_len(&self) -> usize { let mut len = 2 + (self.ntrans * 2) + (self.ntrans * StateID::SIZE) @@ -2142,11 +2466,6 @@ impl<'a> State<'a> { } len } - - /// Return an accelerator for this state. - fn accelerator(&self) -> &'a [u8] { - self.accel - } } impl<'a> fmt::Debug for State<'a> { @@ -2163,14 +2482,14 @@ impl<'a> fmt::Debug for State<'a> { } let (start, end) = self.range(i); if start == end { - write!(f, "{:?} => {:?}", DebugByte(start), next)?; + write!(f, "{:?} => {:?}", DebugByte(start), next.as_usize())?; } else { write!( f, "{:?}-{:?} => {:?}", DebugByte(start), DebugByte(end), - next, + next.as_usize(), )?; } printed = true; @@ -2180,7 +2499,7 @@ impl<'a> fmt::Debug for State<'a> { if printed { write!(f, ", ")?; } - write!(f, "EOI => {:?}", eoi)?; + write!(f, "EOI => {:?}", eoi.as_usize())?; } Ok(()) } @@ -2188,7 +2507,7 @@ impl<'a> fmt::Debug for State<'a> { /// A representation of a mutable sparse DFA state that can be cheaply /// materialized from a state identifier. -#[cfg(feature = "alloc")] +#[cfg(feature = "dfa-build")] struct StateMut<'a> { /// The identifier of this state. id: StateID, @@ -2216,17 +2535,17 @@ struct StateMut<'a> { accel: &'a mut [u8], } -#[cfg(feature = "alloc")] +#[cfg(feature = "dfa-build")] impl<'a> StateMut<'a> { /// Sets the ith transition to the given state. 
fn set_next_at(&mut self, i: usize, next: StateID) { let start = i * StateID::SIZE; let end = start + StateID::SIZE; - bytes::write_state_id::(next, &mut self.next[start..end]); + wire::write_state_id::(next, &mut self.next[start..end]); } } -#[cfg(feature = "alloc")] +#[cfg(feature = "dfa-build")] impl<'a> fmt::Debug for StateMut<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let state = State { @@ -2242,6 +2561,7 @@ impl<'a> fmt::Debug for StateMut<'a> { } } +/* /// A binary search routine specialized specifically to a sparse DFA state's /// transitions. Specifically, the transitions are defined as a set of pairs /// of input bytes that delineate an inclusive range of bytes. If the input @@ -2261,8 +2581,7 @@ impl<'a> fmt::Debug for StateMut<'a> { /// guaranteed to be safe and is thus UB (since I don't think the in-memory /// representation of `(u8, u8)` has been nailed down). One could define a /// repr(C) type, but the casting doesn't seem justified. -#[allow(dead_code)] -#[inline(always)] +#[cfg_attr(feature = "perf-inline", inline(always))] fn binary_search_ranges(ranges: &[u8], needle: u8) -> Option { debug_assert!(ranges.len() % 2 == 0, "ranges must have even length"); debug_assert!(ranges.len() <= 512, "ranges should be short"); @@ -2281,3 +2600,57 @@ fn binary_search_ranges(ranges: &[u8], needle: u8) -> Option { } None } +*/ + +#[cfg(all(test, feature = "syntax", feature = "dfa-build"))] +mod tests { + use crate::{ + dfa::{dense::DFA, Automaton}, + nfa::thompson, + Input, MatchError, + }; + + // See the analogous test in src/hybrid/dfa.rs and src/dfa/dense.rs. + #[test] + fn heuristic_unicode_forward() { + let dfa = DFA::builder() + .configure(DFA::config().unicode_word_boundary(true)) + .thompson(thompson::Config::new().reverse(true)) + .build(r"\b[0-9]+\b") + .unwrap() + .to_sparse() + .unwrap(); + + let input = Input::new("β123").range(2..); + let expected = MatchError::quit(0xB2, 1); + let got = dfa.try_search_fwd(&input); + assert_eq!(Err(expected), got); + + let input = Input::new("123β").range(..3); + let expected = MatchError::quit(0xCE, 3); + let got = dfa.try_search_fwd(&input); + assert_eq!(Err(expected), got); + } + + // See the analogous test in src/hybrid/dfa.rs and src/dfa/dense.rs. + #[test] + fn heuristic_unicode_reverse() { + let dfa = DFA::builder() + .configure(DFA::config().unicode_word_boundary(true)) + .thompson(thompson::Config::new().reverse(true)) + .build(r"\b[0-9]+\b") + .unwrap() + .to_sparse() + .unwrap(); + + let input = Input::new("β123").range(2..); + let expected = MatchError::quit(0xB2, 1); + let got = dfa.try_search_rev(&input); + assert_eq!(Err(expected), got); + + let input = Input::new("123β").range(..3); + let expected = MatchError::quit(0xCE, 3); + let got = dfa.try_search_rev(&input); + assert_eq!(Err(expected), got); + } +} -- cgit v1.2.3
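// A closing standalone sketch (illustrative only, not taken from the upstream
// source) of the per-state wire format written by `State::write_to` earlier
// in this patch, expressed as a size computation. The parameter names are
// stand-ins for the corresponding `State` fields, and state/pattern IDs are
// assumed to serialize as 4 bytes each.
fn sparse_state_encoded_len(
    ntrans: usize,
    npatterns: usize,
    naccel: usize,
    is_match: bool,
) -> usize {
    // pattern count (u32) plus the pattern IDs, present only for match states
    let match_part = if is_match { 4 + (npatterns * 4) } else { 0 };
    2                   // u16 transition count, with bit 15 flagging a match state
        + (ntrans * 2)  // inclusive (start, end) byte range per transition
        + (ntrans * 4)  // one state ID per transition
        + match_part
        + 1             // accelerator length byte
        + naccel        // accelerator bytes
}
// For example, a non-match state with 3 transitions and no accelerator bytes
// encodes to 2 + 6 + 12 + 0 + 1 + 0 = 21 bytes.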