1 files changed, 814 insertions, 0 deletions
diff --git a/third_party/rust/aho-corasick/src/dfa.rs b/third_party/rust/aho-corasick/src/dfa.rs
new file mode 100644
index 0000000000..f0370a6168
--- /dev/null
+++ b/third_party/rust/aho-corasick/src/dfa.rs
@@ -0,0 +1,814 @@
+/*!
+Provides direct access to a DFA implementation of Aho-Corasick.
+
+This is a low-level API that generally only needs to be used in niche
+circumstances. When possible, prefer using [`AhoCorasick`](crate::AhoCorasick)
+instead of a DFA directly. Using an `DFA` directly is typically only necessary
+when one needs access to the [`Automaton`] trait implementation.
+*/
+
+use alloc::{vec, vec::Vec};
+
+use crate::{
+    automaton::Automaton,
+    nfa::noncontiguous,
+    util::{
+        alphabet::ByteClasses,
+        error::{BuildError, MatchError},
+        int::{Usize, U32},
+        prefilter::Prefilter,
+        primitives::{IteratorIndexExt, PatternID, SmallIndex, StateID},
+        search::{Anchored, MatchKind, StartKind},
+        special::Special,
+    },
+};
+
+/// A DFA implementation of Aho-Corasick.
+///
+/// When possible, prefer using [`AhoCorasick`](crate::AhoCorasick) instead of
+/// this type directly. Using a `DFA` directly is typically only necessary when
+/// one needs access to the [`Automaton`] trait implementation.
+///
+/// This DFA can only be built by first constructing a [`noncontiguous::NFA`].
+/// Both [`DFA::new`] and [`Builder::build`] do this for you automatically, but
+/// [`Builder::build_from_noncontiguous`] permits doing it explicitly.
+///
+/// A DFA provides the best possible search performance (in this crate) via two
+/// mechanisms:
+///
+/// * All states use a dense representation for their transitions.
+/// * All failure transitions are pre-computed such that they are never
+/// explicitly handled at search time.
+///
+/// These two facts combined mean that every state transition is performed
+/// using a constant number of instructions. However, this comes at
+/// great cost. The memory usage of a DFA can be quite exorbitant.
+/// It is potentially multiple orders of magnitude greater than a
+/// [`contiguous::NFA`](crate::nfa::contiguous::NFA) for example. In exchange,
+/// a DFA will typically have better search speed than a `contiguous::NFA`, but
+/// not by orders of magnitude.
+///
+/// Unless you have a small number of patterns or memory usage is not a concern
+/// and search performance is critical, a DFA is usually not the best choice.
+///
+/// Moreover, unlike the NFAs in this crate, it is costly for a DFA to
+/// support for anchored and unanchored search configurations. Namely,
+/// since failure transitions are pre-computed, supporting both anchored
+/// and unanchored searches requires a duplication of the transition table,
+/// making the memory usage of such a DFA ever bigger. (The NFAs in this crate
+/// unconditionally support both anchored and unanchored searches because there
+/// is essentially no added cost for doing so.) It is for this reason that
+/// a DFA's support for anchored and unanchored searches can be configured
+/// via [`Builder::start_kind`]. By default, a DFA only supports unanchored
+/// searches.
+///
+/// # Example
+///
+/// This example shows how to build an `DFA` directly and use it to execute
+/// [`Automaton::try_find`]:
+///
+/// ```
+/// use aho_corasick::{
+///     automaton::Automaton,
+///     dfa::DFA,
+///     Input, Match,
+/// };
+///
+/// let patterns = &["b", "abc", "abcd"];
+/// let haystack = "abcd";
+///
+/// let nfa = DFA::new(patterns).unwrap();
+/// assert_eq!(
+///     Some(Match::must(0, 1..2)),
+///     nfa.try_find(&Input::new(haystack))?,
+/// );
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+///
+/// It is also possible to implement your own version of `try_find`. See the
+/// [`Automaton`] documentation for an example.
+#[derive(Clone)]
+pub struct DFA {
+    /// The DFA transition table. IDs in this table are pre-multiplied. So
+    /// instead of the IDs being 0, 1, 2, 3, ..., they are 0*stride, 1*stride,
+    /// 2*stride, 3*stride, ...
+    trans: Vec<StateID>,
+    /// The matches for every match state in this DFA. This is first indexed by
+    /// state index (so that's `sid >> stride2`) and then by order in which the
+    /// matches are meant to occur.
+    matches: Vec<Vec<PatternID>>,
+    /// The amount of heap memory used, in bytes, by the inner Vecs of
+    /// 'matches'.
+    matches_memory_usage: usize,
+    /// The length of each pattern. This is used to compute the start offset
+    /// of a match.
+    pattern_lens: Vec<SmallIndex>,
+    /// A prefilter for accelerating searches, if one exists.
+    prefilter: Option<Prefilter>,
+    /// The match semantics built into this DFA.
+    match_kind: MatchKind,
+    /// The total number of states in this DFA.
+    state_len: usize,
+    /// The alphabet size, or total number of equivalence classes, for this
+    /// DFA. Note that the actual number of transitions in each state is
+    /// stride=2^stride2, where stride is the smallest power of 2 greater than
+    /// or equal to alphabet_len. We do things this way so that we can use
+    /// bitshifting to go from a state ID to an index into 'matches'.
+    alphabet_len: usize,
+    /// The exponent with a base 2, such that stride=2^stride2. Given a state
+    /// index 'i', its state identifier is 'i << stride2'. Given a state
+    /// identifier 'sid', its state index is 'sid >> stride2'.
+    stride2: usize,
+    /// The equivalence classes for this DFA. All transitions are defined on
+    /// equivalence classes and not on the 256 distinct byte values.
+    byte_classes: ByteClasses,
+    /// The length of the shortest pattern in this automaton.
+    min_pattern_len: usize,
+    /// The length of the longest pattern in this automaton.
+    max_pattern_len: usize,
+    /// The information required to deduce which states are "special" in this
+    /// DFA.
+    special: Special,
+}
+
+impl DFA {
+    /// Create a new Aho-Corasick DFA using the default configuration.
+    ///
+    /// Use a [`Builder`] if you want to change the configuration.
+    pub fn new<I, P>(patterns: I) -> Result<DFA, BuildError>
+    where
+        I: IntoIterator<Item = P>,
+        P: AsRef<[u8]>,
+    {
+        DFA::builder().build(patterns)
+    }
+
+    /// A convenience method for returning a new Aho-Corasick DFA builder.
+    ///
+    /// This usually permits one to just import the `DFA` type.
+    pub fn builder() -> Builder {
+        Builder::new()
+    }
+}
+
+impl DFA {
+    /// A sentinel state ID indicating that a search should stop once it has
+    /// entered this state. When a search stops, it returns a match if one has
+    /// been found, otherwise no match. A DFA always has an actual dead state
+    /// at this ID.
+    ///
+    /// N.B. DFAs, unlike NFAs, do not have any notion of a FAIL state.
+    /// Namely, the whole point of a DFA is that the FAIL state is completely
+    /// compiled away. That is, DFA construction involves pre-computing the
+    /// failure transitions everywhere, such that failure transitions are no
+    /// longer used at search time. This, combined with its uniformly dense
+    /// representation, are the two most important factors in why it's faster
+    /// than the NFAs in this crate.
+    const DEAD: StateID = StateID::new_unchecked(0);
+
+    /// Adds the given pattern IDs as matches to the given state and also
+    /// records the added memory usage.
+    fn set_matches(
+        &mut self,
+        sid: StateID,
+        pids: impl Iterator<Item = PatternID>,
+    ) {
+        let index = (sid.as_usize() >> self.stride2).checked_sub(2).unwrap();
+        let mut at_least_one = false;
+        for pid in pids {
+            self.matches[index].push(pid);
+            self.matches_memory_usage += PatternID::SIZE;
+            at_least_one = true;
+        }
+        assert!(at_least_one, "match state must have non-empty pids");
+    }
+}
+
+// SAFETY: 'start_state' always returns a valid state ID, 'next_state' always
+// returns a valid state ID given a valid state ID. We otherwise claim that
+// all other methods are correct as well.
+unsafe impl Automaton for DFA {
+    #[inline(always)]
+    fn start_state(&self, anchored: Anchored) -> Result<StateID, MatchError> {
+        // Either of the start state IDs can be DEAD, in which case, support
+        // for that type of search is not provided by this DFA. Which start
+        // state IDs are inactive depends on the 'StartKind' configuration at
+        // DFA construction time.
+        match anchored {
+            Anchored::No => {
+                let start = self.special.start_unanchored_id;
+                if start == DFA::DEAD {
+                    Err(MatchError::invalid_input_unanchored())
+                } else {
+                    Ok(start)
+                }
+            }
+            Anchored::Yes => {
+                let start = self.special.start_anchored_id;
+                if start == DFA::DEAD {
+                    Err(MatchError::invalid_input_anchored())
+                } else {
+                    Ok(start)
+                }
+            }
+        }
+    }
+
+    #[inline(always)]
+    fn next_state(
+        &self,
+        _anchored: Anchored,
+        sid: StateID,
+        byte: u8,
+    ) -> StateID {
+        let class = self.byte_classes.get(byte);
+        self.trans[(sid.as_u32() + u32::from(class)).as_usize()]
+    }
+
+    #[inline(always)]
+    fn is_special(&self, sid: StateID) -> bool {
+        sid <= self.special.max_special_id
+    }
+
+    #[inline(always)]
+    fn is_dead(&self, sid: StateID) -> bool {
+        sid == DFA::DEAD
+    }
+
+    #[inline(always)]
+    fn is_match(&self, sid: StateID) -> bool {
+        !self.is_dead(sid) && sid <= self.special.max_match_id
+    }
+
+    #[inline(always)]
+    fn is_start(&self, sid: StateID) -> bool {
+        sid == self.special.start_unanchored_id
+            || sid == self.special.start_anchored_id
+    }
+
+    #[inline(always)]
+    fn match_kind(&self) -> MatchKind {
+        self.match_kind
+    }
+
+    #[inline(always)]
+    fn patterns_len(&self) -> usize {
+        self.pattern_lens.len()
+    }
+
+    #[inline(always)]
+    fn pattern_len(&self, pid: PatternID) -> usize {
+        self.pattern_lens[pid].as_usize()
+    }
+
+    #[inline(always)]
+    fn min_pattern_len(&self) -> usize {
+        self.min_pattern_len
+    }
+
+    #[inline(always)]
+    fn max_pattern_len(&self) -> usize {
+        self.max_pattern_len
+    }
+
+    #[inline(always)]
+    fn match_len(&self, sid: StateID) -> usize {
+        debug_assert!(self.is_match(sid));
+        let offset = (sid.as_usize() >> self.stride2) - 2;
+        self.matches[offset].len()
+    }
+
+    #[inline(always)]
+    fn match_pattern(&self, sid: StateID, index: usize) -> PatternID {
+        debug_assert!(self.is_match(sid));
+        let offset = (sid.as_usize() >> self.stride2) - 2;
+        self.matches[offset][index]
+    }
+
+    #[inline(always)]
+    fn memory_usage(&self) -> usize {
+        use core::mem::size_of;
+
+        (self.trans.len() * size_of::<u32>())
+            + (self.matches.len() * size_of::<Vec<PatternID>>())
+            + self.matches_memory_usage
+            + (self.pattern_lens.len() * size_of::<SmallIndex>())
+            + self.prefilter.as_ref().map_or(0, |p| p.memory_usage())
+    }
+
+    #[inline(always)]
+    fn prefilter(&self) -> Option<&Prefilter> {
+        self.prefilter.as_ref()
+    }
+}
+
+impl core::fmt::Debug for DFA {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        use crate::{
+            automaton::{fmt_state_indicator, sparse_transitions},
+            util::debug::DebugByte,
+        };
+
+        writeln!(f, "dfa::DFA(")?;
+        for index in 0..self.state_len {
+            let sid = StateID::new_unchecked(index << self.stride2);
+            // While we do currently include the FAIL state in the transition
+            // table (to simplify construction), it is never actually used. It
+            // poses problems with the code below because it gets treated as
+            // a match state incidentally when it is, of course, not. So we
+            // special case it. The fail state is always the first state after
+            // the dead state.
+            //
+            // If the construction is changed to remove the fail state (it
+            // probably should be), then this special case should be updated.
+            if index == 1 {
+                writeln!(f, "F {:06}:", sid.as_usize())?;
+                continue;
+            }
+            fmt_state_indicator(f, self, sid)?;
+            write!(f, "{:06}: ", sid.as_usize())?;
+
+            let it = (0..self.byte_classes.alphabet_len()).map(|class| {
+                (class.as_u8(), self.trans[sid.as_usize() + class])
+            });
+            for (i, (start, end, next)) in sparse_transitions(it).enumerate() {
+                if i > 0 {
+                    write!(f, ", ")?;
+                }
+                if start == end {
+                    write!(
+                        f,
+                        "{:?} => {:?}",
+                        DebugByte(start),
+                        next.as_usize()
+                    )?;
+                } else {
+                    write!(
+                        f,
+                        "{:?}-{:?} => {:?}",
+                        DebugByte(start),
+                        DebugByte(end),
+                        next.as_usize()
+                    )?;
+                }
+            }
+            write!(f, "\n")?;
+            if self.is_match(sid) {
+                write!(f, " matches: ")?;
+                for i in 0..self.match_len(sid) {
+                    if i > 0 {
+                        write!(f, ", ")?;
+                    }
+                    let pid = self.match_pattern(sid, i);
+                    write!(f, "{}", pid.as_usize())?;
+                }
+                write!(f, "\n")?;
+            }
+        }
+        writeln!(f, "match kind: {:?}", self.match_kind)?;
+        writeln!(f, "prefilter: {:?}", self.prefilter.is_some())?;
+        writeln!(f, "state length: {:?}", self.state_len)?;
+        writeln!(f, "pattern length: {:?}", self.patterns_len())?;
+        writeln!(f, "shortest pattern length: {:?}", self.min_pattern_len)?;
+        writeln!(f, "longest pattern length: {:?}", self.max_pattern_len)?;
+        writeln!(f, "alphabet length: {:?}", self.alphabet_len)?;
+        writeln!(f, "stride: {:?}", 1 << self.stride2)?;
+        writeln!(f, "byte classes: {:?}", self.byte_classes)?;
+        writeln!(f, "memory usage: {:?}", self.memory_usage())?;
+        writeln!(f, ")")?;
+        Ok(())
+    }
+}
+
+/// A builder for configuring an Aho-Corasick DFA.
+///
+/// This builder has a subset of the options available to a
+/// [`AhoCorasickBuilder`](crate::AhoCorasickBuilder). Of the shared options,
+/// their behavior is identical.
+#[derive(Clone, Debug)]
+pub struct Builder {
+    noncontiguous: noncontiguous::Builder,
+    start_kind: StartKind,
+    byte_classes: bool,
+}
+
+impl Default for Builder {
+    fn default() -> Builder {
+        Builder {
+            noncontiguous: noncontiguous::Builder::new(),
+            start_kind: StartKind::Unanchored,
+            byte_classes: true,
+        }
+    }
+}
+
+impl Builder {
+    /// Create a new builder for configuring an Aho-Corasick DFA.
+    pub fn new() -> Builder {
+        Builder::default()
+    }
+
+    /// Build an Aho-Corasick DFA from the given iterator of patterns.
+    ///
+    /// A builder may be reused to create more DFAs.
+    pub fn build<I, P>(&self, patterns: I) -> Result<DFA, BuildError>
+    where
+        I: IntoIterator<Item = P>,
+        P: AsRef<[u8]>,
+    {
+        let nnfa = self.noncontiguous.build(patterns)?;
+        self.build_from_noncontiguous(&nnfa)
+    }
+
+    /// Build an Aho-Corasick DFA from the given noncontiguous NFA.
+    ///
+    /// Note that when this method is used, only the `start_kind` and
+    /// `byte_classes` settings on this builder are respected. The other
+    /// settings only apply to the initial construction of the Aho-Corasick
+    /// automaton. Since using this method requires that initial construction
+    /// has already completed, all settings impacting only initial construction
+    /// are no longer relevant.
+    pub fn build_from_noncontiguous(
+        &self,
+        nnfa: &noncontiguous::NFA,
+    ) -> Result<DFA, BuildError> {
+        debug!("building DFA");
+        let byte_classes = if self.byte_classes {
+            nnfa.byte_classes().clone()
+        } else {
+            ByteClasses::singletons()
+        };
+        let state_len = match self.start_kind {
+            StartKind::Unanchored | StartKind::Anchored => nnfa.states().len(),
+            StartKind::Both => {
+                // These unwraps are OK because we know that the number of
+                // NFA states is < StateID::LIMIT which is in turn less than
+                // i32::MAX. Thus, there is always room to multiply by 2.
+                // Finally, the number of states is always at least 4 in the
+                // NFA (DEAD, FAIL, START-UNANCHORED, START-ANCHORED), so the
+                // subtraction of 4 is okay.
+                //
+                // Note that we subtract 4 because the "anchored" part of
+                // the DFA duplicates the unanchored part (without failure
+                // transitions), but reuses the DEAD, FAIL and START states.
+                nnfa.states()
+                    .len()
+                    .checked_mul(2)
+                    .unwrap()
+                    .checked_sub(4)
+                    .unwrap()
+            }
+        };
+        let trans_len =
+            match state_len.checked_shl(byte_classes.stride2().as_u32()) {
+                Some(trans_len) => trans_len,
+                None => {
+                    return Err(BuildError::state_id_overflow(
+                        StateID::MAX.as_u64(),
+                        usize::MAX.as_u64(),
+                    ))
+                }
+            };
+        StateID::new(trans_len.checked_sub(byte_classes.stride()).unwrap())
+            .map_err(|e| {
+                BuildError::state_id_overflow(
+                    StateID::MAX.as_u64(),
+                    e.attempted(),
+                )
+            })?;
+        let num_match_states = match self.start_kind {
+            StartKind::Unanchored | StartKind::Anchored => {
+                nnfa.special().max_match_id.as_usize().checked_sub(1).unwrap()
+            }
+            StartKind::Both => nnfa
+                .special()
+                .max_match_id
+                .as_usize()
+                .checked_sub(1)
+                .unwrap()
+                .checked_mul(2)
+                .unwrap(),
+        };
+        let mut dfa = DFA {
+            trans: vec![DFA::DEAD; trans_len],
+            matches: vec![vec![]; num_match_states],
+            matches_memory_usage: 0,
+            pattern_lens: nnfa.pattern_lens_raw().to_vec(),
+            prefilter: nnfa.prefilter().map(|p| p.clone()),
+            match_kind: nnfa.match_kind(),
+            state_len,
+            alphabet_len: byte_classes.alphabet_len(),
+            stride2: byte_classes.stride2(),
+            byte_classes,
+            min_pattern_len: nnfa.min_pattern_len(),
+            max_pattern_len: nnfa.max_pattern_len(),
+            // The special state IDs are set later.
+            special: Special::zero(),
+        };
+        match self.start_kind {
+            StartKind::Both => {
+                self.finish_build_both_starts(nnfa, &mut dfa);
+            }
+            StartKind::Unanchored => {
+                self.finish_build_one_start(Anchored::No, nnfa, &mut dfa);
+            }
+            StartKind::Anchored => {
+                self.finish_build_one_start(Anchored::Yes, nnfa, &mut dfa)
+            }
+        }
+        debug!(
+            "DFA built, <states: {:?}, size: {:?}, \
+             alphabet len: {:?}, stride: {:?}>",
+            dfa.state_len,
+            dfa.memory_usage(),
+            dfa.byte_classes.alphabet_len(),
+            dfa.byte_classes.stride(),
+        );
+        // The vectors can grow ~twice as big during construction because a
+        // Vec amortizes growth. But here, let's shrink things back down to
+        // what we actually need since we're never going to add more to it.
+        dfa.trans.shrink_to_fit();
+        dfa.pattern_lens.shrink_to_fit();
+        dfa.matches.shrink_to_fit();
+        // TODO: We might also want to shrink each Vec inside of `dfa.matches`,
+        // or even better, convert it to one contiguous allocation. But I think
+        // I went with nested allocs for good reason (can't remember), so this
+        // may be tricky to do. I decided not to shrink them here because it
+        // might require a fair bit of work to do. It's unclear whether it's
+        // worth it.
+        Ok(dfa)
+    }
+
+    /// Finishes building a DFA for either unanchored or anchored searches,
+    /// but NOT both.
+    fn finish_build_one_start(
+        &self,
+        anchored: Anchored,
+        nnfa: &noncontiguous::NFA,
+        dfa: &mut DFA,
+    ) {
+        // This function always succeeds because we check above that all of the
+        // states in the NFA can be mapped to DFA state IDs.
+        let stride2 = dfa.stride2;
+        let old2new = |oldsid: StateID| {
+            StateID::new_unchecked(oldsid.as_usize() << stride2)
+        };
+        for (oldsid, state) in nnfa.states().iter().with_state_ids() {
+            let newsid = old2new(oldsid);
+            if state.is_match() {
+                dfa.set_matches(newsid, nnfa.iter_matches(oldsid));
+            }
+            sparse_iter(
+                nnfa,
+                oldsid,
+                &dfa.byte_classes,
+                |byte, class, mut oldnextsid| {
+                    if oldnextsid == noncontiguous::NFA::FAIL {
+                        if anchored.is_anchored() {
+                            oldnextsid = noncontiguous::NFA::DEAD;
+                        } else {
+                            oldnextsid = nnfa.next_state(
+                                Anchored::No,
+                                state.fail(),
+                                byte,
+                            );
+                        }
+                    }
+                    dfa.trans[newsid.as_usize() + usize::from(class)] =
+                        old2new(oldnextsid);
+                },
+            );
+        }
+        // Now that we've remapped all the IDs in our states, all that's left
+        // is remapping the special state IDs.
+        let old = nnfa.special();
+        let new = &mut dfa.special;
+        new.max_special_id = old2new(old.max_special_id);
+        new.max_match_id = old2new(old.max_match_id);
+        if anchored.is_anchored() {
+            new.start_unanchored_id = DFA::DEAD;
+            new.start_anchored_id = old2new(old.start_anchored_id);
+        } else {
+            new.start_unanchored_id = old2new(old.start_unanchored_id);
+            new.start_anchored_id = DFA::DEAD;
+        }
+    }
+
+    /// Finishes building a DFA that supports BOTH unanchored and anchored
+    /// searches. It works by inter-leaving unanchored states with anchored
+    /// states in the same transition table. This way, we avoid needing to
+    /// re-shuffle states afterward to ensure that our states still look like
+    /// DEAD, MATCH, ..., START-UNANCHORED, START-ANCHORED, NON-MATCH, ...
+    ///
+    /// Honestly this is pretty inscrutable... Simplifications are most
+    /// welcome.
+    fn finish_build_both_starts(
+        &self,
+        nnfa: &noncontiguous::NFA,
+        dfa: &mut DFA,
+    ) {
+        let stride2 = dfa.stride2;
+        let stride = 1 << stride2;
+        let mut remap_unanchored = vec![DFA::DEAD; nnfa.states().len()];
+        let mut remap_anchored = vec![DFA::DEAD; nnfa.states().len()];
+        let mut is_anchored = vec![false; dfa.state_len];
+        let mut newsid = DFA::DEAD;
+        let next_dfa_id =
+            |sid: StateID| StateID::new_unchecked(sid.as_usize() + stride);
+        for (oldsid, state) in nnfa.states().iter().with_state_ids() {
+            if oldsid == noncontiguous::NFA::DEAD
+                || oldsid == noncontiguous::NFA::FAIL
+            {
+                remap_unanchored[oldsid] = newsid;
+                remap_anchored[oldsid] = newsid;
+                newsid = next_dfa_id(newsid);
+            } else if oldsid == nnfa.special().start_unanchored_id
+                || oldsid == nnfa.special().start_anchored_id
+            {
+                if oldsid == nnfa.special().start_unanchored_id {
+                    remap_unanchored[oldsid] = newsid;
+                    remap_anchored[oldsid] = DFA::DEAD;
+                } else {
+                    remap_unanchored[oldsid] = DFA::DEAD;
+                    remap_anchored[oldsid] = newsid;
+                    is_anchored[newsid.as_usize() >> stride2] = true;
+                }
+                if state.is_match() {
+                    dfa.set_matches(newsid, nnfa.iter_matches(oldsid));
+                }
+                sparse_iter(
+                    nnfa,
+                    oldsid,
+                    &dfa.byte_classes,
+                    |_, class, oldnextsid| {
+                        let class = usize::from(class);
+                        if oldnextsid == noncontiguous::NFA::FAIL {
+                            dfa.trans[newsid.as_usize() + class] = DFA::DEAD;
+                        } else {
+                            dfa.trans[newsid.as_usize() + class] = oldnextsid;
+                        }
+                    },
+                );
+                newsid = next_dfa_id(newsid);
+            } else {
+                let unewsid = newsid;
+                newsid = next_dfa_id(newsid);
+                let anewsid = newsid;
+                newsid = next_dfa_id(newsid);
+
+                remap_unanchored[oldsid] = unewsid;
+                remap_anchored[oldsid] = anewsid;
+                is_anchored[anewsid.as_usize() >> stride2] = true;
+                if state.is_match() {
+                    dfa.set_matches(unewsid, nnfa.iter_matches(oldsid));
+                    dfa.set_matches(anewsid, nnfa.iter_matches(oldsid));
+                }
+                sparse_iter(
+                    nnfa,
+                    oldsid,
+                    &dfa.byte_classes,
+                    |byte, class, oldnextsid| {
+                        let class = usize::from(class);
+                        if oldnextsid == noncontiguous::NFA::FAIL {
+                            dfa.trans[unewsid.as_usize() + class] = nnfa
+                                .next_state(Anchored::No, state.fail(), byte);
+                        } else {
+                            dfa.trans[unewsid.as_usize() + class] = oldnextsid;
+                            dfa.trans[anewsid.as_usize() + class] = oldnextsid;
+                        }
+                    },
+                );
+            }
+        }
+        for i in 0..dfa.state_len {
+            let sid = i << stride2;
+            if is_anchored[i] {
+                for next in dfa.trans[sid..][..stride].iter_mut() {
+                    *next = remap_anchored[*next];
+                }
+            } else {
+                for next in dfa.trans[sid..][..stride].iter_mut() {
+                    *next = remap_unanchored[*next];
+                }
+            }
+        }
+        // Now that we've remapped all the IDs in our states, all that's left
+        // is remapping the special state IDs.
+        let old = nnfa.special();
+        let new = &mut dfa.special;
+        new.max_special_id = remap_anchored[old.max_special_id];
+        new.max_match_id = remap_anchored[old.max_match_id];
+        new.start_unanchored_id = remap_unanchored[old.start_unanchored_id];
+        new.start_anchored_id = remap_anchored[old.start_anchored_id];
+    }
+
+    /// Set the desired match semantics.
+    ///
+    /// This only applies when using [`Builder::build`] and not
+    /// [`Builder::build_from_noncontiguous`].
+    ///
+    /// See
+    /// [`AhoCorasickBuilder::match_kind`](crate::AhoCorasickBuilder::match_kind)
+    /// for more documentation and examples.
+    pub fn match_kind(&mut self, kind: MatchKind) -> &mut Builder {
+        self.noncontiguous.match_kind(kind);
+        self
+    }
+
+    /// Enable ASCII-aware case insensitive matching.
+    ///
+    /// This only applies when using [`Builder::build`] and not
+    /// [`Builder::build_from_noncontiguous`].
+    ///
+    /// See
+    /// [`AhoCorasickBuilder::ascii_case_insensitive`](crate::AhoCorasickBuilder::ascii_case_insensitive)
+    /// for more documentation and examples.
+    pub fn ascii_case_insensitive(&mut self, yes: bool) -> &mut Builder {
+        self.noncontiguous.ascii_case_insensitive(yes);
+        self
+    }
+
+    /// Enable heuristic prefilter optimizations.
+    ///
+    /// This only applies when using [`Builder::build`] and not
+    /// [`Builder::build_from_noncontiguous`].
+    ///
+    /// See
+    /// [`AhoCorasickBuilder::prefilter`](crate::AhoCorasickBuilder::prefilter)
+    /// for more documentation and examples.
+    pub fn prefilter(&mut self, yes: bool) -> &mut Builder {
+        self.noncontiguous.prefilter(yes);
+        self
+    }
+
+    /// Sets the starting state configuration for the automaton.
+    ///
+    /// See
+    /// [`AhoCorasickBuilder::start_kind`](crate::AhoCorasickBuilder::start_kind)
+    /// for more documentation and examples.
+    pub fn start_kind(&mut self, kind: StartKind) -> &mut Builder {
+        self.start_kind = kind;
+        self
+    }
+
+    /// A debug setting for whether to attempt to shrink the size of the
+    /// automaton's alphabet or not.
+    ///
+    /// This should never be enabled unless you're debugging an automaton.
+    /// Namely, disabling byte classes makes transitions easier to reason
+    /// about, since they use the actual bytes instead of equivalence classes.
+    /// Disabling this confers no performance benefit at search time.
+    ///
+    /// See
+    /// [`AhoCorasickBuilder::byte_classes`](crate::AhoCorasickBuilder::byte_classes)
+    /// for more documentation and examples.
+    pub fn byte_classes(&mut self, yes: bool) -> &mut Builder {
+        self.byte_classes = yes;
+        self
+    }
+}
+
+/// Iterate over all possible equivalence class transitions in this state.
+/// The closure is called for all transitions with a distinct equivalence
+/// class, even those not explicitly represented in this sparse state. For
+/// any implicitly defined transitions, the given closure is called with
+/// the fail state ID.
+///
+/// The closure is guaranteed to be called precisely
+/// `byte_classes.alphabet_len()` times, once for every possible class in
+/// ascending order.
+fn sparse_iter<F: FnMut(u8, u8, StateID)>(
+    nnfa: &noncontiguous::NFA,
+    oldsid: StateID,
+    classes: &ByteClasses,
+    mut f: F,
+) {
+    let mut prev_class = None;
+    let mut byte = 0usize;
+    for t in nnfa.iter_trans(oldsid) {
+        while byte < usize::from(t.byte()) {
+            let rep = byte.as_u8();
+            let class = classes.get(rep);
+            byte += 1;
+            if prev_class != Some(class) {
+                f(rep, class, noncontiguous::NFA::FAIL);
+                prev_class = Some(class);
+            }
+        }
+        let rep = t.byte();
+        let class = classes.get(rep);
+        byte += 1;
+        if prev_class != Some(class) {
+            f(rep, class, t.next());
+            prev_class = Some(class);
+        }
+    }
+    for b in byte..=255 {
+        let rep = b.as_u8();
+        let class = classes.get(rep);
+        if prev_class != Some(class) {
+            f(rep, class, noncontiguous::NFA::FAIL);
+            prev_class = Some(class);
+        }
+    }
+}